### Linear Regression example

In [1]:
from pyspark.sql import SparkSession

In [2]:
# Obtain the SparkSession for this notebook (an existing session is reused
# if one is already active), registered under the application name 'lrex'.
spark = (
    SparkSession.builder
    .appName('lrex')
    .getOrCreate()
)

In [3]:
from pyspark.ml.regression import LinearRegression

In [4]:
# Location of the LIBSVM-formatted sample data set.
# The default is the original absolute path used when this notebook was written;
# set the LINREG_DATA_PATH environment variable to point at the file on another
# machine without editing the notebook.
import os

path = os.environ.get(
    'LINREG_DATA_PATH',
    '/home/jovyan/work/original/Python-and-Spark-for-Big-Data-master/Spark_for_Machine_Learning/Linear_Regression/sample_linear_regression_data.txt',
)

In [5]:
# Read the file at `path` into a DataFrame, parsing it with the 'libsvm' data source.
all_data = spark.read.load(path, format='libsvm')

In [8]:
# Randomly split the rows into a training set (~70%) and a test set (~30%).
# A fixed seed makes the split repeatable across kernel restarts (for the same
# input data and partitioning), so the summary statistics and evaluation
# metrics derived from these splits can be reproduced.
SPLIT_SEED = 42
train, test = all_data.randomSplit([0.7, 0.3], seed=SPLIT_SEED)

In [9]:
# Print summary statistics (count, mean, stddev, min, max) for the training split.
train_summary = train.describe()
train_summary.show()

+-------+-------------------+
|summary|              label|
+-------+-------------------+
|  count|                355|
|   mean|0.16870231810444683|
| stddev| 10.228006013021558|
|    min|-28.571478869743427|
|    max|  27.78383192005107|
+-------+-------------------+



In [10]:
# Print summary statistics (count, mean, stddev, min, max) for the test split.
test_summary = test.describe()
test_summary.show()

+-------+-------------------+
|summary|              label|
+-------+-------------------+
|  count|                146|
|   mean| 0.4713149109082899|
| stddev| 10.565789264044893|
|    min|-28.046018037776633|
|    max| 22.923352376063196|
+-------+-------------------+



In [11]:
# Configure the linear-regression estimator: name the input-vector column,
# the target column, and the column that will receive the model's output.
lr = LinearRegression(
    featuresCol='features',
    labelCol='label',
    predictionCol='prediction',
)

In [12]:
# Fit the configured estimator on the training split only.
model = lr.fit(dataset=train)

In [13]:
# Score the fitted model against the held-out test split; the returned
# object exposes the evaluation metrics (e.g. rootMeanSquaredError).
test_results = model.evaluate(dataset=test)

In [14]:
# Root-mean-squared error of the fitted model on the held-out test split.
# NOTE(review): in the recorded run this value (~10.75) is about the same as the
# standard deviation of the test labels (~10.57), which suggests the model
# explains very little of the variance in this sample data set.
test_results.rootMeanSquaredError

10.752270872723422

In [15]:
# Keep only the feature vectors from the test split (the label column is
# dropped), mimicking data that arrives without known targets.
unlabeled_data = test.select(test['features'])

In [16]:
# Apply the fitted model to the label-free rows; the result carries the
# original 'features' column plus a 'prediction' column.
predictions = model.transform(dataset=unlabeled_data)

# Display the first rows of the predictions table.
predictions.show()

+--------------------+-------------------+
|            features|         prediction|
+--------------------+-------------------+
|(10,[0,1,2,3,4,5,...|-0.8728680117150064|
|(10,[0,1,2,3,4,5,...| 0.6536015525517873|
|(10,[0,1,2,3,4,5,...|-1.7872207027138451|
|(10,[0,1,2,3,4,5,...|  1.588542709572002|
|(10,[0,1,2,3,4,5,...|  0.789574479518723|
|(10,[0,1,2,3,4,5,...| 0.1558950998304992|
|(10,[0,1,2,3,4,5,...| 0.7051463188575392|
|(10,[0,1,2,3,4,5,...|  2.757284440989177|
|(10,[0,1,2,3,4,5,...| 3.4049774977837526|
|(10,[0,1,2,3,4,5,...| 1.3588251010287065|
|(10,[0,1,2,3,4,5,...|0.06318290414212309|
|(10,[0,1,2,3,4,5,...|-2.2962344526398835|
|(10,[0,1,2,3,4,5,...|  3.542499319980746|
|(10,[0,1,2,3,4,5,...| 2.1176315587355368|
|(10,[0,1,2,3,4,5,...|-0.4122458777168794|
|(10,[0,1,2,3,4,5,...|  2.704014528006548|
|(10,[0,1,2,3,4,5,...| -4.422903024990537|
|(10,[0,1,2,3,4,5,...|-1.3382466218880986|
|(10,[0,1,2,3,4,5,...| 2.1546498657632713|
|(10,[0,1,2,3,4,5,...| -3.694825583884575|
+----------