In [48]:
from pyspark.sql import SparkSession

In [49]:
# Get the Spark session for the application named 'cruise', creating it
# only if one is not already running.
spark = (
    SparkSession.builder
    .appName('cruise')
    .getOrCreate()
)

In [50]:
# Load the ratings CSV: the first line supplies the column names and the
# column types are inferred from the data instead of being declared.
df = spark.read.csv(
    '/home/aravind/Downloads/newchanged.csv',
    header=True,
    inferSchema=True,
)

In [51]:
# Preview the first 20 rows (show()'s defaults, written out explicitly).
df.show(n=20, truncate=True)

+-----+------+-------+
|users|places|ratings|
+-----+------+-------+
|    1|     0|   0.93|
|    1|     1|    1.8|
|    1|     2|   2.29|
|    1|     3|   0.62|
|    1|     4|    0.8|
|    1|     5|   2.42|
|    1|     6|   3.19|
|    1|     7|   2.79|
|    1|     8|   1.82|
|    1|     9|   2.42|
|    2|     0|   1.02|
|    2|     1|    2.2|
|    2|     2|   2.66|
|    2|     3|   0.64|
|    2|     4|   1.42|
|    2|     5|   3.18|
|    2|     6|   3.21|
|    2|     7|   2.63|
|    2|     8|   1.86|
|    2|     9|   2.32|
+-----+------+-------+
only showing top 20 rows



In [52]:
from pyspark.ml.recommendation import ALS

In [53]:
from pyspark.ml.evaluation import RegressionEvaluator 

In [54]:
# Summary statistics (count, mean, stddev, min, max) for each column of the
# full dataset, before it is split.
df.describe().show(n=20, truncate=True)

+-------+-----------------+-----------------+------------------+
|summary|            users|           places|           ratings|
+-------+-----------------+-----------------+------------------+
|  count|             9800|             9800|              9800|
|   mean|            490.5|              4.5| 1.695890816326541|
| stddev|282.9159194735791|2.872427879450618|0.9840713958919984|
|    min|                1|                0|               0.0|
|    max|              980|                9|              3.76|
+-------+-----------------+-----------------+------------------+



In [55]:
# Split the rows into ~80% training and ~20% test data; the weights are
# given in [train, test] order.
# A fixed seed keeps the split repeatable across kernel restarts, so the
# train/test summaries and any metric computed on the test split can be
# compared between runs instead of changing every time.
Train_data, Test_data = df.randomSplit([0.8, 0.2], seed=42)

In [56]:
# Summary statistics of the training split, to compare against the full data.
Train_data.describe().show(n=20, truncate=True)

+-------+------------------+-----------------+------------------+
|summary|             users|           places|           ratings|
+-------+------------------+-----------------+------------------+
|  count|              7831|             7831|              7831|
|   mean|490.98288851998467|4.483335461626868|1.6894151449367931|
| stddev| 282.8233868721279|2.857929643817219|0.9859576204345447|
|    min|                 1|                0|               0.0|
|    max|               980|                9|              3.66|
+-------+------------------+-----------------+------------------+



In [57]:
# Summary statistics of the held-out test split.
Test_data.describe().show(n=20, truncate=True)

+-------+------------------+------------------+------------------+
|summary|             users|            places|           ratings|
+-------+------------------+------------------+------------------+
|  count|              1969|              1969|              1969|
|   mean|488.57948197054344| 4.566277298120873| 1.721645505332653|
| stddev|283.34740323542746|2.9291776514436756|0.9763573698705665|
|    min|                 1|                 0|              0.13|
|    max|               980|                 9|              3.76|
+-------+------------------+------------------+------------------+



In [58]:
# Configure the ALS collaborative-filtering estimator on the
# (users, places, ratings) columns.
#   coldStartStrategy='drop': by default a row whose user or place did not
#     appear in the data passed to fit() receives a NaN prediction, and a
#     single NaN makes an aggregate metric such as RMSE NaN as well.
#     Dropping those rows keeps the evaluation well-defined.
#     (Requires Spark 2.2 or newer.)
#   seed: set explicitly so that refitting on the same data is repeatable.
als = ALS(
    maxIter=10,
    regParam=0.01,
    userCol='users',
    itemCol='places',
    ratingCol='ratings',
    coldStartStrategy='drop',
    seed=42,
)

In [59]:
# Fit the ALS estimator on the training split only.
model = als.fit(dataset=Train_data)

In [60]:
# Score the held-out rows; the result keeps the input columns and adds a
# 'prediction' column.
prediction = model.transform(dataset=Test_data)

In [61]:
# Look at the first 20 scored rows: actual 'ratings' next to 'prediction'.
prediction.show(n=20, truncate=True)

+-----+------+-------+----------+
|users|places|ratings|prediction|
+-----+------+-------+----------+
|  858|     1|   1.64| 0.9154941|
|   85|     1|   0.28|0.79905254|
|   65|     1|   1.12| 0.9755691|
|  458|     1|   1.04| 0.8589483|
|   53|     1|   1.88|  0.966382|
|  472|     1|   1.28|0.99111557|
|  322|     1|    1.0|0.81166124|
|  362|     1|    1.6|0.96153694|
|  633|     1|    1.2| 0.8341851|
|  673|     1|   0.68|0.70244807|
|  961|     1|   1.24|0.88974833|
|  876|     1|   1.32| 0.9265636|
|  950|     1|   1.72|0.91778696|
|  193|     1|   0.64|0.94685173|
|  530|     1|   0.96| 0.8568044|
|  756|     1|   1.28|  1.033205|
|  847|     1|   1.24| 0.9708561|
|  939|     1|   1.52| 0.9357635|
|  183|     1|   1.48| 0.9150862|
|  300|     1|   1.92| 0.8927729|
+-----+------+-------+----------+
only showing top 20 rows



In [62]:
# Evaluator computing the root-mean-square error between the observed
# 'ratings' column and the model's 'prediction' column.
evaluator = RegressionEvaluator(
    predictionCol='prediction',
    labelCol='ratings',
    metricName='rmse',
)

In [63]:
# Compute the RMSE over the scored test rows.
rmse = evaluator.evaluate(dataset=prediction)

In [64]:
# Print the label and the metric on separate lines.
print("RMSE Value", rmse, sep="\n")

RMSE Value
0.7116435718383631


In [65]:
# Collect the (users, places) pairs that the test split holds for user 11.
# Only places present in Test_data for this user are selected, so the
# frame may contain very few rows.
single_user = (
    Test_data
    .where(Test_data['users'] == 11)
    .select('users', 'places')
)

In [66]:
# Show which places will be scored for the selected user.
single_user.show(n=20, truncate=True)

+-----+------+
|users|places|
+-----+------+
|   11|     5|
+-----+------+



In [67]:
# Score the selected user's (users, places) pairs with the fitted model.
recomendation = model.transform(dataset=single_user)

In [68]:
# List the scored places with the highest predicted rating first.
recomendation.orderBy(recomendation['prediction'].desc()).show()

+-----+------+----------+
|users|places|prediction|
+-----+------+----------+
|   11|     5| 1.2660258|
+-----+------+----------+

