In [5]:
from __future__ import print_function

import sys
# Python 3 has no separate `long` type, so alias it to `int` for the
# timestamp parsing further down. Compare the numeric version tuple rather
# than the version string: string comparison is lexicographic and would
# misclassify a major version such as "10".
if sys.version_info[0] >= 3:
    long = int

from pyspark.sql import SparkSession

from pyspark.ml.evaluation import RegressionEvaluator
from pyspark.ml.recommendation import ALS
from pyspark.sql import Row

In [6]:
# Obtain the Spark session for this notebook under the app name
# "ALS_ALGORITHM" (getOrCreate hands back an existing session if there is one).
spark = (
    SparkSession.builder
    .appName("ALS_ALGORITHM")
    .getOrCreate()
)

In [7]:
def _fields_to_rating(fields):
    """Build a rating Row from the split fields of one input line.

    The fields are read positionally as userId, movieId, rating, timestamp.
    """
    return Row(userId=int(fields[0]), movieId=int(fields[1]),
               rating=float(fields[2]), timestamp=long(fields[3]))


# Each line of the text file is a "::"-separated rating record.
lines = spark.read.text("movies_ratings.txt").rdd
parts = lines.map(lambda text_row: text_row.value.split("::"))
ratingsRDD = parts.map(_fields_to_rating)
ratings = spark.createDataFrame(ratingsRDD)

In [11]:
# Preview the first 100 parsed ratings.
ratings.show(n=100)

+-------+------+----------+------+
|movieId|rating| timestamp|userId|
+-------+------+----------+------+
|      2|   3.0|1424380312|     0|
|      3|   1.0|1424380312|     0|
|      5|   2.0|1424380312|     0|
|      9|   4.0|1424380312|     0|
|     11|   1.0|1424380312|     0|
|     12|   2.0|1424380312|     0|
|     15|   1.0|1424380312|     0|
|     17|   1.0|1424380312|     0|
|     19|   1.0|1424380312|     0|
|     21|   1.0|1424380312|     0|
|     23|   1.0|1424380312|     0|
|     26|   3.0|1424380312|     0|
|     27|   1.0|1424380312|     0|
|     28|   1.0|1424380312|     0|
|     29|   1.0|1424380312|     0|
|     30|   1.0|1424380312|     0|
|     31|   1.0|1424380312|     0|
|     34|   1.0|1424380312|     0|
|     37|   1.0|1424380312|     0|
|     41|   2.0|1424380312|     0|
|     44|   1.0|1424380312|     0|
|     45|   2.0|1424380312|     0|
|     46|   1.0|1424380312|     0|
|     47|   1.0|1424380312|     0|
|     48|   1.0|1424380312|     0|
|     50|   1.0|1424

In [12]:
# 80/20 train/test split. The seed is fixed so the same rows land in each
# split on every run; without it the reported RMSE changes between runs.
(training, test) = ratings.randomSplit([0.8, 0.2], seed=42)

In [13]:
# Fit an ALS recommender on the training split.
# - coldStartStrategy="drop": per the Spark ALS docs, rows whose prediction
#   would be NaN (users/items unseen during training) are dropped, which
#   keeps the evaluation metric from becoming NaN.
# - seed: fixed so model fitting is repeatable between runs.
als = ALS(
    maxIter=5,
    regParam=0.01,
    userCol="userId",
    itemCol="movieId",
    ratingCol="rating",
    coldStartStrategy="drop",
    seed=42,
)
model = als.fit(training)

In [14]:
# Score the held-out split and report the RMSE between the true `rating`
# column and the model's `prediction` column.
evaluator = RegressionEvaluator(
    labelCol="rating",
    predictionCol="prediction",
    metricName="rmse",
)
predictions = model.transform(test)
rmse = evaluator.evaluate(predictions)
print("Root-mean-square error = " + str(rmse))

Root-mean-square error = 1.9788105980048145


In [15]:
# Top 10 movie recommendations for every user.
userRecs = model.recommendForAllUsers(numItems=10)

In [19]:
# Show 10 users with their full (untruncated) recommendation lists.
userRecs.show(n=10, truncate=False)

+------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+
|userId|recommendations                                                                                                                                               |
+------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+
|28    |[[46,5.355798], [32,5.1973286], [91,5.1836843], [69,5.03712], [12,4.9391327], [89,4.26267], [87,4.141993], [22,3.9555876], [66,3.9539108], [35,3.9065576]]    |
|26    |[[94,5.4574556], [88,5.183529], [24,4.764553], [7,4.7160697], [38,4.396932], [36,4.16525], [68,4.025549], [73,3.9031422], [4,3.8591478], [9,3.8457222]]       |
|27    |[[46,3.556524], [18,3.5069609], [54,3.2736502], [7,3.2522733], [88,3.2310824], [33,3.1425195], [80,2.9946015], [75,2.9854343], [83,2.979297], [19,2.9373

In [20]:
# Top 10 user recommendations for every movie.
movieRecs = model.recommendForAllItems(numUsers=10)

In [22]:
# Show 10 movies with their full (untruncated) recommendation lists.
movieRecs.show(n=10, truncate=False)

+-------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+
|movieId|recommendations                                                                                                                                              |
+-------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+
|31     |[[12,3.7305567], [23,3.5932126], [16,3.1825297], [14,3.0158415], [8,2.8461747], [9,2.3499084], [6,2.2296731], [7,2.0625465], [25,1.8526692], [21,1.7987369]] |
|85     |[[16,4.914732], [8,4.8663273], [14,4.4541655], [17,3.689227], [7,3.5876262], [1,3.1137006], [6,2.8839586], [21,2.81791], [10,2.6213877], [22,2.617127]]      |
|65     |[[12,5.479553], [23,5.1004734], [20,3.0673318], [9,2.9843714], [11,2.6549845], [15,1.9406167], [3,1.890595], [5,1.8414164], [19,1.8066692], [6,1.661729

In [23]:
# Distinct user ids present in the ratings, using the user column name
# configured on the ALS estimator.
user_col = als.getUserCol()
users = ratings.select(user_col).distinct()

In [24]:
# Preview the distinct user ids (first 20 rows, the default for show()).
users.show(n=20)

+------+
|userId|
+------+
|    26|
|    29|
|    19|
|     0|
|    22|
|     7|
|    25|
|     6|
|     9|
|    27|
|    17|
|    28|
|     5|
|     1|
|    10|
|     3|
|    12|
|     8|
|    11|
|     2|
+------+
only showing top 20 rows



In [25]:
# Keep only the recommended movie ids per user, discarding the predicted
# scores stored alongside them in each recommendation entry.
recommended_ids = userRecs['recommendations']['movieId']
userRecsOnlyItemId = userRecs.select(userRecs['userId'], recommended_ids)

In [26]:
# Show 10 users with their full list of recommended movie ids.
userRecsOnlyItemId.show(n=10, truncate=False)

+------+----------------------------------------+
|userId|recommendations.movieId                 |
+------+----------------------------------------+
|28    |[46, 32, 91, 69, 12, 89, 87, 22, 66, 35]|
|26    |[94, 88, 24, 7, 38, 36, 68, 73, 4, 9]   |
|27    |[46, 18, 54, 7, 88, 33, 80, 75, 83, 19] |
|12    |[65, 64, 17, 35, 32, 50, 16, 31, 94, 49]|
|22    |[54, 74, 75, 24, 51, 88, 22, 94, 98, 39]|
|1     |[17, 90, 62, 20, 68, 9, 85, 77, 53, 94] |
|13    |[53, 93, 2, 29, 1, 39, 18, 74, 92, 25]  |
|6     |[25, 29, 8, 93, 39, 43, 40, 37, 83, 85] |
|16    |[17, 69, 85, 51, 76, 52, 54, 53, 22, 29]|
|3     |[18, 32, 51, 75, 88, 98, 54, 69, 29, 24]|
+------+----------------------------------------+
only showing top 10 rows



## 50 recomendações para todos os usuários

In [27]:
# Top 50 movie recommendations for every user.
# NOTE: this rebinds `userRecs`, replacing the earlier top-10 result.
userRecs = model.recommendForAllUsers(numItems=50)

In [28]:
# Preview the per-user recommendations (first 20 rows, the default for show()).
userRecs.show(n=20)

+------+--------------------+
|userId|     recommendations|
+------+--------------------+
|    28|[[46,5.355798], [...|
|    26|[[94,5.4574556], ...|
|    27|[[46,3.556524], [...|
|    12|[[65,5.479553], [...|
|    22|[[54,5.100646], [...|
|     1|[[17,4.553739], [...|
|    13|[[53,3.7978308], ...|
|     6|[[25,5.0338297], ...|
|    16|[[17,5.397605], [...|
|     3|[[18,4.251042], [...|
|    20|[[18,5.245725], [...|
|     5|[[66,4.2299576], ...|
|    19|[[32,4.0966134], ...|
|    15|[[53,3.9364486], ...|
|    17|[[46,5.0480375], ...|
|     9|[[49,4.9857273], ...|
|     4|[[18,5.511528], [...|
|     8|[[29,5.2466345], ...|
|    23|[[65,5.1004734], ...|
|     7|[[29,4.4058585], ...|
+------+--------------------+
only showing top 20 rows



## Recomenda 50 usuários para os itens

In [29]:
# Top 50 user recommendations for every movie.
# NOTE: this rebinds `movieRecs`, replacing the earlier top-10 result.
movieRecs = model.recommendForAllItems(numUsers=50)

In [30]:
# Preview the per-movie recommendations (first 20 rows, the default for show()).
movieRecs.show(n=20)

+-------+--------------------+
|movieId|     recommendations|
+-------+--------------------+
|     31|[[12,3.7305567], ...|
|     85|[[16,4.914732], [...|
|     65|[[12,5.479553], [...|
|     53|[[24,7.8166633], ...|
|     78|[[18,1.6156389], ...|
|     34|[[3,2.8681424], [...|
|     81|[[18,2.8161654], ...|
|     28|[[18,5.0215693], ...|
|     76|[[14,4.702339], [...|
|     26|[[24,3.7669318], ...|
|     27|[[23,4.8367357], ...|
|     44|[[18,2.9582152], ...|
|     12|[[28,4.9391327], ...|
|     91|[[28,5.1836843], ...|
|     22|[[22,4.7456975], ...|
|     93|[[2,4.9816723], [...|
|     47|[[25,3.8437107], ...|
|      1|[[18,6.134018], [...|
|     52|[[24,5.0278773], ...|
|     13|[[11,3.9801643], ...|
+-------+--------------------+
only showing top 20 rows



In [32]:
# Build the frame that gets persisted: one row per user holding the ids of
# that user's top 10 recommended movies under the column name `movieId`.
recomendationToSave = model.recommendForAllUsers(numItems=10)
movie_ids = recomendationToSave['recommendations']['movieId'].alias('movieId')
recomendationToSave = recomendationToSave.select(recomendationToSave['userId'], movie_ids)

In [33]:
import pymongo
from pymongo import MongoClient

# Connect to the MongoDB server on localhost:27017 and select the `eadpuc` database.
client = MongoClient(host='localhost', port=27017)
db = client['eadpuc']

In [34]:
# Bring the per-user recommendation rows to the driver.
# NOTE(review): assumes the result is small enough to collect; confirm for larger datasets.
colecao = recomendationToSave.collect()

for row in colecao:
    doc = row.asDict()
    print(doc)
    # Upsert keyed on userId so re-running this cell refreshes each user's
    # suggestions instead of inserting a duplicate document every time.
    db.suggestions.replace_one({'userId': doc['userId']}, doc, upsert=True)

{'userId': 28, 'movieId': [46, 32, 91, 69, 12, 89, 87, 22, 66, 35]}
{'userId': 26, 'movieId': [94, 88, 24, 7, 38, 36, 68, 73, 4, 9]}
{'userId': 27, 'movieId': [46, 18, 54, 7, 88, 33, 80, 75, 83, 19]}
{'userId': 12, 'movieId': [65, 64, 17, 35, 32, 50, 16, 31, 94, 49]}
{'userId': 22, 'movieId': [54, 74, 75, 24, 51, 88, 22, 94, 98, 39]}
{'userId': 1, 'movieId': [17, 90, 62, 20, 68, 9, 85, 77, 53, 94]}
{'userId': 13, 'movieId': [53, 93, 2, 29, 1, 39, 18, 74, 92, 25]}
{'userId': 6, 'movieId': [25, 29, 8, 93, 39, 43, 40, 37, 83, 85]}
{'userId': 16, 'movieId': [17, 69, 85, 51, 76, 52, 54, 53, 22, 29]}
{'userId': 3, 'movieId': [18, 32, 51, 75, 88, 98, 54, 69, 29, 24]}
{'userId': 20, 'movieId': [18, 90, 22, 38, 94, 68, 77, 60, 75, 17]}
{'userId': 5, 'movieId': [66, 68, 49, 90, 69, 94, 62, 13, 48, 50]}
{'userId': 19, 'movieId': [32, 98, 7, 24, 94, 36, 88, 8, 74, 54]}
{'userId': 15, 'movieId': [53, 1, 18, 90, 2, 10, 59, 35, 17, 38]}
{'userId': 17, 'movieId': [46, 20, 90, 17, 55, 62, 68, 22, 10, 8

In [35]:
# Preview the saved recommendations (first 20 rows, the default for show()).
recomendationToSave.show(n=20)

+------+--------------------+
|userId|             movieId|
+------+--------------------+
|    28|[46, 32, 91, 69, ...|
|    26|[94, 88, 24, 7, 3...|
|    27|[46, 18, 54, 7, 8...|
|    12|[65, 64, 17, 35, ...|
|    22|[54, 74, 75, 24, ...|
|     1|[17, 90, 62, 20, ...|
|    13|[53, 93, 2, 29, 1...|
|     6|[25, 29, 8, 93, 3...|
|    16|[17, 69, 85, 51, ...|
|     3|[18, 32, 51, 75, ...|
|    20|[18, 90, 22, 38, ...|
|     5|[66, 68, 49, 90, ...|
|    19|[32, 98, 7, 24, 9...|
|    15|[53, 1, 18, 90, 2...|
|    17|[46, 20, 90, 17, ...|
|     9|[49, 7, 27, 32, 2...|
|     4|[18, 38, 90, 41, ...|
|     8|[29, 85, 53, 93, ...|
|    23|[65, 32, 49, 27, ...|
|     7|[29, 25, 8, 85, 9...|
+------+--------------------+
only showing top 20 rows

