In [1]:
from pyspark.sql import SparkSession, Row
from pyspark.ml.recommendation import ALS
from pyspark.ml.evaluation import RegressionEvaluator

In [2]:
# Start (or reuse) a Spark session that runs locally on all available cores.
spark = (
    SparkSession.builder
    .master('local[*]')
    .getOrCreate()
)

24/08/02 13:44:17 WARN Utils: Your hostname, Eduardos-MacBook-Pro.local resolves to a loopback address: 127.0.0.1; using 10.75.124.249 instead (on interface en0)
24/08/02 13:44:17 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
24/08/02 13:44:18 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


In [3]:
# Load the raw ratings file as an RDD of rows, one row per line of text.
ratings_path = 'dados/sample_movielens_ratings.txt'
lines = spark.read.text(ratings_path).rdd

In [4]:
# Split the text of every line into its '::'-delimited fields.
parts = lines.map(lambda record: record.value.split('::'))

In [5]:
def _to_rating_row(fields):
    """Convert one list of split string fields into a typed rating Row."""
    return Row(userId=int(fields[0]),
               movieId=int(fields[1]),
               rating=float(fields[2]),
               ts=int(fields[3]))


# Build the ratings DataFrame from the parsed records.
ratings = spark.createDataFrame(parts.map(_to_rating_row))

In [6]:
# Peek at the first few parsed ratings to sanity-check the schema.
ratings.show(n=5)

+------+-------+------+----------+
|userId|movieId|rating|        ts|
+------+-------+------+----------+
|     0|      2|   3.0|1424380312|
|     0|      3|   1.0|1424380312|
|     0|      5|   2.0|1424380312|
|     0|      9|   4.0|1424380312|
|     0|     11|   1.0|1424380312|
+------+-------+------+----------+
only showing top 5 rows



In [7]:
# Hold out roughly 20% of the ratings for evaluation. The fixed seed makes the
# random split repeatable across kernel restarts instead of changing each run.
train, test = ratings.randomSplit([0.8, 0.2], seed=42)

In [8]:
# Configure the ALS recommender on the (userId, movieId, rating) columns.
# coldStartStrategy='drop' is passed so that users/items unseen during training
# do not yield NaN predictions that would poison the evaluation metric.
# The fixed seed makes the random factor initialisation repeatable across runs.
als = ALS(maxIter=5, regParam=0.01, userCol='userId', itemCol='movieId',
          ratingCol='rating', coldStartStrategy='drop', seed=42)

In [9]:
# Fit the ALS estimator on the training split.
model = als.fit(dataset=train)

24/08/02 13:44:21 WARN InstanceBuilder: Failed to load implementation from:dev.ludovic.netlib.blas.JNIBLAS
24/08/02 13:44:21 WARN InstanceBuilder: Failed to load implementation from:dev.ludovic.netlib.blas.VectorBLAS
24/08/02 13:44:21 WARN InstanceBuilder: Failed to load implementation from:dev.ludovic.netlib.lapack.JNILAPACK


In [10]:
# Score the held-out split with the fitted model.
pred = model.transform(dataset=test)

In [11]:
# Evaluator comparing the 'prediction' column against the true 'rating' via RMSE.
reg_eval = RegressionEvaluator(
    labelCol='rating',
    predictionCol='prediction',
    metricName='rmse',
)

In [13]:
# Compute the RMSE on the held-out predictions and display it as the cell output.
rmse = reg_eval.evaluate(dataset=pred)
rmse

1.628218401435827

In [17]:
# Top-5 movie recommendations for every user, printed without column truncation.
user_recs = model.recommendForAllUsers(numItems=5)
user_recs.show(truncate=False)

+------+-------------------------------------------------------------------------------------+
|userId|recommendations                                                                      |
+------+-------------------------------------------------------------------------------------+
|20    |[{62, 4.29599}, {77, 3.9916}, {75, 3.9131606}, {94, 3.8011534}, {88, 3.2866983}]     |
|10    |[{92, 3.653714}, {40, 3.4714453}, {12, 3.3473773}, {47, 2.9954152}, {42, 2.9847329}] |
|0     |[{92, 4.10992}, {93, 4.092936}, {74, 3.9752047}, {53, 3.960501}, {9, 3.547743}]      |
|1     |[{68, 3.923828}, {62, 3.7473352}, {85, 3.2675493}, {17, 3.0492234}, {9, 2.9157958}]  |
|21    |[{53, 5.0317965}, {29, 4.8867846}, {52, 4.8132186}, {70, 4.608746}, {25, 4.427818}]  |
|11    |[{87, 5.486801}, {7, 5.4709706}, {18, 5.153531}, {23, 5.124407}, {32, 5.099137}]     |
|12    |[{32, 6.2705617}, {33, 5.586066}, {71, 5.2896442}, {49, 5.2055054}, {27, 4.9191566}] |
|22    |[{85, 5.8632426}, {22, 5.073741}, {59, 4.9

In [18]:
# Top-5 user recommendations for every movie, printed without column truncation.
item_recs = model.recommendForAllItems(numUsers=5)
item_recs.show(truncate=False)

+-------+------------------------------------------------------------------------------------+
|movieId|recommendations                                                                     |
+-------+------------------------------------------------------------------------------------+
|20     |[{17, 4.6680803}, {23, 3.589833}, {5, 3.4326787}, {1, 2.55934}, {29, 2.1259885}]    |
|40     |[{28, 5.18599}, {2, 4.0384655}, {16, 3.494694}, {10, 3.4714453}, {0, 3.140332}]     |
|10     |[{22, 4.776389}, {23, 4.046126}, {17, 3.8078053}, {15, 3.1494632}, {26, 2.622221}]  |
|50     |[{11, 3.819575}, {23, 3.4947522}, {29, 3.447913}, {12, 3.0973465}, {26, 3.0148926}] |
|80     |[{3, 4.129349}, {18, 3.069425}, {27, 3.0693264}, {22, 2.9856703}, {11, 2.4773421}]  |
|70     |[{21, 4.608746}, {8, 4.247119}, {14, 4.1278934}, {4, 3.622132}, {6, 3.4799469}]     |
|60     |[{7, 3.1803684}, {8, 3.1081936}, {22, 2.9689586}, {21, 2.8687534}, {3, 2.779909}]   |
|90     |[{9, 6.1719522}, {24, 4.870863}, {16, 4.8