# Recommendation Model
This part loads the previously prepared dataset, instantiates an ALS model, evaluates it, and saves recommendations for a selection of users.

In [1]:
from pyspark.ml.recommendation import ALS
from pyspark.ml.evaluation import RegressionEvaluator

In [2]:
# Load the prepared dataset. The parquet reader is named explicitly so the
# result does not depend on the session's `spark.sql.sources.default` setting.
df_data = spark.read.parquet('preparedDataset.parquet')

In [3]:
# print the column names, types and nullability of the loaded dataset
df_data.printSchema()

root
 |-- event_type: integer (nullable = true)
 |-- user_id: integer (nullable = true)
 |-- product_id: integer (nullable = true)



In [4]:
# Randomly divide the dataset into train (weight 0.8) and test (weight 0.2)
# parts with a fixed seed, then report the row count of each part.
train, test = df_data.randomSplit([0.8, 0.2], seed=10)
for part in (train, test):
    print(part.count())

3396744
848904


In [5]:
# Configure the ALS estimator (implicit-feedback mode, non-negative factors).
# Rows whose prediction would be NaN are dropped (coldStartStrategy="drop").
recomend = ALS(
    # column mapping
    userCol='user_id',
    itemCol='product_id',
    ratingCol='event_type',
    # factorization hyper-parameters
    rank=8,
    maxIter=3,
    regParam=0.5,
    implicitPrefs=True,
    alpha=0.9,
    nonnegative=True,
    coldStartStrategy="drop",
)

In [6]:
# fit the configured ALS estimator on the training split
recomend_model = recomend.fit(train)

In [7]:
# score the held-out test split; the model adds a `prediction` column
predicted_ratings = recomend_model.transform(test)

In [8]:
# Configure a Root Mean Square Error evaluator that compares the `prediction`
# column against the `event_type` label.
# NOTE(review): the ALS estimator is configured with implicitPrefs=True, so
# predictions may not be on the same scale as `event_type` - confirm that
# RMSE is the intended metric here.
evaluator = RegressionEvaluator(
    labelCol='event_type',
    predictionCol='prediction',
    metricName='rmse',
)

In [9]:
# compute the RMSE over the test-set predictions and display it
rmse = evaluator.evaluate(predicted_ratings)
print(rmse)

0.13261228899579972


In [None]:
# persist the fitted ALS model under "ALSRecommModel",
# overwriting anything previously saved at that path
recomend_model.write().overwrite().save("ALSRecommModel")

In [23]:
# Select the users with the most events in the training split; these are the
# users recommendations will be generated for.
# for local environment use 2 - 5
# for AWS use 1000 - 10000
amountOfUsers = 5
# Take the user column name from the estimator so grouping and selection
# always agree with the model configuration.
user_col = recomend.getUserCol()
# groupBy().count() already yields one row per user, so no distinct() is
# needed. The secondary sort on the user column breaks ties in `count`
# deterministically: `users` is lazy and re-evaluated by every action
# (show, recommendation, write), and without a tie-breaker each evaluation
# could pick a different set of users.
users = train.groupBy(user_col)\
                .count()\
                .orderBy(['count', user_col], ascending=[False, True])\
                .select(user_col)\
                .limit(amountOfUsers)
users.show()

+---------+
|  user_id|
+---------+
|512475445|
|512365995|
|526731152|
|513021392|
|512505687|
+---------+



In [24]:
# make recommendations: ask the fitted model for the top `amountOfItems`
# products for each user in `users`
# for local environment use 2 - 3
# for AWS use 5 - 10
amountOfItems = 5
userSubsetRecs = recomend_model.recommendForUserSubset(users, amountOfItems)

In [25]:
# display up to `amountOfUsers` rows without truncating the recommendations column
userSubsetRecs.show(amountOfUsers, truncate=False)

+---------+-----------------------------------------------------------------------------------------------------------------------------+
|user_id  |recommendations                                                                                                              |
+---------+-----------------------------------------------------------------------------------------------------------------------------+
|513021392|[[1004870, 5.1943073E-4], [1004767, 4.3788424E-4], [1004856, 4.0432677E-4], [4804056, 3.0939886E-4], [1002524, 2.3378969E-4]]|
|512475445|[[1002100, 0.0], [1002460, 0.0], [1002540, 0.0], [1002550, 0.0], [1003050, 0.0]]                                             |
|512365995|[[1002100, 0.0], [1002460, 0.0], [1002540, 0.0], [1002550, 0.0], [1003050, 0.0]]                                             |
|512505687|[[1002100, 0.0], [1002460, 0.0], [1002540, 0.0], [1002550, 0.0], [1003050, 0.0]]                                             |
|526731152|[[1002100, 0.0], [10024

In [26]:
# Write the selected users and their recommendations to parquet for later
# stages, replacing any existing output at the same paths.
for frame, target in (
    (users, "userList.parquet"),
    (userSubsetRecs, "userSubsetRecs.parquet"),
):
    frame.write.mode("overwrite").parquet(target)