In [1]:
from pyspark.sql import SparkSession
from pyspark.ml.recommendation import ALS 
from pyspark.ml.evaluation import RegressionEvaluator

In [2]:
# Start a Spark session for this notebook, or reuse one that is already active.
spark = (
    SparkSession.builder
    .appName('ALS Book Recommendation System')
    .getOrCreate()
)

In [4]:
# Load the book metadata CSV; the first row supplies the column names and
# Spark infers each column's type from the data.
# NOTE(review): this is an absolute, machine-specific Windows path, so the cell
# only runs where that exact file exists.
bookDf = (
    spark.read
    .option("header", True)
    .option("inferSchema", True)
    .csv(r"C:\Users\bedir\OneDrive\Masaüstü\Big_Data_Uygulamaları\Tavsiye Sistemleri\books\books.csv")
)

In [5]:
# Preview the first five book rows.
bookDf.show(n=5)

+---+-------+------------+-------+-----------+---------+----------------+--------------------+-------------------------+--------------------+--------------------+-------------+--------------+-------------+------------------+-----------------------+---------+---------+---------+---------+---------+--------------------+--------------------+
| id|book_id|best_book_id|work_id|books_count|     isbn|          isbn13|             authors|original_publication_year|      original_title|               title|language_code|average_rating|ratings_count|work_ratings_count|work_text_reviews_count|ratings_1|ratings_2|ratings_3|ratings_4|ratings_5|           image_url|     small_image_url|
+---+-------+------------+-------+-----------+---------+----------------+--------------------+-------------------------+--------------------+--------------------+-------------+--------------+-------------+------------------+-----------------------+---------+---------+---------+---------+---------+--------------------

In [6]:
# Load the ratings CSV (book_id, user_id, rating); the first row supplies the
# column names and Spark infers each column's type from the data.
# NOTE(review): this is an absolute, machine-specific Windows path, so the cell
# only runs where that exact file exists.
ratingDf = (
    spark.read
    .option("header", True)
    .option("inferSchema", True)
    .csv(r"C:\Users\bedir\OneDrive\Masaüstü\Big_Data_Uygulamaları\Tavsiye Sistemleri\books\ratings.csv")
)

In [7]:
# Preview the first five rating rows.
ratingDf.show(n=5)

+-------+-------+------+
|book_id|user_id|rating|
+-------+-------+------+
|      1|    314|     5|
|      1|    439|     3|
|      1|    588|     5|
|      1|   1169|     4|
|      1|   1185|     4|
+-------+-------+------+
only showing top 5 rows



In [8]:
# Configure an explicit-feedback ALS recommender over the ratings columns.
als = ALS(
    userCol="user_id",
    itemCol="book_id",
    ratingCol="rating",
    # Drop rows whose prediction is NaN (users/books unseen during training)
    # so that they do not turn the evaluation metric into NaN.
    coldStartStrategy="drop",
    # Constrain the learned factors to be non-negative.
    nonnegative=True,
    # Ratings are explicit 1-5 scores, not implicit interaction counts.
    implicitPrefs=False,
    # Pin the random seed so training is repeatable across runs.
    seed=42,
)


In [9]:
# 80/20 train/test split. The seed is fixed so that a fresh
# "Restart Kernel -> Run All" produces the same partition; without it the
# split is re-drawn on each run, which changes the data the model sees.
training, test = ratingDf.randomSplit([0.8, 0.2], seed=42)

In [10]:
# Train the ALS model on the training portion of the ratings.
model = als.fit(dataset=training)

In [11]:
# Score the held-out ratings; this adds a `prediction` column next to `rating`.
preds = model.transform(dataset=test)

# Preview the first five predictions.
preds.show(n=5)

+-------+-------+------+----------+
|book_id|user_id|rating|prediction|
+-------+-------+------+----------+
|   1580|   1088|     5| 3.7465312|
|   1238|  35654|     4| 3.3009927|
|   2142|  47211|     3|  3.214529|
|   2866|    540|     4|   3.58134|
|   2142|  19526|     5|  3.988532|
+-------+-------+------+----------+
only showing top 5 rows



In [12]:
# RMSE evaluator comparing the true `rating` with the model's `prediction`.
# NOTE: the variable name is misspelled ("evalutor") but is kept because the
# following cell refers to it by this name.
evalutor = RegressionEvaluator(
    labelCol="rating",
    predictionCol="prediction",
    metricName="rmse",
)

In [13]:
# Compute the root-mean-squared error of the predictions on the test split.
rmse = evalutor.evaluate(dataset=preds)
print(f" RMSE : {rmse}")

 RMSE : 0.9175757661329738


In [14]:
# For every user, ask the model for its 5 highest-scoring books.
# NOTE(review): the name is misspelled ("bookRecocommend"); left unchanged in
# case cells outside this view reference it.
bookRecocommend = model.recommendForAllUsers(numItems=5)

In [15]:
# For every book, ask the model for the 5 users with the highest predicted
# rating; each result row holds a book_id and a list of (user_id, rating) rows.
userRecommend = model.recommendForAllItems(numUsers=5)

In [16]:
# Inspect a single row of the per-book recommendations.
userRecommend.head()

Row(book_id=26, recommendations=[Row(user_id=3655, rating=5.378143310546875), Row(user_id=41565, rating=5.314365863800049), Row(user_id=38818, rating=5.310755729675293), Row(user_id=50889, rating=5.26554012298584), Row(user_id=35916, rating=5.261685371398926)])