In [2]:
import sys
import os
from pyspark.sql import SparkSession
from pyspark.conf import SparkConf
from pyspark.ml.recommendation import ALSModel
from pyspark.sql.window import Window
import pyspark.sql.functions as F
# sys.path.append('.\src')
# from shared.load import load_ratings, load_book_metadata 

In [3]:
conf = SparkConf()
spark = SparkSession.builder.config(conf=conf).getOrCreate()

In [20]:
dataPath = "hdfs://localhost:9000/user/nhom7/book/data/"
bookFilePath = dataPath + "BX-Books.csv"
ratingsFilePath = dataPath + "BX-Book-Ratings.csv"
books_df = spark.read.options(inferSchema="true", header="true", delimiter=';').csv(bookFilePath)
rating_df = spark.read.options(inferSchema="true", header="true", delimiter=';').csv(ratingsFilePath)

books_df.show(5)
rating_df.show(5)

In [41]:
# change ids from strings to integers
w = Window.partitionBy(F.lit(1)).orderBy('ISBN')
bookid_df = rating_df.select('ISBN').distinct().withColumn('Book-ID', F.row_number().over(w))
rating_df = rating_df.join(bookid_df, 'ISBN')

rating_df.show()

+-------------+-------+-----------+-------+
|         ISBN|User-ID|Book-Rating|Book-ID|
+-------------+-------+-----------+-------+
|   9044922572|  89192|          0|      7|
|   9044922718|  89192|          0|      8|
| "9170010242"| 227945|         10|     22|
|(THEWINDMILLP| 194500|          0|     27|
|   )959326839| 100120|          0|     32|
|0 00 612183 7| 216444|          0|     38|
|0 00 655241 2| 129289|          8|     40|
|0 297 64548 X| 104168|          8|     44|
|0 440 20615 4|  71726|          9|     46|
|0 7525 1962 x| 266641|          0|     53|
|0 75280 122 8|  14387|          0|     54|
|0 907 062 008|  13874|         10|     56|
|0 907 062 008|  37527|          0|     56|
|0 907 062 008|  52203|          0|     56|
|0 907 062 008|  90232|          0|     56|
|0 907 062 008| 137397|         10|     56|
|0 907 062 008| 163824|          0|     56|
|0 907 062 008| 188658|         10|     56|
|0 907 062 008| 206865|          0|     56|
|  0*708880258| 218258|         

In [44]:
user_id = 11198
# select where user id
read_books_df = rating_df.filter(F.col('User-ID') == user_id)\
                        .join(books_df, 'ISBN')

In [45]:
print('Books user has read:')
read_books_df.select('ISBN','Book-Title','Year-Of-Publication','Publisher', 'Book-Rating').show()

Books user has read:
+----------+--------------------+-------------------+--------------------+-----------+
|      ISBN|          Book-Title|Year-Of-Publication|           Publisher|Book-Rating|
+----------+--------------------+-------------------+--------------------+-----------+
|3426622254|Panic Room. Der R...|               2002|Droemersche Verla...|          5|
|3485008702|Ein Krokodil f�?¼...|               2001|       Nymphenburger|          8|
|3404129423|Der dritte Zwilling.|               1999|             L�?¼bbe|          7|
|3426048086|Gorillas im Nebel...|               1991|Droemersche Verla...|          0|
|3453023226|  Das Double. Roman.|               1986|               Heyne|          0|
|3453177398|Ein hei�?�?er Som...|               2000|               Heyne|          0|
|3453195841|Das s�?¼�?�?e Ver...|               2002|               Heyne|          0|
|340411566X|Der Club der tote...|               1990|             L�?¼bbe|          0|
|3423125772|Zauber geg

In [46]:
# load als model
model = ALSModel.load("hdfs://localhost:9000/user/nhom7/book/als_model")

In [47]:
# transform
predicted_ratings = model.recommendForUserSubset(read_books_df, 10)

In [48]:
print('Predicted rating:')
# convert to into interpretable format
predicted_ratings = predicted_ratings.withColumn('rec', F.explode('recommendations'))\
                                        .select(F.col('rec.Book-ID'), F.col('rec.rating'))\
                                        .join(rating_df, 'Book-ID')\
                                        .join(books_df, 'ISBN')\
                                        .select('ISBN', 'Book-Title', 'rating')\
                                        .distinct()\
                                        .show(truncate=False)

Predicted rating:
+----------+-------------------------------------------------+---------+
|ISBN      |Book-Title                                       |rating   |
+----------+-------------------------------------------------+---------+
|0505520001|The Outlaw Viking                                |11.641361|
|0152024417|Auntie Claus and the Key to Christmas            |11.061791|
|0740718975|Zelda Wisdom                                     |10.270445|
|0743474635|The Million - Dollar Wound                       |11.420538|
|0761101608|1001 Ways to Energize Employees                  |10.278934|
|0842378510|Complete Book of Practical Proverbs and Wacky Wit|10.270445|
|0932194826|Time for Joy : Daily Affirmations                |10.270445|
|3596148340|NeuLand. Ganz einfache Geschichten.              |10.358913|
|1592866646|K!Ck                                             |10.33663 |
+----------+-------------------------------------------------+---------+

