In [1]:
import numpy as np
import pandas as pd
import os
import gzip
import json
import re
import pathlib

# Absolute, symlink-resolved working directory; the CSV inputs are read relative to it.
BASE_DIR = pathlib.Path.cwd().resolve()

Read the needed csv files

In [2]:
# Load the book table and the user-rating table from BASE_DIR.
book_df = pd.read_csv(BASE_DIR / 'df_books.csv')
user_df = pd.read_csv(BASE_DIR / 'df_users.csv')

In [3]:
# Count the rows whose rating is exactly 0.
# NOTE(review): a rating of 0 may mean "no rating given" rather than a real score — confirm
# against the data source; the cells shown here never remove these rows before training.
count_zero_rows = int(user_df['user_rating'].eq(0).sum())
print(count_zero_rows)

551885


Check if any value in the relevant columns for building the recommender models is missing

In [4]:
# True if any of the three columns used for modelling contains a missing value.
user_df[['book_id', 'user_id', 'user_rating']].isna().to_numpy().any()

False

Change dtypes in the book dataframe

In [5]:
# Store year and page count with pandas' nullable integer dtype ('Int64').
book_df = book_df.astype(dict.fromkeys(('publication_year', 'num_pages'), 'Int64'))

Creating a dataframe for the recommender systems consisting of columns for book_id, user_id, and user_ratings <br>
Renaming of the columns is necessary for the lenskit specification

In [6]:
# Number of rating rows per user.
user_counts = user_df['user_id'].value_counts()
# Ids of the users that have 15 or more rating rows.
users_with_enough_ratings = user_counts.loc[user_counts.ge(15)].index
# Keep only the rows that belong to those users.
filtered_df = user_df.loc[user_df['user_id'].isin(users_with_enough_ratings)]

In [7]:
# Select the three modelling columns and rename them to item / user / rating.
model_df = (
    filtered_df
    .loc[:, ['book_id', 'user_id', 'user_rating']]
    .rename(columns={'book_id': 'item', 'user_id': 'user', 'user_rating': 'rating'})
)

In [8]:
# Drop books that have no title; the later title searches rely on this column being populated.
book_df = book_df.dropna(subset=['title_without_series'])

Import the lenskit modules

In [9]:
from lenskit.algorithms import Recommender, item_knn, user_knn, als
from lenskit import crossfold as xf
from lenskit import topn, util
from sklearn.preprocessing import OrdinalEncoder

Encode the user column to work properly in the fitting process

In [23]:
# Replace the original user ids with ordinal codes, then store them as nullable integers
# (the encoder's output is cast to 'Int64' in the last step).
enc = OrdinalEncoder()
model_df[['user']] = enc.fit_transform(model_df[['user']])
model_df = model_df.astype({'user': 'Int64'})

Configuring the three different algorithms

In [10]:
# Hyperparameters of the Item-Item k-NN model; the names mirror the ItemItem keyword arguments.
# These module-level names are reassigned by the User-User configuration cell.
nnbrs, min_nbrs, min_sim = 20, 1, 0.1
feedback, center = 'explicit', True

algo_ii = item_knn.ItemItem(
    nnbrs=nnbrs,
    min_nbrs=min_nbrs,
    min_sim=min_sim,
    feedback=feedback,
    center=center,
)

In [11]:
# Hyperparameters of the User-User k-NN model; the names mirror the UserUser keyword arguments.
# This overwrites the module-level values set by the Item-Item configuration cell.
nnbrs, min_nbrs, min_sim = 30, 1, 0.1
feedback, center = 'explicit', True

algo_uu = user_knn.UserUser(
    nnbrs=nnbrs,
    min_nbrs=min_nbrs,
    min_sim=min_sim,
    feedback=feedback,
    center=center,
)

In [12]:
# Hyperparameters of the ALS biased matrix-factorisation model; the names mirror the BiasedMF
# keyword arguments.
features, iterations = 50, 20
reg, damping = 0.1, 5

algo_als = als.BiasedMF(
    features=features,
    iterations=iterations,
    reg=reg,
    damping=damping,
)

Copying the algorithms and adapting them for the later recommendation task

In [13]:
# Clone each configured algorithm before adapting it into a recommender.
# Recommender.adapt() wraps (or returns) the object it is given rather than copying it, so
# without the clone, fitting fit_algo_* would also change the state of algo_ii / algo_uu / algo_als.
# util.clone() copies the configuration only, which keeps the configured algorithms unfitted.
fit_algo_ii = Recommender.adapt(util.clone(algo_ii))
fit_algo_uu = Recommender.adapt(util.clone(algo_uu))
fit_algo_als = Recommender.adapt(util.clone(algo_als))

Fitting the algorithms using 5-fold cross-validation

In [27]:
# Recommenders fitted on every fold, paired with the progress message printed afterwards.
# The Item-Item model (fit_algo_ii) is deliberately left out here, as in the original cell.
models_to_fit = (
    (fit_algo_uu, "Finished round {} of fitting the User-User model"),
    (fit_algo_als, "Finished round {} of fitting the ALS model"),
)

# NOTE(review): every fold refits the same recommender objects and tp.test is never used, so
# after the loop only the fit from the last fold remains — confirm this is intended.
for i, tp in enumerate(xf.partition_users(model_df, 5, xf.SampleFrac(0.2))):
    train_split = tp.train.copy()

    for model, message in models_to_fit:
        model.fit(train_split)
        print(message.format(i + 1))

Finished round 1 of fitting the User-User model
Finished round 1 of fitting the ALS model
Finished round 2 of fitting the User-User model
Finished round 2 of fitting the ALS model
Finished round 3 of fitting the User-User model
Finished round 3 of fitting the ALS model
Finished round 4 of fitting the User-User model
Finished round 4 of fitting the ALS model
Finished round 5 of fitting the User-User model
Finished round 5 of fitting the ALS model


In [15]:
from pandas import Series

Creating a new user who was not part of the fitting process, then generating top-10 recommendations for this user with each algorithm

In [29]:
 user_ratings = {
     18490: 4, # Frankenstein
     29579: 4, # Foundation
     333867: 4, # The Stars My Destination
     95558: 3, # Solaris
     234225: 5, # Dune
     16690: 5, # The Moon is a Harsh Mistress
     77566: 5, # Hyperion
     7677: 5, # Jurassic Park
     5470: 4, # 1984
     5129: 3, # Brave New World
     4981: 4, # Slaughterhouse-Five
     2767052: 5, # The Hunger Games
     830: 4, # Snow Crash
     7613: 3, # Animal Farm
     227463: 4 # A Clockwork Orange
} 

#recs_ii = fit_algo_ii.recommend(user=-1, n=10, ratings=Series(user_ratings))
recs_uu = fit_algo_uu.recommend(user=-1, n=10, ratings=Series(user_ratings))
recs_als = fit_algo_als.recommend(user=-1, n=10, ratings=Series(user_ratings))

In [17]:
# Display the Item-Item recommendations.
# NOTE(review): the only assignment of recs_ii in the visible cells is commented out, so on a
# fresh kernel this cell raises NameError unless recs_ii is defined elsewhere.
recs_ii

Unnamed: 0,item,score
0,727624,5.201815
1,19848,5.035149
2,1001558,4.776514
3,16068627,4.776502
4,10114864,4.701815
5,827715,4.14626
6,6760149,3.883634
7,1118509,3.590704
8,1505932,3.559064


In [41]:
# Display the ALS recommendations computed for the new user.
recs_als

Unnamed: 0,item,score
0,33231678,5.280865
1,33020988,5.24599
2,576565,5.223198
3,26258444,5.212183
4,25521906,5.202399
5,24431330,5.196244
6,340381,5.193257
7,25819278,5.187523
8,28796537,5.16975
9,33118312,5.167935


In [51]:
# Look up the metadata of book_id 33118312, which appears in the displayed ALS output.
book_df.loc[book_df['book_id'].eq(33118312)]

Unnamed: 0,book_id,title_without_series,book_description,publication_year,publisher,ratings_count,book_average_rating,cover_page,book_url,is_ebook,num_pages
315028,33118312,Refugee,JOSEF is a Jewish boy living in 1930s Nazi Ger...,2017,Scholastic,2172,4.66,https://images.gr-assets.com/books/1487620933m...,https://www.goodreads.com/book/show/33118312-r...,True,352


In [28]:
# Widen the displayed column text so the descriptions are readable.
pd.set_option('display.max_colwidth', 100)

# List all editions whose title contains the phrase, most-rated first.
# regex=False matches the title literally instead of interpreting it as a regular expression;
# na=False treats missing titles as non-matches, so the boolean mask is valid even when the
# cell that drops untitled books has not been run in this session.
book_df.loc[
    book_df['title_without_series'].str.contains('A Clockwork Orange', regex=False, na=False)
].sort_values(by=['ratings_count'], ascending=False)

Unnamed: 0,book_id,title_without_series,book_description,publication_year,publisher,ratings_count,book_average_rating,cover_page,book_url,is_ebook,num_pages
713043,227463,A Clockwork Orange,"A vicious fifteen-year-old ""droog"" is the central character of this 1963 classic, whose stark te...",1995.0,W. W. Norton Company,440227,3.98,https://images.gr-assets.com/books/1348339306m/227463.jpg,https://www.goodreads.com/book/show/227463.A_Clockwork_Orange,False,212.0
1139156,8810,A Clockwork Orange,"Told by the central character, Alex, this brilliant, hilarious, and disturbing novel creates an ...",2000.0,Penguin Classics,4250,3.98,https://images.gr-assets.com/books/1391825616m/8810.jpg,https://www.goodreads.com/book/show/8810.A_Clockwork_Orange,False,141.0
1148977,8811,A Clockwork Orange,"In Burgess's infamous nightmare vision of youth culture in revolt, 15-year-old Alex, a charismat...",1998.0,Penguin Modern Classics,1258,3.98,https://s.gr-assets.com/assets/nophoto/book/111x148-bcc042a9c91a29c1d680899eff700a03.png,https://www.goodreads.com/book/show/8811.A_Clockwork_Orange,False,160.0
906389,12399254,A Clockwork Orange,A vicious fifteen-year-old droog is the central character of this 1963 classic. In Anthony Burge...,2011.0,W. W. Norton & Company,1245,3.98,https://s.gr-assets.com/assets/nophoto/book/111x148-bcc042a9c91a29c1d680899eff700a03.png,https://www.goodreads.com/book/show/12399254-a-clockwork-orange,True,
304890,62373,A Clockwork Orange,"Fifteen-year-old Alex and his three friends start an evening's mayhem by hitting an old man, tea...",1972.0,Penguin Books,987,3.98,https://images.gr-assets.com/books/1389477534m/62373.jpg,https://www.goodreads.com/book/show/62373.A_Clockwork_Orange,False,149.0
84852,5387987,A Clockwork Orange,Anthony Burgess's A Clockwork Orangeis the shocking seminal novel that spawned one of the most n...,2008.0,Penguin Group,904,3.98,https://images.gr-assets.com/books/1422761097m/5387987.jpg,https://www.goodreads.com/book/show/5387987-a-clockwork-orange,False,141.0
1170304,9598224,A Clockwork Orange,"""What we were after was lashings of ultraviolence.""\nIn this nightmare vision of youth in revolt...",2011.0,Penguin,809,3.98,https://images.gr-assets.com/books/1331324380m/9598224.jpg,https://www.goodreads.com/book/show/9598224-a-clockwork-orange,False,141.0
1150833,200249,A Clockwork Orange,"Der junge Alex prugelt, vergewaltigt, totet - bis man mit Hilfe moderner Technik einen wahren Ch...",1997.0,Heyne Taschenbuch,606,3.98,https://images.gr-assets.com/books/1300717599m/200249.jpg,https://www.goodreads.com/book/show/200249.A_Clockwork_Orange,False,224.0
229477,8812,A Clockwork Orange,"Adrift in the impersonal, iron-gray society of the superstate, the novel's main character, 15-ye...",1971.0,Ballantine Books,603,3.98,https://images.gr-assets.com/books/1466407235m/8812.jpg,https://www.goodreads.com/book/show/8812.A_Clockwork_Orange,False,191.0
569632,19378389,A Clockwork Orange,"This is a fully restored edition of Anthony Burgess' original text of A Clockwork Orange, with a...",2013.0,Penguin Books,548,3.98,https://images.gr-assets.com/books/1386783595m/19378389.jpg,https://www.goodreads.com/book/show/19378389-a-clockwork-orange,False,306.0


Saving the recommendations to csv files

In [31]:
# Save every recommendation list that exists in this session, one CSV file per algorithm.
# The visible cells only compute recs_uu and recs_als (the Item-Item call is commented out), so
# a hard-coded recs_ii.to_csv(...) fails with NameError on a fresh run while the computed lists
# are never written. Looking the names up first makes the cell work whichever algorithms ran.
for recs_name in ('recs_ii', 'recs_uu', 'recs_als'):
    recs = globals().get(recs_name)
    if recs is not None:
        recs.to_csv(f'{recs_name}.csv', encoding='utf-8')

In [9]:
# Show the full book descriptions (up to 2000 characters per cell).
pd.set_option('display.max_colwidth', 2000)

# List all editions whose title contains the phrase.
# regex=False matches the title literally instead of interpreting it as a regular expression;
# na=False treats missing titles as non-matches, so the boolean mask is valid even when the
# cell that drops untitled books has not been run in this session.
book_df.loc[
    book_df['title_without_series'].str.contains('We, the Drowned', regex=False, na=False)
]

Unnamed: 0,book_id,title_without_series,book_description,publication_year,publisher,ratings_count,book_average_rating,cover_page,book_url,is_ebook,num_pages
375584,12793962,"We, the Drowned","In 1848 a motley crew of Danish sailors sets sail from a small town of Marstal to fight the Germans. Not all of them return - and those who do will never be the same again. Among them is the daredevil Laurids Madsen, who promptly escapes again into the anonymity of the high seas. Spanning four generations, two world wars and a hundred years, We, The Drowned is an epic tale of adventure, ruthlessness and passion.",2011.0,Vintage,203,4.22,https://images.gr-assets.com/books/1438026498m/12793962.jpg,https://www.goodreads.com/book/show/12793962-we-the-drowned,False,693
399073,7988467,"We, the Drowned","It is an epic drama of adventure, courage, ruthlessness and passion by one of Scandinavia's most acclaimed storytellers.\nIn 1848 a motley crew of Danish sailors sets sail from the small island town of Marstal to fight the Germans. Not all of them return - and those who do will never be the same. Among them is the daredevil Laurids Madsen, who promptly escapes again into the anonymity of the high seas.\nAs soon as he is old enough, his son Albert sets off in search of his missing father on a voyage that will take him to the furthest reaches of the globe and into the clutches of the most nefarious company. Bearing a mysterious shrunken head, and plagued by premonitions of bloodshed, he returns to a town increasingly run by women - among them a widow intent on liberating all men from the tyranny of the sea.\nFrom the barren rocks of Newfoundland to the lush plantations of Samoa, from the roughest bars in Tasmania, to the frozen coasts of northern Russia, We, The Drowned spans four generations, two world wars and a hundred years. Carsten Jensen conjures a wise, humorous, thrilling story of fathers and sons, of the women they love and leave behind, and of the sea's murderous promise. This is a novel destined to take its place among the greatest seafaring literature.",2011.0,Houghton Mifflin Harcourt,4248,4.22,https://images.gr-assets.com/books/1303518744m/7988467.jpg,https://www.goodreads.com/book/show/7988467-we-the-drowned,False,688
475556,10477987,"We, the Drowned","It is an epic drama of adventure, courage, ruthlessness and passion by one of Scandinavia's most acclaimed storytellers.\nIn 1848 a motley crew of Danish sailors sets sail from the small island town of Marstal to fight the Germans. Not all of them return - and those who do will never be the same. Among them is the daredevil Laurids Madsen, who promptly escapes again into the anonymity of the high seas.\nAs soon as he is old enough, his son Albert sets off in search of his missing father on a voyage that will take him to the furthest reaches of the globe and into the clutches of the most nefarious company. Bearing a mysterious shrunken head, and plagued by premonitions of bloodshed, he returns to a town increasingly run by women - among them a widow intent on liberating all men from the tyranny of the sea.\nFrom the barren rocks of Newfoundland to the lush plantations of Samoa, from the roughest bars in Tasmania, to the frozen coasts of northern Russia, We, The Drowned spans four generations, two world wars and a hundred years. Carsten Jensen conjures a wise, humorous, thrilling story of fathers and sons, of the women they love and leave behind, and of the sea's murderous promise. This is a novel destined to take its place among the greatest seafaring literature.",,,264,4.22,https://s.gr-assets.com/assets/nophoto/book/111x148-bcc042a9c91a29c1d680899eff700a03.png,https://www.goodreads.com/book/show/10477987-we-the-drowned,True,693
1176715,13429545,"We, the Drowned","AN INTERNATIONAL BESTSELLER\nA THRILLING EPIC TALE OF THE SEA\n""We, the Drownedsets sail beyond the narrow channels of the seafaring genre and approaches Tolstoy in its evocation of war's confusion, its power to stun victors and vanquished alike . . . A gorgeous, unsparing novel."" -- Washington Post\n""A generational saga, a swashbuckling sailor's tale, and the account of a small town coming into modernity--both Melville and Steinbeck might have been pleased to read it."" -- New Republic\nHailed in Europe as an instant classic, We, the Drowned is the story of the port town of Marstal, Denmark, whose inhabitants sailed the world from the mid-nineteenth century to the end of the Second World War. The novel tells of ships wrecked and blown up in wars, of places of terror and violence that continue to lure each generation; there are cannibals here, shrunken heads, prophetic dreams, and miraculous survivals. The result is a brilliant seafaring novel, a gripping saga encompassing industrial growth, the years of expansion and exploration, the crucible of the first half of the twentieth century, and most of all, the sea.\nCalled ""one of the most exciting authors in Nordic literature"" by Henning Mankell, Carsten Jensen has worked as a literary critic and a journalist, reporting from China, Cambodia, Latin America, the Pacific Islands, and Afghanistan. He lives in Copenhagen and Marstal.",2012.0,Mariner Books,151,4.22,https://images.gr-assets.com/books/1344671069m/13429545.jpg,https://www.goodreads.com/book/show/13429545-we-the-drowned,False,688


In [79]:
# Look up the metadata of the book with book_id 28600911.
book_df.loc[book_df['book_id'].eq(28600911)]

Unnamed: 0,book_id,title_without_series,book_description,publication_year,publisher,ratings_count,book_average_rating,cover_page,book_url,is_ebook,num_pages
273607,28600911,"Curse of Strahd (Dungeons & Dragons, 5th Edition)",Unravel the mysteries of Ravenloft(r) in this ...,2016,Wizards of the Coast,279,4.5,https://images.gr-assets.com/books/1456090369m...,https://www.goodreads.com/book/show/28600911-c...,False,256
