In [1]:
import pandas as pd

# Load the books I liked; the first CSV column holds the saved row index.
# book_id is cast to str so it compares equal to the ids parsed as text
# from the raw mapping/interactions files further down.
my_books = pd.read_csv("liked_books.csv", index_col=0).astype({"book_id": str})

In [2]:
my_books

Unnamed: 0,user_id,book_id,rating,title
0,-1,2517439,5,"The Forever War (The Forever War, #1)"
1,-1,113576,5,The Smartest Guys in the Room: The Amazing Ris...
2,-1,35100,5,Battle Cry of Freedom
3,-1,228221,5,The Mask of Command
5,-1,17662739,5,"2001: A Space Odyssey (Space Odyssey, #1)"
6,-1,356824,5,India After Gandhi: The History of the World's...
7,-1,12125412,5,The Lady or the Tiger?: and Other Logic Puzzles
8,-1,139069,5,Endurance: Shackleton's Incredible Voyage
10,-1,76680,5,"Foundation (Foundation, #1)"
11,-1,1898,5,Into Thin Air: A Personal Account of the Mount...


In [3]:
# Build a lookup from the first comma-separated field of each line in
# book_id_map.csv to the second one. Every line is stored, so if the file
# starts with a header row it ends up in the dict as an ordinary entry.
csv_book_mapping = {}

with open("book_id_map.csv", "r") as f:
    for line in f:
        line = line.strip()
        if not line:
            # Skip blank lines (e.g. a stray empty line at the end of the
            # file) instead of failing on the two-name unpack below.
            continue
        csv_id, book_id = line.split(",")
        csv_book_mapping[csv_id] = book_id

In [4]:
# Set of my liked book ids (strings) for fast membership tests while
# scanning the interactions file.
liked_book_ids = my_books["book_id"].tolist()
book_set = set(liked_book_ids)

In [5]:
book_set

{'113576',
 '12125412',
 '1215032',
 '128029',
 '139069',
 '1685995',
 '17662739',
 '18949861',
 '1898',
 '228221',
 '228665',
 '2517439',
 '25659450',
 '28187',
 '2913377',
 '35100',
 '356824',
 '437143',
 '5096865',
 '5439',
 '5578108',
 '6448772',
 '76680',
 '77203',
 '8161140',
 '82599',
 '883438'}

In [6]:
# Shell escape: list the directory and keep only the line for the interactions
# file, to see its size. `dir` and `findstr` are Windows commands.
!dir  | findstr goodreads_interactions.csv

28-03-2023  15:39     4,318,621,741 goodreads_interactions.csv


In [7]:
# First pass over the interactions file: for each user, count the rows that
# refer to one of my liked books.
overlap_users = {}

with open("goodreads_interactions.csv", 'r') as f:
    for line in f:
        user_id, csv_id, _, rating, _ = line.split(",")

        # .get() returns None for ids missing from the mapping, and None is
        # never a member of book_set, so such rows are simply not counted.
        book_id = csv_book_mapping.get(csv_id)
        if book_id not in book_set:
            continue

        overlap_users[user_id] = overlap_users.get(user_id, 0) + 1

In [16]:
len(overlap_users)

316341

In [9]:
# Keep only users whose overlap count exceeds one fifth of the number of
# books in my list.
min_overlap = my_books.shape[0] / 5
filtered_overlap_users = {
    user for user, shared_count in overlap_users.items() if shared_count > min_overlap
}


In [10]:
len(filtered_overlap_users)

1258

In [11]:
# Second pass over the interactions file: collect every row that belongs to
# one of the overlapping users as [user_id, book_id, rating] (all strings).
interactions_list = []

with open("goodreads_interactions.csv", 'r') as f:
    for line in f:
        user_id, csv_id, _, rating, _ = line.split(",")

        if user_id not in filtered_overlap_users:
            continue

        # Direct indexing: unlike the .get() lookup used in the first pass,
        # an id missing from the mapping raises KeyError here.
        book_id = csv_book_mapping[csv_id]
        interactions_list.append([user_id, book_id, rating])

In [12]:
interactions_list[0]

['282', '627206', '4']

In [13]:
# Turn the collected rows into a frame; all three columns hold strings here.
interaction_columns = ["user_id", "book_id", "rating"]
interactions = pd.DataFrame(interactions_list, columns=interaction_columns)

In [14]:
# Put my own ratings in front of the other users' interactions.
# The second frame is rebuilt from interactions_list instead of reusing the
# current `interactions`, so re-running this cell does not append my rows a
# second time. The original index labels are kept (no ignore_index), so
# labels from the two frames can repeat.
my_rows = my_books[["user_id", "book_id", "rating"]]
other_rows = pd.DataFrame(interactions_list, columns=["user_id", "book_id", "rating"])
interactions = pd.concat([my_rows, other_rows])

In [17]:
interactions

Unnamed: 0,user_id,book_id,rating
0,-1,2517439,5
1,-1,113576,5
2,-1,35100,5
3,-1,228221,5
5,-1,17662739,5
...,...,...,...
5638696,804100,475178,0
5638697,804100,186074,0
5638698,804100,153008,0
5638699,804100,45107,0


In [18]:
# Normalise dtypes after the concat: ids become strings, ratings numeric.
interactions = interactions.assign(
    book_id=interactions["book_id"].astype(str),
    user_id=interactions["user_id"].astype(str),
    rating=pd.to_numeric(interactions["rating"]),
)

In [21]:
interactions["user_id"].unique()

array(['-1', '282', '874', ..., '442043', '712588', '804100'],
      dtype=object)

In [19]:
# Dense integer code per distinct user_id; used as the row position in the
# ratings matrix.
user_categories = interactions["user_id"].astype("category")
interactions["user_index"] = user_categories.cat.codes

In [22]:
interactions.iloc[2000]

user_id          874
book_id         5308
rating             3
user_index      1216
book_index    630945
Name: 1973, dtype: object

In [23]:
interactions["user_index"].unique()

array([   0,  555, 1216, ..., 1054, 1143, 1183], dtype=int16)

In [20]:
# Dense integer code per distinct book_id; used as the column position in
# the ratings matrix.
book_categories = interactions["book_id"].astype("category")
interactions["book_index"] = book_categories.cat.codes

In [24]:
len(interactions["book_index"].unique())

802870

In [25]:
from scipy.sparse import coo_matrix

# Sparse user x book matrix: one stored value per interaction row, placed at
# (user_index, book_index) with the rating as the value.
rating_values = interactions["rating"]
row_positions = interactions["user_index"]
col_positions = interactions["book_index"]
ratings_mat_coo = coo_matrix((rating_values, (row_positions, col_positions)))

In [26]:
ratings_mat_coo

<1259x802870 sparse matrix of type '<class 'numpy.int64'>'
	with 5638728 stored elements in COOrdinate format>

In [27]:
ratings_mat_coo.shape

(1259, 802870)

In [28]:
# Convert to CSR; this is the matrix the row lookup `ratings_mat[my_index, :]`
# and the similarity computation below operate on.
ratings_mat = ratings_mat_coo.tocsr()

In [29]:
interactions[interactions["user_id"] == "-1"]

Unnamed: 0,user_id,book_id,rating,user_index,book_index
0,-1,2517439,5,0,414880
1,-1,113576,5,0,38971
2,-1,35100,5,0,575858
3,-1,228221,5,0,356004
5,-1,17662739,5,0,214285
6,-1,356824,5,0,581743
7,-1,12125412,5,0,59763
8,-1,139069,5,0,124430
10,-1,76680,5,0,722098
11,-1,1898,5,0,276178


In [30]:
# Row position of my own ratings (user_id "-1") in the ratings matrix.
# Looked up from the data instead of hard-coding 0, so it stays correct even
# if the category codes assign "-1" a different position. Raises IndexError
# if no row with user_id "-1" exists.
my_index = int(interactions.loc[interactions["user_id"] == "-1", "user_index"].iloc[0])

In [31]:
from sklearn.metrics.pairwise import cosine_similarity

# Cosine similarity between my ratings row and every row of the matrix
# (including my own), flattened to one value per user_index.
my_ratings_row = ratings_mat[my_index, :]
pairwise_scores = cosine_similarity(my_ratings_row, ratings_mat)
similarity = pairwise_scores.flatten()

In [33]:
similarity[2]

0.06143442518998915

In [34]:
import numpy as np

# Positions of the 15 largest similarity values (in no particular order).
# My own row is compared against itself as well, so it can be among them;
# rows with user_id "-1" are removed from similar_users afterwards.
n_neighbors = 15
partitioned_positions = np.argpartition(similarity, -n_neighbors)
indices = partitioned_positions[-n_neighbors:]

In [35]:
indices

array([1188,  942,  218,  129,  496,  435, 1208,  795, 1213, 1210, 1143,
        321,  294,  862,    0], dtype=int64)

In [36]:
# All interaction rows that belong to the selected most-similar users.
# .copy() makes an independent frame rather than a slice of interactions.
is_similar_user = interactions["user_index"].isin(indices)
similar_users = interactions[is_similar_user].copy()

In [37]:
# Drop my own rows (user_id "-1") so only other users' ratings remain.
is_other_user = similar_users["user_id"].ne("-1")
similar_users = similar_users[is_other_user]

In [38]:
similar_users

Unnamed: 0,user_id,book_id,rating,user_index,book_index
45312,4133,5359,3,942,632143
45313,4133,10464963,4,942,13492
45314,4133,3858,3,942,593622
45315,4133,11827808,4,942,51904
45316,4133,7913305,4,942,732465
...,...,...,...,...,...
5638521,712588,32388712,3,1143,543119
5638522,712588,16322,5,1143,183365
5638523,712588,860543,0,1143,759827
5638524,712588,853510,5,1143,756768


In [40]:
# Per book: number of rows from similar users and the mean of their ratings.
# NOTE(review): rows with rating 0 are part of both count and mean (several
# books show a mean of 0.0). If 0 means "not rated" in this dataset, it
# pulls the mean down — confirm this is intended.
ratings_by_book = similar_users.groupby("book_id")["rating"]
book_recs = ratings_by_book.agg(['count', 'mean'])

In [41]:
book_recs

Unnamed: 0_level_0,count,mean
book_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,6,3.833333
100322,1,0.000000
100365,1,0.000000
10046142,1,0.000000
1005,3,0.000000
...,...,...
99561,2,2.500000
99610,1,3.000000
99664,1,4.000000
9969571,3,2.333333


In [42]:
# Book metadata table; book_id is cast to str so it can be joined with the
# string ids used in book_recs.
books_titles = pd.read_json("books_titles.json")
books_titles = books_titles.assign(book_id=books_titles["book_id"].astype(str))

In [43]:
# Attach the metadata columns to each recommended book. The inner join drops
# books that have no row in books_titles.
# Note: this overwrites book_recs, so running the cell twice merges the
# metadata into an already-merged frame.
book_recs = pd.merge(book_recs, books_titles, how="inner", on="book_id")

In [44]:
book_recs


Unnamed: 0,book_id,count,mean,title,ratings,url,cover_image,mod_title
0,1,6,3.833333,Harry Potter and the Half-Blood Prince (Harry ...,1713866,https://www.goodreads.com/book/show/1.Harry_Po...,https://images.gr-assets.com/books/1361039191m...,harrypotterandthehalfbloodprinceharrypotter6
1,100322,1,0.000000,Assata: An Autobiography,11057,https://www.goodreads.com/book/show/100322.Assata,https://images.gr-assets.com/books/1328857268m...,assataanautobiography
2,100365,1,0.000000,The Mote in God's Eye,48736,https://www.goodreads.com/book/show/100365.The...,https://images.gr-assets.com/books/1399490037m...,themoteingodseye
3,10046142,1,0.000000,Dancing in the Glory of Monsters: The Collapse...,2391,https://www.goodreads.com/book/show/10046142-d...,https://images.gr-assets.com/books/1328757755m...,dancinginthegloryofmonstersthecollapseofthecon...
4,1005,3,0.000000,Think and Grow Rich,87634,https://www.goodreads.com/book/show/1005.Think...,https://s.gr-assets.com/assets/nophoto/book/11...,thinkandgrowrich
...,...,...,...,...,...,...,...,...
2849,99561,2,2.500000,Looking for Alaska,804587,https://www.goodreads.com/book/show/99561.Look...,https://images.gr-assets.com/books/1394798630m...,lookingforalaska
2850,99610,1,3.000000,The Best Laid Plans,17434,https://www.goodreads.com/book/show/99610.The_...,https://images.gr-assets.com/books/1353374848m...,thebestlaidplans
2851,99664,1,4.000000,The Painted Veil,24606,https://www.goodreads.com/book/show/99664.The_...,https://images.gr-assets.com/books/1320421719m...,thepaintedveil
2852,9969571,3,2.333333,Ready Player One,376328,https://www.goodreads.com/book/show/9969571-re...,https://images.gr-assets.com/books/1500930947m...,readyplayerone


In [45]:
# Add the scoring columns and drop books already in my list, as one chain
# that always builds a new frame. Assigning columns directly on book_recs
# would write into a filtered slice when the cell is re-run, which triggers
# pandas' SettingWithCopyWarning.
#   adjusted_count = count * (count / ratings)
#   score          = mean * adjusted_count
book_recs = (
    book_recs
    .assign(
        adjusted_count=lambda recs: recs["count"] * (recs["count"] / recs["ratings"]),
        score=lambda recs: recs["mean"] * recs["adjusted_count"],
    )
    .loc[lambda recs: ~recs["book_id"].isin(my_books["book_id"])]
)

In [46]:
book_recs

Unnamed: 0,book_id,count,mean,title,ratings,url,cover_image,mod_title,adjusted_count,score
0,1,6,3.833333,Harry Potter and the Half-Blood Prince (Harry ...,1713866,https://www.goodreads.com/book/show/1.Harry_Po...,https://images.gr-assets.com/books/1361039191m...,harrypotterandthehalfbloodprinceharrypotter6,0.000021,0.000081
1,100322,1,0.000000,Assata: An Autobiography,11057,https://www.goodreads.com/book/show/100322.Assata,https://images.gr-assets.com/books/1328857268m...,assataanautobiography,0.000090,0.000000
2,100365,1,0.000000,The Mote in God's Eye,48736,https://www.goodreads.com/book/show/100365.The...,https://images.gr-assets.com/books/1399490037m...,themoteingodseye,0.000021,0.000000
3,10046142,1,0.000000,Dancing in the Glory of Monsters: The Collapse...,2391,https://www.goodreads.com/book/show/10046142-d...,https://images.gr-assets.com/books/1328757755m...,dancinginthegloryofmonstersthecollapseofthecon...,0.000418,0.000000
4,1005,3,0.000000,Think and Grow Rich,87634,https://www.goodreads.com/book/show/1005.Think...,https://s.gr-assets.com/assets/nophoto/book/11...,thinkandgrowrich,0.000103,0.000000
...,...,...,...,...,...,...,...,...,...,...
2849,99561,2,2.500000,Looking for Alaska,804587,https://www.goodreads.com/book/show/99561.Look...,https://images.gr-assets.com/books/1394798630m...,lookingforalaska,0.000005,0.000012
2850,99610,1,3.000000,The Best Laid Plans,17434,https://www.goodreads.com/book/show/99610.The_...,https://images.gr-assets.com/books/1353374848m...,thebestlaidplans,0.000057,0.000172
2851,99664,1,4.000000,The Painted Veil,24606,https://www.goodreads.com/book/show/99664.The_...,https://images.gr-assets.com/books/1320421719m...,thepaintedveil,0.000041,0.000163
2852,9969571,3,2.333333,Ready Player One,376328,https://www.goodreads.com/book/show/9969571-re...,https://images.gr-assets.com/books/1500930947m...,readyplayerone,0.000024,0.000056


In [48]:
# Normalised title key, step 1: remove every character that is not a letter,
# digit or space, then lower-case the result.
titles_alnum = my_books["title"].str.replace("[^a-zA-Z0-9 ]", "", regex=True)
my_books["mod_title"] = titles_alnum.str.lower()

In [49]:
# Normalised title key, step 2: remove all whitespace so the key has the same
# space-free form as the `mod_title` values of the titles table (for example
# "thelittleprince"). Keeping a single space between words would make it
# impossible for multi-word titles to match in the `isin` filter on
# `mod_title`. The raw string avoids the invalid "\s" escape of a plain
# string literal.
my_books["mod_title"] = my_books["mod_title"].str.replace(r"\s+", "", regex=True)

In [50]:
# Drop recommendations whose normalised title equals one of my books'
# normalised titles (same book under a different book_id).
already_liked_title = book_recs["mod_title"].isin(my_books["mod_title"])
book_recs = book_recs[~already_liked_title]

In [51]:
# Keep only books that appear more than twice among the similar users.
has_enough_readers = book_recs["count"].gt(2)
book_recs = book_recs[has_enough_readers]

In [52]:
# Keep only books whose mean rating among the similar users is at least 4.
is_highly_rated = book_recs["mean"].ge(4)
book_recs = book_recs[is_highly_rated]

In [53]:
# Final ranking: highest mean rating first.
# NOTE(review): the `score` column computed earlier is not used for the
# ordering — confirm that ranking by `mean` is what is wanted.
top_recs = book_recs.sort_values(by="mean", ascending=False)

In [54]:
top_recs

Unnamed: 0,book_id,count,mean,title,ratings,url,cover_image,mod_title,adjusted_count,score
2265,62291,5,4.8,"A Storm of Swords (A Song of Ice and Fire, #3)",477834,https://www.goodreads.com/book/show/62291.A_St...,https://images.gr-assets.com/books/1497931121m...,astormofswordsasongoficeandfire3,5.2e-05,0.000251
600,157993,3,4.333333,The Little Prince,763309,https://www.goodreads.com/book/show/157993.The...,https://images.gr-assets.com/books/1367545443m...,thelittleprince,1.2e-05,5.1e-05
1103,22034,3,4.333333,The Godfather,259150,https://www.goodreads.com/book/show/22034.The_...,https://images.gr-assets.com/books/1394988109m...,thegodfather,3.5e-05,0.00015
1176,2318271,3,4.333333,The Last Lecture,245804,https://www.goodreads.com/book/show/2318271.Th...,https://images.gr-assets.com/books/1388075896m...,thelastlecture,3.7e-05,0.000159
1909,4381,3,4.333333,Fahrenheit 451,591506,https://www.goodreads.com/book/show/4381.Fahre...,https://images.gr-assets.com/books/1351643740m...,fahrenheit451,1.5e-05,6.6e-05
243,119322,4,4.25,"The Golden Compass (His Dark Materials, #1)",973154,https://www.goodreads.com/book/show/119322.The...,https://images.gr-assets.com/books/1505766203m...,thegoldencompasshisdarkmaterials1,1.6e-05,7e-05
1444,2767793,4,4.25,"The Hero of Ages (Mistborn, #3)",149260,https://www.goodreads.com/book/show/2767793-th...,https://images.gr-assets.com/books/1480717763m...,theheroofagesmistborn3,0.000107,0.000456
2563,78983,4,4.25,"Kane and Abel (Kane and Abel, #1)",75215,https://www.goodreads.com/book/show/78983.Kane...,https://s.gr-assets.com/assets/nophoto/book/11...,kaneandabelkaneandabel1,0.000213,0.000904
244,119324,3,4.0,"The Subtle Knife (His Dark Materials, #2)",246697,https://www.goodreads.com/book/show/119324.The...,https://images.gr-assets.com/books/1505766360m...,thesubtleknifehisdarkmaterials2,3.6e-05,0.000146
398,13497,4,4.0,"A Feast for Crows (A Song of Ice and Fire, #4)",437398,https://www.goodreads.com/book/show/13497.A_Fe...,https://images.gr-assets.com/books/1429538615m...,afeastforcrowsasongoficeandfire4,3.7e-05,0.000146


In [55]:
def make_clickable(val):
    """Render a URL as an HTML link labelled "Goodreads" that opens in a new tab.

    Args:
        val: The link target. It is converted with ``str`` and HTML-escaped
            before being placed in the ``href`` attribute.

    Returns:
        str: An ``<a>`` element as an HTML string.
    """
    import html  # local import keeps this cell runnable on its own

    # Escape the value so characters such as '"' or '&' cannot terminate the
    # attribute or produce malformed markup.
    return '<a target="_blank" href="{}">Goodreads</a>'.format(html.escape(str(val), quote=True))

def show_image(val):
    """Render an image URL as a 50px-wide thumbnail that links to the image.

    Args:
        val: The image URL. It is converted with ``str`` and HTML-escaped
            before being placed in the ``href`` and ``src`` attributes.

    Returns:
        str: An ``<a>`` element wrapping an ``<img>`` element, as an HTML string.
    """
    import html  # local import keeps this cell runnable on its own

    # Escape once and reuse the same value for both attributes.
    safe_url = html.escape(str(val), quote=True)
    return '<a href="{0}"><img src="{0}" width=50></img></a>'.format(safe_url)

# Display the recommendations with the url column rendered as a link and the
# cover_image column rendered as a thumbnail.
column_formatters = {'url': make_clickable, 'cover_image': show_image}
top_recs.style.format(column_formatters)

Unnamed: 0,book_id,count,mean,title,ratings,url,cover_image,mod_title,adjusted_count,score
2265,62291,5,4.8,"A Storm of Swords (A Song of Ice and Fire, #3)",477834,Goodreads,,astormofswordsasongoficeandfire3,5.2e-05,0.000251
600,157993,3,4.333333,The Little Prince,763309,Goodreads,,thelittleprince,1.2e-05,5.1e-05
1103,22034,3,4.333333,The Godfather,259150,Goodreads,,thegodfather,3.5e-05,0.00015
1176,2318271,3,4.333333,The Last Lecture,245804,Goodreads,,thelastlecture,3.7e-05,0.000159
1909,4381,3,4.333333,Fahrenheit 451,591506,Goodreads,,fahrenheit451,1.5e-05,6.6e-05
243,119322,4,4.25,"The Golden Compass (His Dark Materials, #1)",973154,Goodreads,,thegoldencompasshisdarkmaterials1,1.6e-05,7e-05
1444,2767793,4,4.25,"The Hero of Ages (Mistborn, #3)",149260,Goodreads,,theheroofagesmistborn3,0.000107,0.000456
2563,78983,4,4.25,"Kane and Abel (Kane and Abel, #1)",75215,Goodreads,,kaneandabelkaneandabel1,0.000213,0.000904
244,119324,3,4.0,"The Subtle Knife (His Dark Materials, #2)",246697,Goodreads,,thesubtleknifehisdarkmaterials2,3.6e-05,0.000146
398,13497,4,4.0,"A Feast for Crows (A Song of Ice and Fire, #4)",437398,Goodreads,,afeastforcrowsasongoficeandfire4,3.7e-05,0.000146
