In [1]:
import pandas as pd

# Load the books I liked (first CSV column becomes the index) and store the
# book ids as strings so they compare equal to the string ids read from the
# other files.
my_books = pd.read_csv("liked_books.csv", index_col=0).astype({"book_id": str})

In [2]:
# Build a lookup from the first column of book_id_map.csv to its second
# column. Every line (including any header line) is stored as a plain
# string -> string pair.
csv_book_mapping = {}

with open("book_id_map.csv", "r") as map_file:
    for raw_line in map_file:
        csv_id, book_id = raw_line.strip().split(",")
        csv_book_mapping[csv_id] = book_id

# Ids of the books I liked, as a set for fast membership checks.
book_set = set(my_books["book_id"])

In [3]:
# For every user, count how many of their interaction rows refer to a book
# that is also in book_set. The file is streamed line by line instead of
# being loaded into memory.
overlap_users = {}

with open("goodreads_interactions.csv", 'r') as interactions_file:
    for row in interactions_file:
        user_id, csv_id, _, rating, _ = row.split(",")

        # Unmapped csv ids resolve to None, which is never in book_set.
        book_id = csv_book_mapping.get(csv_id)
        if book_id not in book_set:
            continue

        overlap_users[user_id] = overlap_users.get(user_id, 0) + 1

In [4]:
# Number of distinct users with at least one interaction row whose book is in book_set.
len(overlap_users)

316341

In [5]:
# Keep only users whose overlap count is strictly greater than one fifth of
# the number of rows in my_books.
min_shared_books = my_books.shape[0] / 5
filtered_overlap_users = {
    user for user, shared in overlap_users.items() if shared > min_shared_books
}
len(filtered_overlap_users)

1258

In [6]:
# Second pass over the interactions file: collect every
# [user_id, book_id, rating] row that belongs to one of the users kept in
# filtered_overlap_users. All three values are still strings here.
interactions_list = []

with open("goodreads_interactions.csv", 'r') as interactions_file:
    for row in interactions_file:
        user_id, csv_id, _, rating, _ = row.split(",")

        if user_id not in filtered_overlap_users:
            continue

        # Direct indexing: a csv id missing from the mapping raises KeyError.
        book_id = csv_book_mapping[csv_id]
        interactions_list.append([user_id, book_id, rating])

In [7]:
# Stack my own ratings on top of the ratings collected from the overlapping
# users. Original row labels from both frames are kept (no re-indexing).
my_ratings = my_books[["user_id", "book_id", "rating"]]
other_ratings = pd.DataFrame(interactions_list, columns=["user_id", "book_id", "rating"])
interactions = pd.concat([my_ratings, other_ratings])
interactions

Unnamed: 0,user_id,book_id,rating
0,-1,2517439,5
1,-1,113576,5
2,-1,35100,5
3,-1,228221,5
5,-1,17662739,5
...,...,...,...
5638696,804100,475178,0
5638697,804100,186074,0
5638698,804100,153008,0
5638699,804100,45107,0


In [8]:
from scipy.sparse import coo_matrix

# Normalise dtypes: ids as strings, ratings as numbers.
for id_col in ("book_id", "user_id"):
    interactions[id_col] = interactions[id_col].astype(str)
interactions["rating"] = pd.to_numeric(interactions["rating"])

# Give every distinct user id / book id an integer category code; these
# codes become the row / column positions of the rating matrix.
for id_col, index_col in (("user_id", "user_index"), ("book_id", "book_index")):
    interactions[index_col] = interactions[id_col].astype("category").cat.codes

# Sparse users x books matrix holding the rating values.
rating_values = interactions["rating"]
row_col_positions = (interactions["user_index"], interactions["book_index"])
ratings_mat_coo = coo_matrix((rating_values, row_col_positions))
ratings_mat_coo.shape

(1259, 802870)

In [9]:
# Convert the COO matrix to CSR format; a later cell indexes it by row (ratings_mat[my_index, :]).
ratings_mat = ratings_mat_coo.tocsr()

In [10]:
from sklearn.metrics.pairwise import cosine_similarity

# Row of the rating matrix that holds my own ratings. The rows contributed
# by my_books carry user_id "-1"; look their category code up instead of
# hard-coding 0, which is only correct while "-1" happens to receive the
# first code. Raises IndexError if no "-1" rows exist, rather than silently
# comparing against some other user's row.
my_index = int(
    interactions.loc[interactions["user_id"] == "-1", "user_index"].iloc[0]
)

# Cosine similarity between my rating row and every user's row (including
# my own), flattened to a 1-D array with one entry per matrix row.
similarity = cosine_similarity(ratings_mat[my_index, :], ratings_mat).flatten()

In [11]:
import numpy as np

# Row positions of the 15 largest similarity values. argpartition only
# guarantees that these 15 end up in the last slots; they are not sorted
# among themselves.
n_neighbours = 15
partition_order = np.argpartition(similarity, -n_neighbours)
indices = partition_order[-n_neighbours:]
indices

array([1188,  942,  218,  129,  496,  435, 1208,  795, 1213, 1210, 1143,
        321,  294,  862,    0], dtype=int64)

In [12]:
# Ratings made by the most similar users, with my own rows (user_id "-1")
# removed so they do not feed into the recommendations.
is_neighbour = interactions["user_index"].isin(indices)
is_not_me = interactions["user_id"] != "-1"
similar_users = interactions[is_neighbour & is_not_me].copy()

In [13]:
# Per book: how many similar users have a row for it and the mean of their ratings.
# NOTE(review): rows with rating 0 are averaged in like any other value -
# confirm whether 0 means "not rated" in this dataset.
book_recs = similar_users.groupby("book_id")["rating"].agg(["count", "mean"])

In [14]:
# Attach title metadata to each candidate book. The inner join drops
# candidates that have no entry in books_titles.json.
books_titles = pd.read_json("books_titles.json").astype({"book_id": str})
book_recs = pd.merge(book_recs, books_titles, how="inner", on="book_id")
book_recs

Unnamed: 0,book_id,count,mean,title,ratings,url,cover_image,mod_title
0,1,6,3.833333,Harry Potter and the Half-Blood Prince (Harry ...,1713866,https://www.goodreads.com/book/show/1.Harry_Po...,https://images.gr-assets.com/books/1361039191m...,harry potter and the halfblood prince harry po...
1,100322,1,0.000000,Assata: An Autobiography,11057,https://www.goodreads.com/book/show/100322.Assata,https://images.gr-assets.com/books/1328857268m...,assata an autobiography
2,100365,1,0.000000,The Mote in God's Eye,48736,https://www.goodreads.com/book/show/100365.The...,https://images.gr-assets.com/books/1399490037m...,the mote in gods eye
3,10046142,1,0.000000,Dancing in the Glory of Monsters: The Collapse...,2391,https://www.goodreads.com/book/show/10046142-d...,https://images.gr-assets.com/books/1328757755m...,dancing in the glory of monsters the collapse ...
4,1005,3,0.000000,Think and Grow Rich,87634,https://www.goodreads.com/book/show/1005.Think...,https://s.gr-assets.com/assets/nophoto/book/11...,think and grow rich
...,...,...,...,...,...,...,...,...
2849,99561,2,2.500000,Looking for Alaska,804587,https://www.goodreads.com/book/show/99561.Look...,https://images.gr-assets.com/books/1394798630m...,looking for alaska
2850,99610,1,3.000000,The Best Laid Plans,17434,https://www.goodreads.com/book/show/99610.The_...,https://images.gr-assets.com/books/1353374848m...,the best laid plans
2851,99664,1,4.000000,The Painted Veil,24606,https://www.goodreads.com/book/show/99664.The_...,https://images.gr-assets.com/books/1320421719m...,the painted veil
2852,9969571,3,2.333333,Ready Player One,376328,https://www.goodreads.com/book/show/9969571-re...,https://images.gr-assets.com/books/1500930947m...,ready player one


In [15]:
# Score each candidate: count among similar users, scaled by the ratio of
# that count to the book's value in the "ratings" column, then multiplied
# by the mean rating.
book_recs["adjusted_count"] = book_recs["count"] * (book_recs["count"] / book_recs["ratings"])
book_recs["score"] = book_recs["mean"] * book_recs["adjusted_count"]

# Drop books I already liked, matched by id ...
book_recs = book_recs[~book_recs["book_id"].isin(my_books["book_id"])]

# ... and by normalised title (alphanumerics and spaces only, lower-cased,
# whitespace runs collapsed). Raw strings keep the regex backslashes from
# being read as (invalid) Python string escapes.
my_books["mod_title"] = my_books["title"].str.replace(r"[^a-zA-Z0-9 ]", "", regex=True).str.lower()
my_books["mod_title"] = my_books["mod_title"].str.replace(r"\s+", " ", regex=True)
book_recs = book_recs[~book_recs["mod_title"].isin(my_books["mod_title"])]

# Keep only books with a mean rating of at least 4 from more than two similar users.
book_recs = book_recs[book_recs["mean"] >= 4]
book_recs = book_recs[book_recs["count"] > 2]

# NOTE(review): "score" is computed above but the ranking uses "mean" -
# confirm which column is meant to order the recommendations.
top_recs = book_recs.sort_values("mean", ascending=False)

In [16]:
# Show the final recommendation table (book_recs sorted by "mean", highest first).
top_recs

Unnamed: 0,book_id,count,mean,title,ratings,url,cover_image,mod_title,adjusted_count,score
2265,62291,5,4.8,"A Storm of Swords (A Song of Ice and Fire, #3)",477834,https://www.goodreads.com/book/show/62291.A_St...,https://images.gr-assets.com/books/1497931121m...,a storm of swords a song of ice and fire 3,5.2e-05,0.000251
600,157993,3,4.333333,The Little Prince,763309,https://www.goodreads.com/book/show/157993.The...,https://images.gr-assets.com/books/1367545443m...,the little prince,1.2e-05,5.1e-05
1103,22034,3,4.333333,The Godfather,259150,https://www.goodreads.com/book/show/22034.The_...,https://images.gr-assets.com/books/1394988109m...,the godfather,3.5e-05,0.00015
1176,2318271,3,4.333333,The Last Lecture,245804,https://www.goodreads.com/book/show/2318271.Th...,https://images.gr-assets.com/books/1388075896m...,the last lecture,3.7e-05,0.000159
1909,4381,3,4.333333,Fahrenheit 451,591506,https://www.goodreads.com/book/show/4381.Fahre...,https://images.gr-assets.com/books/1351643740m...,fahrenheit 451,1.5e-05,6.6e-05
243,119322,4,4.25,"The Golden Compass (His Dark Materials, #1)",973154,https://www.goodreads.com/book/show/119322.The...,https://images.gr-assets.com/books/1505766203m...,the golden compass his dark materials 1,1.6e-05,7e-05
1444,2767793,4,4.25,"The Hero of Ages (Mistborn, #3)",149260,https://www.goodreads.com/book/show/2767793-th...,https://images.gr-assets.com/books/1480717763m...,the hero of ages mistborn 3,0.000107,0.000456
2563,78983,4,4.25,"Kane and Abel (Kane and Abel, #1)",75215,https://www.goodreads.com/book/show/78983.Kane...,https://s.gr-assets.com/assets/nophoto/book/11...,kane and abel kane and abel 1,0.000213,0.000904
244,119324,3,4.0,"The Subtle Knife (His Dark Materials, #2)",246697,https://www.goodreads.com/book/show/119324.The...,https://images.gr-assets.com/books/1505766360m...,the subtle knife his dark materials 2,3.6e-05,0.000146
398,13497,4,4.0,"A Feast for Crows (A Song of Ice and Fire, #4)",437398,https://www.goodreads.com/book/show/13497.A_Fe...,https://images.gr-assets.com/books/1429538615m...,a feast for crows a song of ice and fire 4,3.7e-05,0.000146
