In [1]:
with open("overlap_users.txt", 'r') as f:
    overlap_users = f.readlines()

In [2]:
overlap_users[:10]

['309061\n',
 '308926\n',
 '113622\n',
 '225247\n',
 '407051\n',
 '253759\n',
 '356846\n',
 '357069\n',
 '41256\n',
 '86969\n']

In [3]:
overlap_users = [u.replace('\n', '') for u in overlap_users]

In [4]:
overlap_users[:10]

['309061',
 '308926',
 '113622',
 '225247',
 '407051',
 '253759',
 '356846',
 '357069',
 '41256',
 '86969']

In [5]:
csv_book_mapping = {}

with open("book_id_map.csv", "r") as f:
    while True:
        line = f.readline()
        if not line:
            break
        csv_id, book_id = line.strip().split(",")
        csv_book_mapping[csv_id] = book_id

In [6]:
import pandas as pd

interactions = pd.read_csv("similar_user_interactions.csv", index_col=0)

In [7]:
interactions.head()

Unnamed: 0,user_id,book_id,rating
0,284,977284,3
1,284,890054,4
2,284,837153,3
3,284,1586480,4
4,284,41814,5


In [8]:
liked_books = [4408, 31147619, 29983711, 9401317, 9317691, 8153988, 20494944]

for book_id in liked_books:
    interactions.loc[interactions.shape[0]] = [-1, book_id, 5]

In [9]:
interactions.tail()

Unnamed: 0,user_id,book_id,rating
1530259,-1,29983711,5
1530260,-1,9401317,5
1530261,-1,9317691,5
1530262,-1,8153988,5
1530263,-1,20494944,5


In [10]:
interactions["user_index"] = interactions["user_id"].astype("category").cat.codes

In [11]:
interactions["book_index"] = interactions["book_id"].astype("category").cat.codes

In [12]:
from scipy.sparse import coo_matrix

ratings_mat_coo = coo_matrix((interactions["rating"], (interactions["user_index"], interactions["book_index"])))

In [13]:
ratings_mat_coo.shape

(2030, 364169)

In [14]:
ratings_mat = ratings_mat_coo.tocsr()

In [15]:
interactions.tail(5)

Unnamed: 0,user_id,book_id,rating,user_index,book_index
1530259,-1,29983711,5,0,338935
1530260,-1,9401317,5,0,167317
1530261,-1,9317691,5,0,166791
1530262,-1,8153988,5,0,157340
1530263,-1,20494944,5,0,263608


In [16]:
my_index = 0

In [17]:
from sklearn.metrics.pairwise import cosine_similarity

similarity = cosine_similarity(ratings_mat[my_index,:], ratings_mat).flatten()

In [18]:
similarity[0]

1.0000000000000002

In [19]:
import numpy as np

indices = np.argpartition(similarity, -15)[-15:]

In [20]:
indices

array([1916, 1940, 1905, 1918, 1938, 1996, 2000, 1913, 1895,  862, 2017,
       2003, 1966, 1962,    0])

In [21]:
similar_users = interactions[interactions["user_index"].isin(indices)].copy()

In [22]:
similar_users = similar_users[similar_users["user_id"]!=-1]

In [23]:
similar_users

Unnamed: 0,user_id,book_id,rating,user_index,book_index
724429,220127,15745753,0,862,212629
724430,220127,6482837,0,862,136892
724431,220127,947126,0,862,83122
724432,220127,19286669,0,862,259566
724433,220127,18050098,0,862,240606
...,...,...,...,...,...
1528521,789297,7743175,5,2003,151913
1528522,789297,9265453,0,2003,166308
1529254,835720,4408,5,2017,1281
1529255,835720,29069989,0,2017,333258


In [42]:
book_recs = similar_users.groupby("book_id").rating.agg(['count', 'mean'])

In [43]:
book_recs

Unnamed: 0_level_0,count,mean
book_id,Unnamed: 1_level_1,Unnamed: 2_level_1
11,1,5.0
34,2,0.0
320,1,0.0
654,1,0.0
865,2,2.5
...,...,...
34466751,1,0.0
34686052,1,0.0
34726138,1,0.0
34861697,1,0.0


In [26]:
books_titles = pd.read_json("books_titles.json")

In [44]:
book_recs = book_recs.merge(books_titles, how="inner", on="book_id")

In [36]:
book_recs

Unnamed: 0,book_id,count,mean,title_x,ratings_x,url_x,cover_image_x,adjusted_count,score,title_y,ratings_y,url_y,cover_image_y
0,34,2,0.0,The Fellowship of the Ring (The Lord of the Ri...,1813229,https://www.goodreads.com/book/show/34.The_Fel...,https://images.gr-assets.com/books/1298411339m...,2e-06,0.0,The Fellowship of the Ring (The Lord of the Ri...,1813229,https://www.goodreads.com/book/show/34.The_Fel...,https://images.gr-assets.com/books/1298411339m...
1,865,2,2.5,The Alchemist,1342863,https://www.goodreads.com/book/show/865.The_Al...,https://images.gr-assets.com/books/1483412266m...,3e-06,7e-06,The Alchemist,1342863,https://www.goodreads.com/book/show/865.The_Al...,https://images.gr-assets.com/books/1483412266m...
2,2165,2,3.0,The Old Man and the Sea,535621,https://www.goodreads.com/book/show/2165.The_O...,https://images.gr-assets.com/books/1329189714m...,7e-06,2.2e-05,The Old Man and the Sea,535621,https://www.goodreads.com/book/show/2165.The_O...,https://images.gr-assets.com/books/1329189714m...
3,4671,2,4.0,The Great Gatsby,2758812,https://www.goodreads.com/book/show/4671.The_G...,https://images.gr-assets.com/books/1490528560m...,1e-06,6e-06,The Great Gatsby,2758812,https://www.goodreads.com/book/show/4671.The_G...,https://images.gr-assets.com/books/1490528560m...
4,5470,2,2.0,1984,2023937,https://www.goodreads.com/book/show/5470.1984,https://images.gr-assets.com/books/1348990566m...,2e-06,4e-06,1984,2023937,https://www.goodreads.com/book/show/5470.1984,https://images.gr-assets.com/books/1348990566m...
5,7613,2,2.5,Animal Farm,1928931,https://www.goodreads.com/book/show/7613.Anima...,https://images.gr-assets.com/books/1424037542m...,2e-06,5e-06,Animal Farm,1928931,https://www.goodreads.com/book/show/7613.Anima...,https://images.gr-assets.com/books/1424037542m...
6,9516,2,1.5,Persepolis: The Story of a Childhood (Persepol...,119470,https://www.goodreads.com/book/show/9516.Perse...,https://images.gr-assets.com/books/1425871473m...,3.3e-05,5e-05,Persepolis: The Story of a Childhood (Persepol...,119470,https://www.goodreads.com/book/show/9516.Perse...,https://images.gr-assets.com/books/1425871473m...
7,15241,2,0.0,"The Two Towers (The Lord of the Rings, #2)",490005,https://www.goodreads.com/book/show/15241.The_...,https://images.gr-assets.com/books/1298415523m...,8e-06,0.0,"The Two Towers (The Lord of the Rings, #2)",490005,https://www.goodreads.com/book/show/15241.The_...,https://images.gr-assets.com/books/1298415523m...
8,18512,2,0.0,"The Return of the King (The Lord of the Rings,...",473101,https://www.goodreads.com/book/show/18512.The_...,https://images.gr-assets.com/books/1389977161m...,8e-06,0.0,"The Return of the King (The Lord of the Rings,...",473101,https://www.goodreads.com/book/show/18512.The_...,https://images.gr-assets.com/books/1389977161m...
9,30119,2,0.0,Where the Sidewalk Ends,1029527,https://www.goodreads.com/book/show/30119.Wher...,https://images.gr-assets.com/books/1168052448m...,4e-06,0.0,Where the Sidewalk Ends,1029527,https://www.goodreads.com/book/show/30119.Wher...,https://images.gr-assets.com/books/1168052448m...


In [45]:
book_recs["adjusted_count"] = book_recs["count"] * (book_recs["count"] / book_recs["ratings"])

In [50]:
book_recs["score"] = book_recs["mean"] * book_recs["count"]

In [51]:
book_recs = book_recs[~book_recs["book_id"].isin(liked_books)]

In [None]:
book_recs = book_recs[book_r

In [52]:
top_recs = book_recs.sort_values("score", ascending=False)

In [53]:
def make_clickable(val):
    return '<a target="_blank" href="{}">Goodreads</a>'.format(val, val)

def show_image(val):
    return '<a href="{}"><img src="{}" width=50></img></a>'.format(val, val)

top_recs.style.format({'url': make_clickable, 'cover_image': show_image})

Unnamed: 0,book_id,count,mean,title,ratings,url,cover_image,adjusted_count,score
125,77203,3,4.0,The Kite Runner,1848782,Goodreads,,5e-06,12.0
310,9531737,2,4.5,"The Wise Man's Fear (The Kingkiller Chronicle, #2)",17766,Goodreads,,0.000225,9.0
30,4671,2,4.0,The Great Gatsby,2758812,Goodreads,,1e-06,8.0
148,157993,2,4.0,The Little Prince,763309,Goodreads,,5e-06,8.0
17,2165,2,3.0,The Old Man and the Sea,535621,Goodreads,,7e-06,6.0
0,11,1,5.0,"The Hitchhiker's Guide to the Galaxy (Hitchhiker's Guide to the Galaxy, #1)",940154,Goodreads,,1e-06,5.0
199,893761,1,5.0,"The Great Hunt (Wheel of Time, #2)",742,Goodreads,,0.001348,5.0
176,455419,1,5.0,The Pre-Raphaelite Vision,41,Goodreads,,0.02439,5.0
170,315519,1,5.0,"Rich Man, Poor Man",28470,Goodreads,,3.5e-05,5.0
142,128029,3,1.666667,A Thousand Splendid Suns,835172,Goodreads,,1.1e-05,5.0
