In [43]:
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
from scipy.spatial.distance import correlation
from sklearn.metrics.pairwise import pairwise_distances
import warnings
warnings.filterwarnings('ignore')
from scipy.sparse import csr_matrix

In [None]:
!wget https://cdn.freecodecamp.org/project-data/books/book-crossings.zip

!unzip book-crossings.zip

books_filename = 'BX-Books.csv'
ratings_filename = 'BX-Book-Ratings.csv'

In [45]:
df_books = pd.read_csv(
    books_filename,
    encoding = "ISO-8859-1",
    sep=";",
    header=0,
    names=['isbn', 'title', 'author'],
    usecols=['isbn', 'title', 'author'],
    dtype={'isbn': 'str', 'title': 'str', 'author': 'str'})

df_ratings = pd.read_csv(
    ratings_filename,
    encoding = "ISO-8859-1",
    sep=";",
    header=0,
    names=['user', 'isbn', 'rating'],
    usecols=['user', 'isbn', 'rating'],
    dtype={'user': 'int32', 'isbn': 'str', 'rating': 'float32'})

In [5]:
df_books_new = df_books.iloc[:125000,:]

In [6]:
df_ratings_new = df_ratings.iloc[:125000,:]

In [46]:
print(df_books.shape)
print(list(df_books.columns))

(271379, 3)
['isbn', 'title', 'author']


In [47]:
print(df_ratings_new.shape)
print(list(df_ratings_new.columns))

(125000, 3)
['user', 'isbn', 'rating']


In [48]:
new_df= pd.merge(df_books_new, df_ratings_new, on='isbn')
new_df.head()

Unnamed: 0,isbn,title,author,user,rating
0,195153448,Classical Mythology,Mark P. O. Morford,2,0.0
1,2005018,Clara Callan,Richard Bruce Wright,8,5.0
2,2005018,Clara Callan,Richard Bruce Wright,11400,0.0
3,2005018,Clara Callan,Richard Bruce Wright,11676,8.0
4,60973129,Decision in Normandy,Carlo D'Este,8,0.0


In [49]:
combine_book_rating = new_df.dropna(axis = 0, subset = ['title'])
book_ratingCount = (combine_book_rating.
     groupby(by = ['title'])['rating'].
     count().
     reset_index().
     rename(columns = {'rating': 'totalRatingCount'})
     [['title', 'totalRatingCount']]
    )
book_ratingCount.head()

Unnamed: 0,title,totalRatingCount
0,A Light in the Storm: The Civil War Diary of ...,1
1,Beyond IBM: Leadership Marketing and Finance ...,1
2,Earth Prayers From around the World: 365 Pray...,1
3,Final Fantasy Anthology: Official Strategy Gu...,2
4,It Takes Two,1


In [50]:
rating_with_totalRatingCount = combine_book_rating.merge(book_ratingCount, left_on = 'title', right_on = 'title', how = 'left')
rating_with_totalRatingCount.head()

Unnamed: 0,isbn,title,author,user,rating,totalRatingCount
0,195153448,Classical Mythology,Mark P. O. Morford,2,0.0,1
1,2005018,Clara Callan,Richard Bruce Wright,8,5.0,3
2,2005018,Clara Callan,Richard Bruce Wright,11400,0.0,3
3,2005018,Clara Callan,Richard Bruce Wright,11676,8.0,3
4,60973129,Decision in Normandy,Carlo D'Este,8,0.0,2


In [51]:
popularity_threshold = 50
rating_popular_book= rating_with_totalRatingCount.query('totalRatingCount >= @popularity_threshold')
rating_popular_book.head()

Unnamed: 0,isbn,title,author,user,rating,totalRatingCount
26,440234743,The Testament,John Grisham,277478,0.0,66
27,440234743,The Testament,John Grisham,278144,0.0,66
28,440234743,The Testament,John Grisham,9,0.0,66
29,440234743,The Testament,John Grisham,243,0.0,66
30,440234743,The Testament,John Grisham,388,0.0,66


In [63]:
book_features_df=rating_popular_book.pivot_table(index='title',columns='user',values='rating').fillna(0)
book_features_df.head(3)

user,9,14,16,26,51,67,114,165,193,232,242,243,244,254,256,383,388,408,424,446,453,486,487,507,559,569,595,626,638,651,709,728,735,763,805,882,885,899,900,901,...,277997,278007,278048,278054,278075,278107,278122,278137,278144,278162,278176,278188,278202,278220,278221,278243,278254,278325,278333,278342,278350,278356,278373,278390,278418,278422,278469,278506,278514,278541,278543,278552,278554,278586,278633,278653,278663,278692,278698,278843
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
A Painted House,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A Time to Kill,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Angels &amp; Demons,0.0,0.0,0.0,0.0,0.0,0.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [53]:
from scipy.sparse import csr_matrix

book_features_df_matrix = csr_matrix(book_features_df.values)

In [54]:
model_knn = NearestNeighbors(metric = 'cosine', algorithm = 'brute')
model_knn.fit(book_features_df_matrix)

NearestNeighbors(algorithm='brute', leaf_size=30, metric='cosine',
                 metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                 radius=1.0)

In [55]:
query_index = np.random.choice(book_features_df.shape[0])
print(query_index)
distances, indices = model_knn.kneighbors(book_features_df.iloc[query_index,:].values.reshape(1, -1), n_neighbors = 6)

18


In [56]:
for i in range(0, len(distances.flatten())):
    if i == 0:
        print('Recommendations for {0}:\n'.format(book_features_df.index[query_index]))
    else:
        print('{0}: {1}, with distance of {2}:'.format(i, book_features_df.index[indices.flatten()[i]], distances.flatten()[i]))

Recommendations for Summer Sisters:

1: White Oleander : A Novel, with distance of 0.8512969017028809:
2: The Pelican Brief, with distance of 0.8553051948547363:
3: The Client, with distance of 0.8590638041496277:
4: A Time to Kill, with distance of 0.8642092347145081:
5: The Notebook, with distance of 0.878255307674408:


In [57]:
from fuzzywuzzy import process

In [58]:
def recommender(book_name, data, model, n_recommendations):
    model.fit(data)
    idx=process.extractOne(book_name, df_books_new['title'])[2]
    print('Book selected: ', df_books_new['title'][idx], 'Index: ',idx)
    print('Searching for recommendations...')
    distances, indices = model.kneighbors(data[idx], n_neighbors = n_recommendations)
    for i in indices: 
        print(df_books_new['title'][i].where(i!=idx))
        
book= recommender("Classical Mythology", book_features_df_matrix, model_knn, 20)
print(book)

Book selected:  Classical Mythology Index:  0
Searching for recommendations...
0                                                   NaN
9               Where You'll Find Me: And Other Stories
30                                     Prague : A Novel
38                       Seabiscuit: An American Legend
6     What If?: The World's Foremost Military Histor...
32                                 Wie Barney es sieht.
34                                    Sturmzeit. Roman.
17                        Goodbye to the Buttermilk Sky
37                                To Kill a Mockingbird
16    More Cunning Than Man: A Social History of Rat...
7                                       PLEADING GUILTY
31                                      Chocolate Jesus
8     Under the Black Flag: The Romance and the Real...
12                                   The Middle Stories
5                                The Kitchen God's Wife
20    Our Dumb Century: The Onion Presents 100 Years...
33      Der Fluch der Kai