In [None]:
import numpy as np
import pandas as pd

In [None]:
# Load the three raw tables (book metadata, user profiles, user->book ratings)
# from CSV files in the current working directory.
books, users, ratings = (
    pd.read_csv(csv_name)
    for csv_name in ('Books.csv', 'Users.csv', 'Ratings.csv')
)

In [None]:
# Preview the book metadata table read from Books.csv.
books

In [None]:
# Preview the user table read from Users.csv.
users

In [None]:
# Preview the ratings table read from Ratings.csv.
ratings

In [None]:
# Number of missing values in each column of the ratings table.
ratings.isna().sum()

In [None]:
# Number of user rows that repeat an earlier row in every column
# (duplicated() defaults to comparing all columns and keeping the first).
users.duplicated().sum()

## Popularity-Based Recommender System

In [None]:
# Attach book metadata to every rating. The default inner join drops ratings
# whose ISBN has no matching row in `books`.
ratings_with_name = pd.merge(ratings, books, on='ISBN')

In [None]:
# How many (non-null) ratings each title received, as a two-column frame:
# 'Book-Title' and 'num_ratings'.
num_rating_df = (
    ratings_with_name
    .groupby('Book-Title')['Book-Rating']
    .count()
    .rename('num_ratings')
    .reset_index()
)
num_rating_df

In [None]:
# Mean rating per title, as a two-column frame: 'Book-Title' and 'avg_ratings'.
#
# The rating column is selected *before* aggregating. Calling .mean() on the
# whole grouped frame also tries to average the non-numeric columns merged in
# from `books` (e.g. 'Book-Author', the image URLs), which raises TypeError on
# pandas >= 2.0 where numeric_only defaults to False.
avg_rating_df = (
    ratings_with_name
    .groupby('Book-Title')['Book-Rating']
    .mean()
    .reset_index(name='avg_ratings')
)
avg_rating_df

In [None]:
# One row per title carrying both its rating count and its average rating.
popularity_df = pd.merge(num_rating_df, avg_rating_df, on='Book-Title')
popularity_df

In [None]:
# Keep only titles with at least 250 ratings, then take the 50 with the
# highest average rating.
popularity_df = (
    popularity_df
    .loc[lambda frame: frame['num_ratings'] >= 250]
    .sort_values('avg_ratings', ascending=False)
    .head(50)
)

In [None]:
# Add author and cover image to each popular title. A title can match several
# rows in `books`, so only the first match per title is kept.
# NOTE(review): this cell rebinds `popularity_df`, so running it a second time
# merges the already-enriched frame with `books` again; the overlapping columns
# then get _x/_y suffixes and the column selection below fails. Re-run from the
# cell that first builds `popularity_df` instead.
popularity_df = (
    popularity_df
    .merge(books, on='Book-Title')
    .drop_duplicates('Book-Title')
    .loc[:, ['Book-Title', 'Book-Author', 'Image-URL-L', 'num_ratings', 'avg_ratings']]
)

In [None]:
popularity_df

## Collaborative Filtering

In [None]:
# Boolean flag per user: True when the user has more than 200 ratings in the
# merged table. `good_users` is the index of User-IDs that pass.
x = ratings_with_name.groupby('User-ID')['Book-Rating'].count() > 200
good_users = x.index[x.to_numpy()]

In [None]:
# Ratings made by the heavy raters selected above.
filtered_ratings = ratings_with_name.loc[
    lambda frame: frame['User-ID'].isin(good_users)
]

In [None]:
# Boolean flag per title: True when the title has at least 50 ratings from the
# selected users. `famous_books` is the index of titles that pass.
y = filtered_ratings.groupby('Book-Title')['Book-Rating'].count() >= 50
famous_books = y.index[y.to_numpy()]

In [None]:
# Ratings restricted to both the selected users and the selected titles.
final_ratings = filtered_ratings.loc[
    lambda frame: frame['Book-Title'].isin(famous_books)
]

In [None]:
# Title x user rating matrix. pivot_table aggregates with the mean by default,
# so several ratings by one user for the same title collapse to their average.
pt = pd.pivot_table(
    final_ratings,
    index='Book-Title',
    columns='User-ID',
    values='Book-Rating',
)

In [None]:
# Title/user pairs with no rating become 0 so every row is a dense vector.
pt = pt.fillna(0)

In [None]:
# Preview the filled title x user rating matrix.
pt

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Cosine similarity between every pair of rows of `pt`. Rows are book titles
# (the pivot index), so entry [i, j] compares book i's rating vector with
# book j's and the result is a square book-by-book matrix.
similarity_score = cosine_similarity(pt)

In [None]:
# Sanity check: both dimensions should equal the number of rows in `pt`.
similarity_score.shape

In [None]:
# Inspect the raw similarity matrix.
similarity_score

In [None]:
def recommend(book_name, top_n=None):
    """Recommend books similar to ``book_name`` from the item-item similarity matrix.

    Reads the notebook globals ``pt`` (title x user matrix whose index holds the
    titles), ``similarity_score`` (square matrix aligned with ``pt.index``) and
    ``books`` (metadata with 'Book-Title', 'Book-Author', 'Image-URL-M').

    Parameters
    ----------
    book_name : str
        Title exactly as it appears in ``pt.index``.
    top_n : int, optional
        When omitted, only the title of the single most similar book is
        returned. When given, a list with up to ``top_n`` entries is returned,
        best match first.

    Returns
    -------
    str or list
        Without ``top_n``: the most similar title, taken from ``pt.index``.
        With ``top_n``: a list of ``[title, author, image_url]`` lists built
        from the first matching row of ``books`` (an entry is empty if the
        title has no row in ``books``).

    Raises
    ------
    IndexError
        If ``book_name`` is not in ``pt.index``, or there is no other book to
        recommend.
    """
    # Row position of the query title in the pivot table / similarity matrix.
    matches = np.where(pt.index == book_name)[0]
    if len(matches) == 0:
        raise IndexError(f'{book_name!r} is not one of the titles in pt.index')
    index = matches[0]

    scores = np.asarray(similarity_score[index])
    # Stable descending order, so tied scores keep their row order.
    order = np.argsort(-scores, kind='stable')
    # Drop the query book explicitly instead of assuming it sorts first: with
    # tied scores (e.g. an identical or all-zero rating vector) another book
    # can take position 0 and the query would otherwise recommend itself.
    order = order[order != index]
    if len(order) == 0:
        raise IndexError('no other book is available to recommend')

    if top_n is None:
        return pt.index[order[0]]

    data = []
    for position in order[:top_n]:
        title = pt.index[position]
        # A title can appear in several rows of `books`; use the first one.
        first_match = books.loc[
            books['Book-Title'] == title,
            ['Book-Title', 'Book-Author', 'Image-URL-M'],
        ].head(1)
        data.append(first_match.to_numpy().ravel().tolist())
    return data

In [None]:
# Example call; the title must be present in pt.index.
recommend('A Bend in the Road')

In [None]:
import pickle

# Persist the objects the recommender needs outside this notebook. Each file is
# opened in a `with` block so the handle is flushed and closed as soon as the
# dump finishes, instead of being left open until garbage collection.
# NOTE: pickle files can execute code when loaded; only load these from a
# trusted location.
with open('BookTitle-UseridMatrix.pkl', 'wb') as fh:
    pickle.dump(pt.to_dict(), fh)

with open('BooksSimilarityMatrix.pkl', 'wb') as fh:
    pickle.dump(similarity_score, fh)

with open('Books-Dict.pkl', 'wb') as fh:
    pickle.dump(books.to_dict(), fh)