# Popularity + Item-Item Collaborative Filtering

### Import libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import pickle

### Import Datasets

In [2]:
# Load the book catalogue and the user ratings from the working directory.
# low_memory=False makes pandas infer each column's dtype from the whole file
# rather than chunk by chunk. NOTE(review): the output stored under this cell
# echoes this read_csv line, which looks like the tail of a mixed-dtype
# warning for Books.csv -- confirm on a fresh run.
books = pd.read_csv('Books.csv', low_memory=False)
ratings = pd.read_csv('Ratings.csv')

  books = pd.read_csv('Books.csv')


## Top 50 books based on Popularity

### Merging the above two datasets on the basis of the 'ISBN' column

In [3]:
# Join catalogue rows with rating rows that share an ISBN (default inner
# join), so each rating row also carries its book's catalogue columns.
name_rating_df = pd.merge(books, ratings, on='ISBN')

In [4]:
# Number of non-null ratings recorded for every title, as a two-column frame
# ('Book-Title', 'User-Count').
num_rating_df = (
    name_rating_df.groupby('Book-Title')['Book-Rating']
    .count()
    .reset_index(name='User-Count')
)

In [5]:
# Mean rating per title, as a two-column frame ('Book-Title', 'Avg-Rating').
# The rating column is selected *before* aggregating: calling .mean() on the
# whole grouped frame also tries to average the text columns (author,
# publisher, image URLs), which pandas >= 2.0 rejects with a TypeError
# instead of silently dropping them.
avg_rating_df = (
    name_rating_df.groupby('Book-Title')['Book-Rating']
    .mean()
    .reset_index()
    .rename(columns={'Book-Rating': 'Avg-Rating'})
)

### Merging the above two datasets on the basis of the 'Book-Title' column

In [6]:
# One row per title holding both its rating count and its mean rating.
popularity_df = pd.merge(num_rating_df, avg_rating_df, on='Book-Title')

### Dropping books rated by fewer than 400 users and sorting on the basis of 'Avg-Rating'

In [7]:
# Keep titles with at least 400 ratings, rank them by mean rating (best
# first) and retain the first 52 rows.
# NOTE(review): the section heading says "Top 50" but 52 rows are kept here
# -- confirm which number is intended.
has_enough_ratings = popularity_df['User-Count'] >= 400
popularity_df = (
    popularity_df.loc[has_enough_ratings]
    .sort_values(by='Avg-Rating', ascending=False)
    .iloc[:52]
)

In [8]:
# Attach catalogue details to the ranked titles.  A title can appear in
# `books` more than once, so only the first matching row per title is kept
# before narrowing the frame to the columns listed below.
display_columns = [
    'Book-Title',
    'Book-Author',
    'User-Count',
    'Publisher',
    'Avg-Rating',
    'Image-URL-M',
]
with_details = pd.merge(popularity_df, books, on='Book-Title')
one_row_per_title = with_details.drop_duplicates(subset='Book-Title')
popularity_df = one_row_per_title[display_columns]

## Item-item Collab Filtering approach

### Filtering users on the basis of the number of books rated by them

In [9]:
# Boolean Series indexed by User-ID: True for users with more than 200 ratings.
user_filter = name_rating_df.groupby('User-ID')['Book-Rating'].count().gt(200)
# IDs of the users that pass the threshold.
selected_user = user_filter.index[user_filter.to_numpy()]

In [10]:
# Keep only the rating rows that belong to the selected users.
is_selected_user = name_rating_df['User-ID'].isin(selected_user)
filtered_user_df = name_rating_df.loc[is_selected_user]

### Filtering books on the basis of the number of users who rated them

In [11]:
# Boolean Series indexed by Book-Title: True for titles that received at
# least 50 ratings from the selected users.
book_filter = filtered_user_df.groupby('Book-Title')['Book-Rating'].count().ge(50)
# Titles that pass the threshold.
selected_book = book_filter.index[book_filter.to_numpy()]

In [12]:
# Keep only the rating rows whose title is among the selected books.
is_selected_book = filtered_user_df['Book-Title'].isin(selected_book)
filtered_book_df = filtered_user_df.loc[is_selected_book]

### Selected Books

In [13]:
# Title-by-user rating matrix: one row per book title, one column per user.
# (title, user) pairs without a rating are left as NaN at this point.
final_rating_df = pd.pivot_table(
    filtered_book_df,
    values='Book-Rating',
    index='Book-Title',
    columns='User-ID',
)

In [14]:
# Replace missing ratings with 0 so every title row is a complete numeric
# vector.
final_rating_df = final_rating_df.fillna(0)

## Calculating similarity scores using cosine similarity

In [15]:
# Pairwise cosine similarity between the rows (book titles) of
# final_rating_df; entry [i, j] compares title i with title j, in the same
# order as final_rating_df.index.
similarity_scores = cosine_similarity(final_rating_df)

## Recommendations based on similarity scores

In [16]:
def recommend(book_name, top_n=4):
    """Return the books most similar to *book_name*.

    Reads three module-level objects: ``final_rating_df`` (rating matrix
    indexed by title), ``similarity_scores`` (title-by-title similarity
    matrix in the same order as that index) and ``books`` (the catalogue).

    Args:
        book_name: Title to look up; it must match an entry of
            ``final_rating_df.index`` exactly.
        top_n: Maximum number of recommendations to return. Defaults to 4,
            the number this function has always returned.

    Returns:
        A list of ``[title, author, image_url]`` lists, ordered from most to
        least similar. A recommended title that has no row in ``books``
        contributes an empty inner list.

    Raises:
        IndexError: If *book_name* is not in ``final_rating_df.index``.
        ValueError: If *top_n* is negative.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    matches = np.flatnonzero(final_rating_df.index == book_name)
    if matches.size == 0:
        raise IndexError(f"{book_name!r} is not in the rating matrix")
    book_index = matches[0]

    scores = np.asarray(similarity_scores[book_index])
    # Stable descending order: equal scores keep their original index order.
    ranked = np.argsort(-scores, kind='stable')
    # Remove the query book by position instead of assuming it sorts first;
    # on a tie (or an all-zero rating vector) another title can rank ahead of
    # it, and blindly skipping slot 0 would then recommend the book itself.
    neighbours = ranked[ranked != book_index][:top_n]

    wanted = ['Book-Title', 'Book-Author', 'Image-URL-M']
    data = []
    for idx in neighbours:
        title = final_rating_df.index[idx]
        # A title may occur several times in the catalogue; use the first row.
        rows = books.loc[books['Book-Title'] == title, wanted]
        data.append(list(rows.iloc[0]) if not rows.empty else [])

    return data

In [17]:
# Example query; each entry of the result is [title, author, image URL].
recommend('Message in a Bottle')

[['Nights in Rodanthe',
  'Nicholas Sparks',
  'http://images.amazon.com/images/P/0446531332.01.MZZZZZZZ.jpg'],
 ['The Mulberry Tree',
  'Jude Deveraux',
  'http://images.amazon.com/images/P/0743437640.01.MZZZZZZZ.jpg'],
 ['A Walk to Remember',
  'Nicholas Sparks',
  'http://images.amazon.com/images/P/0446608955.01.MZZZZZZZ.jpg'],
 ["River's End",
  'Nora Roberts',
  'http://images.amazon.com/images/P/0515127833.01.MZZZZZZZ.jpg']]

### Exporting files by dumping

In [18]:
# Persist the artefacts computed above as pickle files in the working
# directory.  Each file is opened in a `with` block so its handle is flushed
# and closed as soon as the dump finishes, rather than being left open.
artefacts = (
    ('popular.pkl', popularity_df),
    ('final_rating_df.pkl', final_rating_df),
    ('books.pkl', books),
    ('similarity_scores.pkl', similarity_scores),
)
for file_name, artefact in artefacts:
    with open(file_name, 'wb') as out_file:
        pickle.dump(artefact, out_file)