# Problem statement : 

### Build a recommender system by using cosine simillarties score.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
book = pd.read_csv('book (1).csv', encoding='latin-1')

In [3]:
book.head()

Unnamed: 0.1,Unnamed: 0,User.ID,Book.Title,Book.Rating
0,1,276726,Classical Mythology,5
1,2,276729,Clara Callan,3
2,3,276729,Decision in Normandy,6
3,4,276736,Flu: The Story of the Great Influenza Pandemic...,8
4,5,276737,The Mummies of Urumchi,6


In [4]:
book.shape

(10000, 4)

In [5]:
book.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Unnamed: 0   10000 non-null  int64 
 1   User.ID      10000 non-null  int64 
 2   Book.Title   10000 non-null  object
 3   Book.Rating  10000 non-null  int64 
dtypes: int64(3), object(1)
memory usage: 312.6+ KB


In [6]:
book = book.drop(['Unnamed: 0'], axis = 1)
book.head(10)

Unnamed: 0,User.ID,Book.Title,Book.Rating
0,276726,Classical Mythology,5
1,276729,Clara Callan,3
2,276729,Decision in Normandy,6
3,276736,Flu: The Story of the Great Influenza Pandemic...,8
4,276737,The Mummies of Urumchi,6
5,276744,The Kitchen God's Wife,7
6,276745,What If?: The World's Foremost Military Histor...,10
7,276747,PLEADING GUILTY,9
8,276747,Under the Black Flag: The Romance and the Real...,9
9,276747,Where You'll Find Me: And Other Stories,8


In [7]:
# count of Unique UserIDs in database:

len(book['User.ID'].unique())

2182

In [8]:
# count of Unique books in database:

len(book['Book.Title'].unique())

9659

In [9]:
duplicate = book[book.duplicated()]
duplicate

Unnamed: 0,User.ID,Book.Title,Book.Rating
5051,2152,Le nouveau soleil de Teur,7
7439,3757,The Magician's Tale,7


### Use-Item Interaction Matrix

In [10]:
user_book_df = book.pivot_table(index = 'User.ID', columns = 'Book.Title', values = 'Book.Rating').reset_index(drop = True)

In [11]:
user_book_df.iloc[0:5,0:5]

Book.Title,"Jason, Madison &amp",Other Stories;Merril;1985;McClelland &amp,Repairing PC Drives &amp,'48,'O Au No Keia: Voices from Hawai'I's Mahu and Transgender Communities
0,,,,,
1,,,,,
2,,,,,
3,,,,,
4,,,,,


In [12]:
# Replacing index by unique UserIDs

user_book_df.index = book['User.ID'].unique()

In [13]:
user_book_df.iloc[0:5,0:5]

Book.Title,"Jason, Madison &amp",Other Stories;Merril;1985;McClelland &amp,Repairing PC Drives &amp,'48,'O Au No Keia: Voices from Hawai'I's Mahu and Transgender Communities
276726,,,,,
276729,,,,,
276736,,,,,
276737,,,,,
276744,,,,,


In [14]:
# Filling null values:

user_book_df.fillna(0, inplace = True)

In [15]:
user_book_df.iloc[0:5,0:5]

Book.Title,"Jason, Madison &amp",Other Stories;Merril;1985;McClelland &amp,Repairing PC Drives &amp,'48,'O Au No Keia: Voices from Hawai'I's Mahu and Transgender Communities
276726,0.0,0.0,0.0,0.0,0.0
276729,0.0,0.0,0.0,0.0,0.0
276736,0.0,0.0,0.0,0.0,0.0
276737,0.0,0.0,0.0,0.0,0.0
276744,0.0,0.0,0.0,0.0,0.0


# Cosine Similarities between users: 

In [16]:
from sklearn.metrics import pairwise_distances
from scipy.spatial.distance import cosine, correlation

# User Based Collaborative Filtering

In [17]:
user_sim = 1 - pairwise_distances(user_book_df, metric = 'cosine')

In [18]:
user_sim

array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])

In [19]:
user_sim_df = pd.DataFrame(user_sim)

In [20]:
# setting index and column name to UserID

user_sim_df.index = book['User.ID'].unique()

In [21]:
user_sim_df.columns = book['User.ID'].unique()

In [22]:
user_sim_df.iloc[0:5,0:5]

Unnamed: 0,276726,276729,276736,276737,276744
276726,1.0,0.0,0.0,0.0,0.0
276729,0.0,1.0,0.0,0.0,0.0
276736,0.0,0.0,1.0,0.0,0.0
276737,0.0,0.0,0.0,1.0,0.0
276744,0.0,0.0,0.0,0.0,1.0


In [23]:
# Resetting diagonal values

np.fill_diagonal(user_sim,0)

In [24]:
user_sim_df.iloc[0:5,0:5]

Unnamed: 0,276726,276729,276736,276737,276744
276726,0.0,0.0,0.0,0.0,0.0
276729,0.0,0.0,0.0,0.0,0.0
276736,0.0,0.0,0.0,0.0,0.0
276737,0.0,0.0,0.0,0.0,0.0
276744,0.0,0.0,0.0,0.0,0.0


In [25]:
# Finding Most Similar Users:

user_sim_df.idxmax(axis = 1)[:-20]

276726    276726
276729    276726
276736    276726
276737    276726
276744    276726
           ...  
162018    276726
162019    276726
162024    276726
162027    276726
162028    276726
Length: 2162, dtype: int64

In [26]:
book[(book['User.ID']==162018) | (book['Book.Title']==276726)]

Unnamed: 0,User.ID,Book.Title,Book.Rating
9707,162018,"Dragondrums (Mccaffrey, Anne. Harper Hall Tril...",8


### Recommendations for user: 

In [27]:
#recommended books for userid==162018

user1=book[book['User.ID']==162018]

user1

Unnamed: 0,User.ID,Book.Title,Book.Rating
9707,162018,"Dragondrums (Mccaffrey, Anne. Harper Hall Tril...",8


In [28]:
#recommended books for userid==276744

user2=book[book['User.ID']==276744]

user2

Unnamed: 0,User.ID,Book.Title,Book.Rating
5,276744,The Kitchen God's Wife,7


# Item Based Collaborative Filtering (IBCF)

In [29]:
#Fetching ratings for book 'PLEADING GUILTY'

book_user_rating = user_book_df['PLEADING GUILTY']  

In [30]:
book_user_rating

276726    0.0
276729    0.0
276736    0.0
276737    0.0
276744    0.0
         ... 
162107    0.0
162109    0.0
162113    0.0
162121    0.0
162129    0.0
Name: PLEADING GUILTY, Length: 2182, dtype: float64

In [31]:
# Finding correlation with other books

sim_to_book = user_book_df.corrwith(book_user_rating)

In [32]:
sim_to_book.head()

Book.Title
 Jason, Madison &amp                                                    -0.000459
 Other Stories;Merril;1985;McClelland &amp                              -0.000459
 Repairing PC Drives &amp                                               -0.000459
'48                                                                     -0.000459
'O Au No Keia: Voices from Hawai'I's Mahu and Transgender Communities   -0.000459
dtype: float64

In [33]:
# Creating a dataframe for 'sim_to_book'

corr_book = pd.DataFrame(sim_to_book, columns = ['Correlation'])

In [34]:
corr_book.head()

Unnamed: 0_level_0,Correlation
Book.Title,Unnamed: 1_level_1
"Jason, Madison &amp",-0.000459
Other Stories;Merril;1985;McClelland &amp,-0.000459
Repairing PC Drives &amp,-0.000459
'48,-0.000459
'O Au No Keia: Voices from Hawai'I's Mahu and Transgender Communities,-0.000459


In [35]:
# Selecting the books having positive correlation with book 'PLEADING GUILTY'

corr_book = corr_book[corr_book['Correlation'] > 0].sort_values(by = 'Correlation', ascending = False)

In [36]:
corr_book.head()

Unnamed: 0_level_0,Correlation
Book.Title,Unnamed: 1_level_1
PLEADING GUILTY,1.0
Under the Black Flag: The Romance and the Reality of Life Among the Pirates,1.0
Where You'll Find Me: And Other Stories,1.0
Hitler's Secret Bankers: The Myth of Swiss Neutrality During the Holocaust,1.0
Nights Below Station Street,1.0


### So we can recomend these books to the user who's reading 'PLEADING GUILTY'.