In [3]:
## Importing Libraries
import pandas as pd
from scipy import sparse
from sklearn .metrics.pairwise import cosine_similarity

In [13]:
# Load the ratings table from disk; the first CSV column supplies the row labels.
ratings = pd.read_csv(
    "toy_dataset.csv",
    index_col=0,
)

In [14]:
# Display the ratings table exactly as loaded (blank cells are missing ratings).
ratings

Unnamed: 0,action1,action2,action3,romantic1,romantic2,romantic3
user 1,4.0,5.0,3.0,,2.0,1.0
user 2,5.0,3.0,3.0,2.0,2.0,
user 3,1.0,,,4.0,5.0,4.0
user 4,,2.0,1.0,4.0,,3.0
user 5,1.0,,2.0,3.0,3.0,4.0


In [16]:
# Substitute 0 for every missing rating. Note that from here on a movie a user
# never rated is indistinguishable from one that was explicitly rated 0.
ratings = ratings.fillna(value=0)
ratings

Unnamed: 0,action1,action2,action3,romantic1,romantic2,romantic3
user 1,4.0,5.0,3.0,0.0,2.0,1.0
user 2,5.0,3.0,3.0,2.0,2.0,0.0
user 3,1.0,0.0,0.0,4.0,5.0,4.0
user 4,0.0,2.0,1.0,4.0,0.0,3.0
user 5,1.0,0.0,2.0,3.0,3.0,4.0


In [21]:
# Rescale a set of ratings so they are centred on zero and span a range of one.
def standardize(row):
    """Mean-centre a Series and scale it by its range.

    Computes ``(row - row.mean()) / (row.max() - row.min())``.

    Parameters
    ----------
    row : pandas.Series
        Numeric values to rescale. When used through ``DataFrame.apply`` with
        the default axis, each call receives one column of the frame.

    Returns
    -------
    pandas.Series
        Values with zero mean whose maximum and minimum differ by one. If
        every value in ``row`` is identical, a Series of zeros is returned.
    """
    centered = row - row.mean()
    spread = row.max() - row.min()
    if spread == 0:
        # A constant input has zero range; dividing would give 0/0 = NaN and
        # poison every later similarity computation. The centred values are
        # already all zero, which is the natural "no information" result.
        return centered
    return centered / spread

# DataFrame.apply works column by column by default, so each movie's ratings
# are standardized across users. The transpose then puts movies on the rows,
# the layout used here for item-to-item collaborative filtering.
ratings_std = ratings.apply(standardize).transpose()
ratings_std



Unnamed: 0,user 1,user 2,user 3,user 4,user 5
action1,0.36,0.56,-0.24,-0.44,-0.24
action2,0.6,0.2,-0.4,0.0,-0.4
action3,0.4,0.4,-0.6,-0.266667,0.066667
romantic1,-0.65,-0.15,0.35,0.35,0.1
romantic2,-0.08,-0.08,0.52,-0.48,0.12
romantic3,-0.35,-0.6,0.4,0.15,0.4


In [50]:
# Pairwise cosine similarity between the rows of ratings_std (one row per movie).
item_similarity = cosine_similarity(ratings_std)

print(item_similarity)

[[ 1.          0.70668875  0.81368151 -0.79941088 -0.02539184 -0.91410609]
 [ 0.70668875  1.          0.72310153 -0.84515425 -0.5189993  -0.84337386]
 [ 0.81368151  0.72310153  1.         -0.84794611 -0.3799803  -0.80218063]
 [-0.79941088 -0.84515425 -0.84794611  1.          0.14803913  0.72374686]
 [-0.02539184 -0.5189993  -0.3799803   0.14803913  1.          0.39393939]
 [-0.91410609 -0.84337386 -0.80218063  0.72374686  0.39393939  1.        ]]


In [51]:
# Wrap the raw similarity matrix in a DataFrame labelled with the movie names
# on both axes so that scores can be looked up by title.
item_similarity_df = pd.DataFrame(
    data=item_similarity,
    index=ratings.columns,
    columns=ratings.columns,
)
item_similarity_df

Unnamed: 0,action1,action2,action3,romantic1,romantic2,romantic3
action1,1.0,0.706689,0.813682,-0.799411,-0.025392,-0.914106
action2,0.706689,1.0,0.723102,-0.845154,-0.518999,-0.843374
action3,0.813682,0.723102,1.0,-0.847946,-0.37998,-0.802181
romantic1,-0.799411,-0.845154,-0.847946,1.0,0.148039,0.723747
romantic2,-0.025392,-0.518999,-0.37998,0.148039,1.0,0.393939
romantic3,-0.914106,-0.843374,-0.802181,0.723747,0.393939,1.0


In [52]:
## Making Recommendation

def get_similar_movies(movie_name, user_rating, neutral_rating=2.5, similarity=None):
    """Score every movie by its similarity to one movie the user has rated.

    Each similarity value for ``movie_name`` is multiplied by
    ``user_rating - neutral_rating``, so a rating above the neutral point
    promotes similar movies and a rating below it promotes dissimilar ones.

    Parameters
    ----------
    movie_name : str
        Column label of the rated movie in the similarity matrix.
    user_rating : float
        The rating the user gave to ``movie_name``.
    neutral_rating : float, optional
        Rating treated as neither liked nor disliked. Defaults to 2.5, the
        value that was previously hard-coded.
    similarity : pandas.DataFrame, optional
        Item-to-item similarity matrix labelled by movie name on both axes.
        Defaults to the notebook-level ``item_similarity_df``.

    Returns
    -------
    pandas.Series
        Weighted scores indexed by movie name, sorted from highest to lowest.

    Raises
    ------
    KeyError
        If ``movie_name`` is not a column of the similarity matrix.
    """
    if similarity is None:
        # Fall back to the matrix built earlier in the notebook so existing
        # two-argument calls keep working unchanged.
        similarity = item_similarity_df

    weighted = similarity[movie_name] * (user_rating - neutral_rating)
    return weighted.sort_values(ascending=False)


# Example: scores relative to "action2" for a user who rated it 1.
print(get_similar_movies(movie_name="action2", user_rating=1))

romantic1    1.267731
romantic3    1.265061
romantic2    0.778499
action1     -1.060033
action3     -1.084652
action2     -1.500000
Name: action2, dtype: float64


In [53]:
# Sample profile of an action fan, as (movie name, rating) pairs.
action_lover = [
    ("action1", 5),
    ("romantic2", 1),
    ("romantic3", 1),
]

# Empty frame that will collect one row of scores per rated movie.
similar_movies = pd.DataFrame()



In [54]:
# Score every movie against each title the user has rated, one row per rated
# title. DataFrame.append was removed in pandas 2.0, and growing the frame in
# a loop also added duplicate rows each time this cell was re-run, so the
# frame is built in a single step from the list of score Series instead.
similar_movies = pd.DataFrame(
    [get_similar_movies(movie, rating) for movie, rating in action_lover]
).reset_index(drop=True)

# Add up the per-title scores for each candidate movie and rank best-first.
similar_movies.sum().sort_values(ascending=False)

action1      3.909247
action2      3.810282
action3      3.807445
romantic2   -2.154389
romantic1   -3.306206
romantic3   -4.376174
dtype: float64