In [98]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from sklearn.metrics.pairwise import cosine_similarity

In [99]:
# Load the user-by-movie ratings sheet; the first spreadsheet column becomes the row index.
df = pd.read_excel(
    io="Ratings.xlsx",
    index_col=0,
)

In [100]:
# Preview the first rows of the ratings table as loaded (head() shows 5 rows by default).
df.head()

Unnamed: 0,Action1,Action2,Action3,Thriller1,Thriller2,Thriller3
User1,4.0,5.0,3.0,,2,1.0
User2,5.0,3.0,3.0,2.0,2,
User3,1.0,,,4.0,5,4.0
User4,,2.0,1.0,4.0,3,
User5,1.0,,2.0,3.0,3,4.0


In [101]:
# Replace every missing rating with 0 so the table contains no NaN values.
# NOTE(review): this makes "not rated" indistinguishable from a rating of 0 and
# lowers the per-movie means used later — confirm that this is intended.
df = df.fillna(value=0)

In [102]:
# Preview the ratings table again now that missing values have been filled.
df.head()

Unnamed: 0,Action1,Action2,Action3,Thriller1,Thriller2,Thriller3
User1,4.0,5.0,3.0,0.0,2,1.0
User2,5.0,3.0,3.0,2.0,2,0.0
User3,1.0,0.0,0.0,4.0,5,4.0
User4,0.0,2.0,1.0,4.0,3,0.0
User5,1.0,0.0,2.0,3.0,3,4.0


In [103]:
def standardize(row):
    """Mean-centre a vector of ratings and scale it by its range.

    Computes ``(row - mean) / (max - min)``. This is mean normalisation
    (range scaling), not z-score standardisation.

    Despite the parameter name, when this function is handed to
    ``DataFrame.apply`` with the default axis it receives one *column*
    at a time.

    Parameters
    ----------
    row : pandas.Series or array-like exposing ``mean``/``max``/``min``
        The ratings to normalise.

    Returns
    -------
    Same type as ``row``
        The centred, range-scaled values. If every value in ``row`` is
        identical the range is zero; the centred values (all zeros) are
        returned unscaled instead of the NaN that 0/0 would produce.
    """
    centered = row - row.mean()
    spread = row.max() - row.min()
    if spread == 0:
        # Constant input: dividing would give 0/0 = NaN, and NaN values are
        # rejected by scikit-learn's cosine_similarity further down.
        return centered
    return centered / spread

In [104]:
# Normalise the ratings one column (i.e. one movie) at a time.
df1 = df.apply(standardize, axis=0)

In [105]:
# Preview the normalised ratings produced by standardize.
df1.head()

Unnamed: 0,Action1,Action2,Action3,Thriller1,Thriller2,Thriller3
User1,0.36,0.6,0.4,-0.65,-0.333333,-0.2
User2,0.56,0.2,0.4,-0.15,-0.333333,-0.45
User3,-0.24,-0.4,-0.6,0.35,0.666667,0.55
User4,-0.44,0.0,-0.266667,0.35,0.0,-0.45
User5,-0.24,-0.4,0.066667,0.1,0.0,0.55


In [106]:
# Transpose so that each movie is a row, then compare every movie with every other one.
item_similarity = cosine_similarity(df1.transpose())

In [107]:
# Inspect the similarity matrix; at this point it is still an unlabelled NumPy array.
item_similarity

array([[ 1.        ,  0.70668875,  0.81368151, -0.79941088, -0.65908813,
        -0.43889541],
       [ 0.70668875,  1.        ,  0.72310153, -0.84515425, -0.76980036,
        -0.74757102],
       [ 0.81368151,  0.72310153,  1.        , -0.84794611, -0.93933644,
        -0.48651277],
       [-0.79941088, -0.84515425, -0.84794611,  1.        ,  0.73192505,
         0.33534681],
       [-0.65908813, -0.76980036, -0.93933644,  0.73192505,  1.        ,
         0.69721669],
       [-0.43889541, -0.74757102, -0.48651277,  0.33534681,  0.69721669,
         1.        ]])

In [108]:
# Wrap the similarity matrix in a DataFrame labelled by movie name on both axes,
# so similarities can be looked up by title.
item_similarity = pd.DataFrame(
    item_similarity,
    index=df1.columns,
    columns=df1.columns,
)

In [109]:
# Inspect the labelled movie-to-movie similarity table.
item_similarity

Unnamed: 0,Action1,Action2,Action3,Thriller1,Thriller2,Thriller3
Action1,1.0,0.706689,0.813682,-0.799411,-0.659088,-0.438895
Action2,0.706689,1.0,0.723102,-0.845154,-0.7698,-0.747571
Action3,0.813682,0.723102,1.0,-0.847946,-0.939336,-0.486513
Thriller1,-0.799411,-0.845154,-0.847946,1.0,0.731925,0.335347
Thriller2,-0.659088,-0.7698,-0.939336,0.731925,1.0,0.697217
Thriller3,-0.438895,-0.747571,-0.486513,0.335347,0.697217,1.0


In [118]:
# Movie recommendations
def recommendations(movie_name, ratings, similarity=None, neutral_rating=2.5):
    """Score every movie by its similarity to one rated movie.

    Each movie's similarity to ``movie_name`` is multiplied by
    ``ratings - neutral_rating``. A rating above the neutral point
    therefore promotes similar movies, and a rating below it promotes
    dissimilar ones.

    Parameters
    ----------
    movie_name : str
        Column label of the rated movie in the similarity table.
    ratings : int or float
        The rating given to ``movie_name``.
    similarity : pandas.DataFrame, optional
        Square movie-by-movie similarity table. Defaults to the
        notebook-level ``item_similarity``.
    neutral_rating : float, optional
        Rating treated as neither like nor dislike. Defaults to 2.5.

    Returns
    -------
    pandas.DataFrame
        Columns ``'Movie'`` and ``'Score'``, sorted by ``'Score'`` in
        descending order. The rated movie itself is included.

    Raises
    ------
    KeyError
        If ``movie_name`` is not a column of the similarity table.
    """
    if similarity is None:
        # Fall back to the table built earlier in the notebook.
        similarity = item_similarity
    weight = ratings - neutral_rating
    scores = (similarity[movie_name] * weight).sort_values(ascending=False)
    # Label the axis and values directly instead of renaming the 'index'
    # column after reset_index(): that column is only called 'index' when the
    # similarity table's index has no name.
    return scores.rename_axis('Movie').rename('Score').reset_index()


In [119]:
# Single-movie usage: score every movie for a viewer who rated "Thriller3" with 5.
print(recommendations(movie_name="Thriller3", ratings=5))

       Movie     Score
0  Thriller3  2.500000
1  Thriller2  1.743042
2  Thriller1  0.838367
3    Action1 -1.097239
4    Action3 -1.216282
5    Action2 -1.868928


In [129]:
## Multi-movie usage
# (movie, rating) pairs for one viewer: a high rating for an action title
# and low ratings for two thrillers.
action_lover = [
    ("Action2", 5),
    ("Thriller2", 1),
    ("Thriller3", 1),
]
# Holds one recommendation table per rated movie.
similar_movies_list = list()

In [130]:
# Start from an empty list inside this cell so that re-running it does not
# append a second copy of every table (which would inflate the summed scores).
similar_movies_list = []
# Build one recommendation table per (movie, rating) pair.
for movie, rating in action_lover:
    recommended_movies = recommendations(movie, rating)
    similar_movies_list.append(recommended_movies)



In [131]:
# Stack the per-movie recommendation tables into one long frame with a fresh 0..n-1 index.
similar_movies = pd.concat(objs=similar_movies_list, axis=0, ignore_index=True)

# Show the first five rows as plain text.
print(similar_movies.head(n=5))



       Movie     Score
0    Action2  2.500000
1    Action3  1.807754
2    Action1  1.766722
3  Thriller3 -1.868928
4  Thriller2 -1.924501


In [132]:
# Total each movie's score across all recommendation tables, highest total first.
numeric_sums = (
    similar_movies
    .groupby(by='Movie')['Score']
    .sum()
    .sort_values(ascending=False)
)
print(numeric_sums)

Movie
Action2      4.776057
Action3      3.946528
Action1      3.413697
Thriller1   -3.713793
Thriller3   -4.414753
Thriller2   -4.470326
Name: Score, dtype: float64
