# Collaborative Filtering Test

In [29]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

In [30]:
# Load the ratings table (the first CSV column becomes the index) and
# replace missing ratings with 0 in one chained expression.
ratings = pd.read_csv('toy_dataset.csv', index_col=0).fillna(0)
ratings

Unnamed: 0,action1,action2,action3,romantic1,romantic2,romantic3
user 1,4.0,5.0,3.0,0.0,2.0,1.0
user 2,5.0,3.0,3.0,2.0,2.0,0.0
user 3,1.0,0.0,0.0,4.0,5.0,4.0
user 4,0.0,2.0,1.0,4.0,0.0,3.0
user 5,1.0,0.0,2.0,3.0,3.0,4.0


## Standardize Ratings

In [31]:
def standardize(row):
    """Mean-center a Series and scale it by its value range.

    Computes ``(row - row.mean()) / (row.max() - row.min())``.

    Note: the notebook invokes this via ``ratings.apply(standardize)``, which
    uses pandas' default ``axis=0``; despite the parameter name, each call
    therefore receives one *column* of the ratings table.

    Parameters
    ----------
    row : pandas.Series
        Numeric values to standardize.

    Returns
    -------
    pandas.Series
        Standardized values with the same index as ``row``. If every value in
        ``row`` is identical (zero range), zeros are returned instead of the
        NaN that a 0/0 division would produce.
    """
    centered = row - row.mean()
    value_range = row.max() - row.min()
    if value_range == 0:
        # Constant input: dividing would give 0/0 = NaN, which would then
        # propagate into the similarity computation. A constant vector carries
        # no preference signal, so map it to zeros.
        return centered * 0.0
    return centered / value_range

# axis=0 is the default for DataFrame.apply: each movie column is
# standardized independently of the others.
ratings_std = ratings.apply(standardize, axis=0)
ratings_std

Unnamed: 0,action1,action2,action3,romantic1,romantic2,romantic3
user 1,0.36,0.6,0.4,-0.65,-0.08,-0.35
user 2,0.56,0.2,0.4,-0.15,-0.08,-0.6
user 3,-0.24,-0.4,-0.6,0.35,0.52,0.4
user 4,-0.44,0.0,-0.266667,0.35,-0.48,0.15
user 5,-0.24,-0.4,0.066667,0.1,0.12,0.4


In [32]:
# Transpose so that each movie is a row; the result is a movie-by-movie matrix.
item_similarity = cosine_similarity(ratings_std.transpose())

In [33]:
# Show the raw similarity array (unlabelled; one row and one column per movie).
print(item_similarity)

[[ 1.          0.70668875  0.81368151 -0.79941088 -0.02539184 -0.91410609]
 [ 0.70668875  1.          0.72310153 -0.84515425 -0.5189993  -0.84337386]
 [ 0.81368151  0.72310153  1.         -0.84794611 -0.3799803  -0.80218063]
 [-0.79941088 -0.84515425 -0.84794611  1.          0.14803913  0.72374686]
 [-0.02539184 -0.5189993  -0.3799803   0.14803913  1.          0.39393939]
 [-0.91410609 -0.84337386 -0.80218063  0.72374686  0.39393939  1.        ]]


In [34]:
# Wrap the similarity array in a DataFrame labelled on both axes with the
# movie names, so similarities can be looked up by title.
item_similarity_data = pd.DataFrame(
    data=item_similarity,
    index=ratings.columns,
    columns=ratings.columns,
)
item_similarity_data

Unnamed: 0,action1,action2,action3,romantic1,romantic2,romantic3
action1,1.0,0.706689,0.813682,-0.799411,-0.025392,-0.914106
action2,0.706689,1.0,0.723102,-0.845154,-0.518999,-0.843374
action3,0.813682,0.723102,1.0,-0.847946,-0.37998,-0.802181
romantic1,-0.799411,-0.845154,-0.847946,1.0,0.148039,0.723747
romantic2,-0.025392,-0.518999,-0.37998,0.148039,1.0,0.393939
romantic3,-0.914106,-0.843374,-0.802181,0.723747,0.393939,1.0


In [35]:
def get_similar_movies(movie_name, user_rating, neutral_rating=2.5, similarity=None):
    """Score every movie by its similarity to ``movie_name``, weighted by a rating.

    The similarity column for ``movie_name`` is multiplied by
    ``user_rating - neutral_rating``: ratings above the neutral point push
    similar movies up, ratings below it push similar movies down.

    Parameters
    ----------
    movie_name : str
        Column label to look up in the similarity table.
    user_rating : float
        The rating the user gave to ``movie_name``.
    neutral_rating : float, optional
        Rating treated as "no preference"; defaults to 2.5, the value that
        was previously hard-coded.
    similarity : pandas.DataFrame, optional
        Item-by-item similarity table. Defaults to the notebook-level
        ``item_similarity_data`` when omitted.

    Returns
    -------
    pandas.Series
        Weighted scores indexed by movie, sorted in descending order.

    Raises
    ------
    KeyError
        If ``movie_name`` is not a column of the similarity table.
    """
    if similarity is None:
        # Fall back to the notebook global to keep existing two-argument calls working.
        similarity = item_similarity_data
    weight = user_rating - neutral_rating
    return (similarity[movie_name] * weight).sort_values(ascending=False)

# Example: a rating of 5 is above the 2.5 offset subtracted in
# get_similar_movies, so the similarity scores are scaled by a positive weight.
print(get_similar_movies('action1', 5))

action1      2.500000
action3      2.034204
action2      1.766722
romantic2   -0.063480
romantic1   -1.998527
romantic3   -2.285265
Name: action1, dtype: float64


In [36]:
# (movie, rating) pairs given by one example user.
action_fan = [('action1', 2), ('romantic3', 2), ('action3',4)]

# DataFrame.append is deprecated and was removed in pandas 2.0; it also
# re-copies the frame on every iteration. Collect the per-movie score Series
# first and build the frame once: one row per rated movie, one column per
# candidate movie. reset_index(drop=True) reproduces the 0..n-1 row labels
# that append(..., ignore_index=True) used to give.
similar_movies = pd.DataFrame(
    [get_similar_movies(movie, rating) for movie, rating in action_fan]
).reset_index(drop=True)

print(similar_movies.head())
# Sum the contributions of all rated movies and rank the candidates.
print(similar_movies.sum().sort_values(ascending=False))

   romantic3  romantic1  romantic2   action2   action3   action1
0   0.457053   0.399705   0.012696 -0.353344 -0.406841 -0.500000
1  -0.500000  -0.361873  -0.196970  0.421687  0.401090  0.457053
2  -1.203271  -1.271919  -0.569970  1.084652  1.500000  1.220522
action3      1.494250
action1      1.177575
action2      1.152995
romantic2   -0.754244
romantic1   -1.234087
romantic3   -1.246218
dtype: float64


  similar_movies = similar_movies.append(get_similar_movies(movie, rating), ignore_index=True)
  similar_movies = similar_movies.append(get_similar_movies(movie, rating), ignore_index=True)
  similar_movies = similar_movies.append(get_similar_movies(movie, rating), ignore_index=True)
