#### A Demonstration of Item-Based Collaborative filtering

*Item-based collaborative filtering is a technique for generating recommendations in which items similar to those a user has already liked are recommended to that user.*

Following are the steps:


*   Generate a user - interest matrix (e.g. a user - rating matrix)
*   Here each row represents a kind of user vector, and column represents an interest/item vector.
*   Find similar items by computing similarity metric between each item vector with all other item vectors.

**Dataset used:** <br>
MovieLens Latest Small dataset, roughly 100k ratings (http://files.grouplens.org/datasets/movielens/ml-latest-small.zip)



In [None]:
import os
# Fetch and unpack the MovieLens "latest-small" archive, but only when the zip
# file is not already present in the working directory.
# NOTE(review): the data-loading cell further down also reads genome-*.csv from an
# 'ml-25m' directory, which this cell never downloads -- confirm where it comes from.
if not os.path.isfile('ml-latest-small.zip'):
  !wget http://files.grouplens.org/datasets/movielens/ml-latest-small.zip
  !unzip 'ml-latest-small.zip'

In [None]:
# Worked example: cosine of the angle between two small vectors.
# numpy is imported here because this cell runs before the notebook's main
# import cell; without it a fresh "Restart & Run All" fails with a NameError.
import numpy as np

v1 = [1, 2, 3]
v2 = [2, 4, 5]
v1_norm = np.linalg.norm(v1)
v2_norm = np.linalg.norm(v2)
# NOTE: despite the name, this is the cosine *similarity* (1.0 = same direction);
# the corresponding cosine distance would be 1 - cos_dist.
cos_dist = np.dot(v1, v2) / (v1_norm * v2_norm)
print(cos_dist)

0.9960238411119946


In [None]:
import pandas as pd
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
# Directory names of the two MovieLens releases this notebook reads from.
# NOTE(review): 'ml-25m' is not fetched by the download cell above, and its genome
# tables belong to a different release than the ratings below, so movieIds may
# not fully overlap -- confirm the directory exists locally and the mix is intended.
base_path_1 = 'ml-25m'
base_path_2 = 'ml-latest-small'
# Genome (tag relevance) tables come from base_path_1 ...
df_genome_scores = pd.read_csv(os.path.join(base_path_1, 'genome-scores.csv'))
df_genome_tags = pd.read_csv(os.path.join(base_path_1, 'genome-tags.csv'))
# ... while links, movies, ratings and tags come from base_path_2.
df_links = pd.read_csv(os.path.join(base_path_2, 'links.csv'))
df_movies = pd.read_csv(os.path.join(base_path_2, 'movies.csv'))
df_ratings = pd.read_csv(os.path.join(base_path_2, 'ratings.csv'))
df_tags = pd.read_csv(os.path.join(base_path_2, 'tags.csv'))

In [None]:
df_movies.shape

(9742, 3)

In [None]:
df_ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [None]:
### CHECK GENRES ####
genres = []
def num_genres(x):
  """Count the entries in a pipe-separated genre string, e.g. 'Action|Drama' -> 2."""
  # n separators always delimit n + 1 fields, which matches len(x.split('|')).
  return x.count('|') + 1

# Unique genre labels found anywhere in the catalogue's pipe-separated genre strings.
genres = {label for entry in df_movies['genres'] for label in entry.split('|')}

# Store how many genres each movie is tagged with next to its metadata.
df_movies['num_genres'] = df_movies['genres'].map(num_genres)

In [None]:
df_genome_scores

Unnamed: 0,movieId,tagId,relevance
0,1,1,0.02875
1,1,2,0.02375
2,1,3,0.06250
3,1,4,0.07575
4,1,5,0.14075
...,...,...,...
15584443,206499,1124,0.11000
15584444,206499,1125,0.04850
15584445,206499,1126,0.01325
15584446,206499,1127,0.14025


In [None]:
# Attach the movie metadata columns to every rating by joining on movieId.
df_user_ratings = df_ratings.merge(df_movies, on='movieId')
df_user_ratings

Unnamed: 0,userId,movieId,rating,timestamp,title,genres,num_genres
0,1,1,4.0,964982703,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,5
1,5,1,4.0,847434962,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,5
2,7,1,4.5,1106635946,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,5
3,15,1,2.5,1510577970,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,5
4,17,1,4.5,1305696483,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,5
...,...,...,...,...,...,...,...
100831,610,160341,2.5,1479545749,Bloodmoon (1997),Action|Thriller,2
100832,610,160527,4.5,1479544998,Sympathy for the Underdog (1971),Action|Crime|Drama,3
100833,610,160836,3.0,1493844794,Hazard (2005),Action|Drama|Thriller,3
100834,610,163937,3.5,1493848789,Blair Witch (2016),Horror|Thriller,2


In [None]:
def ret_count(x):
  """Return the number of elements in `x`; used below as a group-size aggregator."""
  size = len(x)
  return size

In [None]:
# One row per user; the 'movieId' column holds how many rating rows that user has.
df_aggr = df_user_ratings.groupby('userId').agg({'movieId': ret_count})

In [None]:
df_aggr

Unnamed: 0_level_0,movieId
userId,Unnamed: 1_level_1
1,232
2,29
3,39
4,216
5,44
...,...
606,1115
607,187
608,831
609,37


In [None]:
# Express each user's rating count as a percentage of the whole movie catalogue.
total_movies = len(df_movies)
df_aggr['percentage_movies_rated'] = df_aggr['movieId'].mul(100).div(total_movies)

In [None]:
df_aggr

Unnamed: 0_level_0,movieId,percentage_movies_rated
userId,Unnamed: 1_level_1,Unnamed: 2_level_1
1,232,2.381441
2,29,0.297680
3,39,0.400328
4,216,2.217204
5,44,0.451653
...,...,...
606,1115,11.445288
607,187,1.919524
608,831,8.530076
609,37,0.379799


In [None]:
# Rank users from the most to the fewest ratings.
sort_by_num_reviews = df_aggr.sort_values('movieId', ascending=False)
sort_by_num_reviews

Unnamed: 0_level_0,movieId,percentage_movies_rated
userId,Unnamed: 1_level_1,Unnamed: 2_level_1
414,2698,27.694519
599,2478,25.436255
474,2108,21.638267
448,1864,19.133648
274,1346,13.816465
...,...,...
442,20,0.205297
569,20,0.205297
320,20,0.205297
576,20,0.205297


In [None]:
import plotly.graph_objects as go
import plotly.express as px
# How many users share each "number of ratings" value.
val_cnts = sort_by_num_reviews['movieId'].value_counts()
# Users with at least 100 ratings, and every rating row that belongs to them.
# NOTE(review): val_cnts and user_rating_slice are not read by any later cell shown
# in this notebook, and `indices` is reassigned further down -- confirm whether this
# cell (and the hard-coded threshold of 100) is still needed.
indices = sort_by_num_reviews[sort_by_num_reviews['movieId'] >= 100].index
user_rating_slice = df_user_ratings.loc[df_user_ratings['userId'].isin(indices), :]

In [None]:
# Running total of ratings when walking users from heaviest to lightest rater,
# stored both as an absolute count and as a share of all rating rows.
cumsum = sort_by_num_reviews['movieId'].cumsum()
total_ratings = df_user_ratings.shape[0]
sort_by_num_reviews['data_covered'] = cumsum
sort_by_num_reviews['percent_data_covered'] = cumsum * 100 / total_ratings

In [None]:
sort_by_num_reviews

Unnamed: 0_level_0,movieId,percentage_movies_rated,data_covered,percent_data_covered
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
414,2698,27.694519,2698,2.675632
599,2478,25.436255,5176,5.133087
474,2108,21.638267,7284,7.223611
448,1864,19.133648,9148,9.072157
274,1346,13.816465,10494,10.406998
...,...,...,...,...
442,20,0.205297,100756,99.920663
569,20,0.205297,100776,99.940497
320,20,0.205297,100796,99.960332
576,20,0.205297,100816,99.980166


In [None]:
# Cumulative number of ratings at the last user (heaviest raters first) whose
# running share of the data is still within 40%.
sort_by_num_reviews[sort_by_num_reviews['percent_data_covered'] <= 40].iloc[-1]['data_covered']

40054.0

In [None]:
# Users (heaviest raters first) whose cumulative ratings stay within 40% of the
# data; the next cells use them as the training users.
indices = sort_by_num_reviews[sort_by_num_reviews['percent_data_covered'] <= 40].index

In [None]:
# Training set: every rating row whose user is among the selected `indices`.
df_user_ratings_train = df_user_ratings[df_user_ratings['userId'].isin(indices)]

In [None]:
# Hold-out set: rating rows from every user that was not selected for training.
is_train_user = df_user_ratings['userId'].isin(indices)
df_user_ratings_test = df_user_ratings[~is_train_user]

# Sanity check: the two partitions together must account for every rating row.
assert len(df_user_ratings_train) + len(df_user_ratings_test) == len(df_user_ratings)

In [None]:
# Displays df_user_ratings with a fresh 0..n-1 index. The result is not assigned,
# so df_user_ratings itself is left unchanged by this cell.
# NOTE(review): the saved output shows only ~40k rows starting at userId 18, which
# does not match the full merged frame -- the notebook was probably executed with
# df_user_ratings pointing at the training subset. Confirm and make that explicit.
df_user_ratings.reset_index(drop=True)

Unnamed: 0,userId,movieId,rating,timestamp,title,genres,num_genres
0,18,1,3.5,1455209816,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,5
1,19,1,4.0,965705637,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,5
2,64,1,4.0,1161520134,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,5
3,68,1,2.5,1158531426,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,5
4,89,1,3.0,1520408314,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,5
...,...,...,...,...,...,...,...
40049,610,160341,2.5,1479545749,Bloodmoon (1997),Action|Thriller,2
40050,610,160527,4.5,1479544998,Sympathy for the Underdog (1971),Action|Crime|Drama,3
40051,610,160836,3.0,1493844794,Hazard (2005),Action|Drama|Thriller,3
40052,610,163937,3.5,1493848789,Blair Witch (2016),Horror|Thriller,2


In [None]:
def get_release_year(x):
  """Extract the release year from a MovieLens-style title.

  The title is expected to end with the year in parentheses, e.g.
  'Toy Story (1995)'. Trailing whitespace is tolerated, and for a year range
  such as '(2006-2007)' the first year is returned.

  Args:
    x: Movie title string.

  Returns:
    The four-digit year as a string, or None when the title does not end with
    a parenthesised year. (The previous positional slicing returned arbitrary
    fragments of the last word in that case.)
  """
  import re  # local import keeps this cell runnable on its own

  # Anchor at the end of the title so parenthesised text earlier in the name
  # (alternative titles, "(a.k.a. ...)") is never mistaken for the year.
  match = re.search(r'\((\d{4})(?:\s*[-–]\s*\d{4})?\)\s*$', x)
  return match.group(1) if match else None

# Release year of the movie behind every rating row, parsed from its title.
release_year = df_user_ratings['title'].map(get_release_year)

In [None]:
# Join every rating row with the genome tag scores of its movie (pd.merge defaults
# to an inner join).
# NOTE(review): genome-scores has one row per (movieId, tagId), so this repeats each
# rating once per tag and drops movies without genome scores; re-running the cell
# joins again on top of the previous result. The tag columns are not read by the
# cells shown below -- confirm that this merge is intended.
df_user_ratings = pd.merge(df_user_ratings, df_genome_scores, on = ['movieId'])

In [None]:
# User x movie rating matrix: one row per userId, one column per movieId.
# pivot_table averages duplicate (user, movie) rows by default, and fill_value = 0.0
# stores unrated movies as 0, so the distance computations below see them as
# ratings of zero.
df_user_table = pd.pivot_table(df_user_ratings, values = 'rating', index = ['userId'], columns = ['movieId'], fill_value = 0.0)

In [None]:
df_user_table

movieId,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,34,36,38,39,41,42,43,44,...,176751,176935,177593,177615,177763,177765,178061,178323,178613,179119,179133,179135,179401,179817,179819,179953,180031,180045,180095,180265,180297,180497,180985,181315,182715,182823,183197,183227,184015,184253,184257,184471,185029,185435,185585,187593,187595,188301,188675,188833
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
18,3.5,3.0,0.0,0.0,0.0,4.0,0.0,0,0.0,0.0,0.0,0.0,0,0,0.0,4.5,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,4.0,2.5,4.0,0,0.0,0.0,0,0.0,0.0,...,0,0,4.5,0.0,0.0,0,0,0.0,0,0,0,0,0,0,0.0,0.0,0.0,0,3,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0,0.0,0,0.0,0,0.0,0.0
19,4.0,3.0,3.0,0.0,0.0,0.0,2.0,0,0.0,2.0,0.0,1.0,3,0,2.0,0.0,0.0,0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,4.0,4.0,0.0,0,0.0,0.0,0,0.0,3.0,...,0,0,0.0,0.0,0.0,0,0,0.0,0,0,0,0,0,0,0.0,0.0,0.0,0,0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0,0.0,0,0.0,0,0.0,0.0
28,0.0,0.0,0.0,0.0,0.0,3.5,0.0,0,0.0,0.0,0.0,0.0,0,0,0.0,2.5,0.0,0,0.0,0.0,3.0,0.0,1.5,0.0,0.0,0.0,0.0,0.0,0.0,0,2.5,3.5,0.0,0.0,0,0.0,0.0,0,0.0,0.0,...,0,0,0.0,0.0,0.0,0,0,0.0,0,0,0,0,0,0,0.0,0.0,0.0,0,0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0,0.0,0,0.0,0,0.0,0.0
64,4.0,0.0,3.5,0.0,0.0,4.5,0.0,0,0.0,0.0,0.0,0.0,0,0,0.0,5.0,0.0,0,3.5,0.0,0.0,3.5,0.0,0.0,3.5,0.0,0.0,0.0,0.0,0,0.0,4.0,4.5,4.0,0,4.0,0.0,0,0.0,0.0,...,0,0,0.0,0.0,0.0,0,0,0.0,0,0,0,0,0,0,0.0,0.0,0.0,0,0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0,0.0,0,0.0,0,0.0,0.0
68,2.5,2.5,2.0,0.0,2.0,4.0,2.0,0,0.0,4.5,4.5,0.0,0,0,0.0,3.5,3.5,2,1.5,0.0,0.0,0.0,0.0,0.0,3.5,3.0,0.0,0.0,0.0,0,3.5,0.0,1.0,0.0,0,4.0,0.0,0,0.0,3.0,...,0,0,0.0,0.0,0.0,0,0,0.0,0,0,0,0,0,0,0.0,0.0,0.0,0,0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0,0.0,0,0.0,0,0.0,0.0
89,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,2.5,0.0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,...,0,0,0.0,0.0,0.0,2,0,0.0,0,0,0,0,0,0,0.0,0.0,0.0,0,0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0,0.0,0,0.0,0,0.0,0.0
91,4.0,3.0,3.0,0.0,0.0,5.0,0.0,0,0.0,3.5,0.0,0.0,0,0,0.0,4.5,0.0,0,2.0,0.0,4.0,3.5,0.0,0.0,2.0,0.0,0.0,0.0,4.0,0,0.0,4.0,0.0,3.0,0,1.5,4.0,0,0.0,0.0,...,0,0,0.0,0.0,0.0,0,0,0.0,0,0,0,0,0,0,0.0,0.0,0.0,0,0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0,0.0,0,0.0,0,0.0,0.0
105,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0,0.0,0.0,0.0,0.0,0,0,0.0,4.5,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,3.5,0.0,0.0,0,0.0,0.0,0,0.0,0.0,...,0,0,0.0,0.0,0.0,0,0,0.0,4,0,5,0,0,0,0.0,0.0,0.0,0,0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0,0.0,0,0.0,0,0.0,0.0
111,0.0,0.0,0.0,0.0,3.5,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0,0.0,4.5,0.0,0,0.0,0.0,0.0,0.0,0.0,2.5,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,2.5,0.0,0,4.0,0.0,0,0.0,0.0,...,0,0,0.0,0.0,0.0,0,0,3.5,0,0,0,0,0,0,0.0,3.5,0.0,0,0,4.5,0,0,0.0,0.0,0.0,0.0,3.5,3.5,3.5,0,4,0,0.0,0,0.0,0,0.0,0,0.0,0.0
140,3.0,3.5,0.0,0.0,0.0,5.0,0.0,0,0.0,0.0,4.0,0.0,0,0,0.0,0.0,0.0,0,0.0,0.0,4.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,4.0,0.0,0,0.0,0.0,0,0.0,0.0,...,0,0,0.0,0.0,0.0,0,0,0.0,0,0,0,0,0,0,0.0,0.0,0.0,0,0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0.0,0,0.0,0,0.0,0,0.0,0.0


In [None]:
from scipy.stats import pearsonr
def compute_correlation(x, y):
  """Run SciPy's Pearson correlation on two equal-length sequences.

  Returns exactly what `pearsonr` returns: the correlation coefficient
  followed by its p-value, not the coefficient alone.
  """
  result = pearsonr(x, y)
  return result

In [None]:
from sklearn.metrics import pairwise_distances
# Pairwise correlation *distance* between user rating vectors, labelled by userId
# on both axes. Despite the variable name, smaller values mean more similar users
# (the diagonal is ~0).
user_ids = df_user_table.index
user_similarity = pd.DataFrame(
    pairwise_distances(df_user_table, metric = 'correlation'),
    index = user_ids,
    columns = user_ids,
)

In [None]:
user_similarity

userId,18,19,28,64,68,89,91,105,111,140,177,182,217,219,226,232,249,274,288,298,305,307,318,380,387,414,438,448,474,477,480,483,489,534,555,561,590,599,600,603,606,608,610
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1
18,0.0,0.806386,0.651304,0.6749562,0.596642,0.9146078,0.675384,0.641927,0.775163,0.764935,0.659819,0.721155,0.823052,0.603782,0.6972968,0.6579647,0.5471153,0.629133,0.720768,0.5858044,0.556268,0.663843,0.651426,0.5936405,0.7298183,0.6320995,0.704546,0.6415292,0.7474009,0.6551,0.6333906,0.6125439,0.683699,0.670189,0.8237496,0.579865,0.6060339,0.639518,0.74479,0.826479,0.7476073,0.663611,0.62275
19,0.806386,0.0,0.8349492,0.7054169,0.690138,0.9217087,0.61717,0.930309,0.8910134,0.81677,0.77286,0.719016,0.564098,0.711367,0.70127,0.859784,0.8116399,0.686761,0.627351,0.7974774,0.825091,0.7159955,0.875236,0.687535,0.7287037,0.6591052,0.750672,0.7022997,0.7120003,0.711062,0.7099942,0.7007898,0.709815,0.8189081,0.5835851,0.70048,0.6725222,0.651743,0.664079,0.7266028,0.8003193,0.642284,0.857695
28,0.651304,0.834949,1.110223e-16,0.6988138,0.664209,0.9658152,0.7002,0.717131,0.8948519,0.669273,0.784839,0.691727,0.829534,0.606181,0.7398802,0.6640173,0.6920232,0.657941,0.703428,0.6827998,0.627984,0.698549,0.726496,0.6828187,0.7471529,0.6168909,0.698167,0.7083042,0.7130804,0.714552,0.6043718,0.6865147,0.74837,0.8195605,0.8030587,0.658958,0.6373095,0.671576,0.791888,0.8093143,0.7559143,0.654726,0.684827
64,0.674956,0.705417,0.6988138,2.220446e-16,0.56479,0.9002952,0.59259,0.75028,0.8012197,0.712784,0.678541,0.604462,0.757634,0.599396,0.5547055,0.7034499,0.7200329,0.591226,0.59896,0.673604,0.710528,0.5717108,0.767213,0.6475527,0.6463684,0.5832746,0.643615,0.6704841,0.625104,0.645987,0.4825359,0.6251277,0.612279,0.7706448,0.7081044,0.637067,0.5981764,0.612501,0.606817,0.7343297,0.6964497,0.511817,0.716922
68,0.596642,0.690138,0.6642091,0.5647896,0.0,0.8606034,0.653721,0.667361,0.6659281,0.67116,0.512751,0.664503,0.731535,0.525434,0.5662334,0.526099,0.5078941,0.496382,0.594781,0.5524322,0.61759,0.5593062,0.666,0.5213598,0.7074422,0.475929,0.553375,0.5535679,0.6516271,0.614002,0.5138067,0.5586472,0.558701,0.6121266,0.7332397,0.540166,0.5437916,0.526204,0.627131,0.7931013,0.7000115,0.50717,0.597858
89,0.914608,0.921709,0.9658152,0.9002952,0.860603,1.110223e-16,0.927803,0.907197,0.8000199,0.953969,0.835215,0.924466,0.948316,0.901227,0.8829857,0.8513181,0.8355383,0.875824,0.898771,0.847183,0.914126,0.8926139,0.897937,0.8891138,0.9206353,0.8651527,0.895223,0.8538284,0.8958337,0.905094,0.8883343,0.8634617,0.9014,0.8504046,0.9449104,0.903457,0.8958349,0.902881,0.906945,0.9463307,0.9196362,0.912516,0.885568
91,0.675384,0.61717,0.7002,0.5925902,0.653721,0.9278034,0.0,0.813093,0.8944552,0.692415,0.735706,0.613737,0.683448,0.596286,0.6491307,0.7776293,0.736384,0.589053,0.590509,0.700381,0.702065,0.6389563,0.800146,0.6040532,0.6268812,0.6175802,0.68754,0.6663268,0.6794958,0.648242,0.5778927,0.6487524,0.648921,0.781349,0.6735814,0.618265,0.5747763,0.595888,0.649597,0.7122723,0.7482651,0.559009,0.726638
105,0.641927,0.930309,0.7171308,0.7502803,0.667361,0.9071971,0.813093,0.0,0.7906903,0.816307,0.686149,0.766701,0.952177,0.709103,0.7835686,0.6910932,0.6254403,0.694297,0.800948,0.6280008,0.617876,0.7500138,0.695731,0.7101715,0.7817503,0.6964342,0.778468,0.7483232,0.7913162,0.755337,0.7084773,0.6690005,0.739347,0.7800233,0.9234658,0.734276,0.7202826,0.744631,0.807464,0.8673204,0.7389422,0.740893,0.663625
111,0.775163,0.891013,0.8948519,0.8012197,0.665928,0.8000199,0.894455,0.79069,1.110223e-16,0.909744,0.704291,0.868766,0.945919,0.760719,0.7635048,0.6831051,0.6276038,0.716662,0.83375,0.6799434,0.771613,0.74914,0.807537,0.7698524,0.8814423,0.7524254,0.783622,0.7273651,0.8783294,0.833541,0.7685842,0.7441744,0.783005,0.7134373,0.9133904,0.809217,0.8132612,0.797252,0.818644,0.9384039,0.889775,0.775752,0.76896
140,0.764935,0.81677,0.6692728,0.7127837,0.67116,0.9539688,0.692415,0.816307,0.9097438,0.0,0.744447,0.708662,0.764581,0.709431,0.752571,0.7182685,0.7933529,0.730498,0.63528,0.8217797,0.761661,0.7507619,0.842085,0.7430124,0.7992894,0.6226857,0.650139,0.713191,0.6779584,0.791188,0.6207634,0.7529389,0.71694,0.8645798,0.816203,0.734941,0.5928191,0.71111,0.796469,0.7770653,0.7534721,0.677593,0.817362


In [None]:
# Transposing the user x movie table makes each movie a row, so this yields a
# movie x movie matrix of correlation *distances* (smaller = more similar).
movie_similarity = pairwise_distances(df_user_table.T, metric = 'correlation')

In [None]:
# Wrap the raw distance matrix in a DataFrame labelled by movieId on both axes.
movie_ids = df_user_table.columns.values
movie_similarity = pd.DataFrame(movie_similarity, index = movie_ids, columns = movie_ids)

In [None]:
def get_similar_users(user, k = 5):
  """Return the ids of the `k` users most similar to `user`.

  `user_similarity` is built with `pairwise_distances(..., metric='correlation')`,
  so it stores distances: 0 means identical rating patterns and larger values
  mean less similar. The nearest neighbours are therefore the *smallest*
  entries; the previous `sort_values()[-k:]` picked the k most dissimilar users
  (and returned every user for k = 0).

  Args:
    user: userId that labels a column of `user_similarity`.
    k: Number of neighbours to return.

  Returns:
    NumPy array of up to `k` userIds, closest first. The query user itself is
    excluded.
  """
  # Drop the user's own entry (distance ~0) so it cannot occupy a neighbour slot.
  distances = user_similarity[user].drop(labels = user, errors = 'ignore')
  # sort_values() is ascending and places NaN (undefined correlation) last.
  return distances.sort_values().index.values[:k]

def get_movies_watched_by_user(user):
  """Return a NumPy array with the movieId of every df_user_ratings row belonging to `user`."""
  is_user = df_user_ratings['userId'] == user
  return df_user_ratings.loc[is_user, 'movieId'].values

def get_ratings_given_by_user(user):
  """Return the ['movieId', 'rating'] columns of every df_user_ratings row belonging to `user`."""
  is_user = df_user_ratings['userId'] == user
  return df_user_ratings.loc[is_user, ['movieId', 'rating']]


get_ratings_given_by_user(91)

Unnamed: 0,movieId,rating
32,1,4.0
226,3,3.0
284,6,5.0
400,47,4.5
607,50,4.5
...,...,...
87551,8501,4.0
87555,8507,2.5
87564,8581,4.0
87568,27317,4.5


In [None]:
# Movies rated by user 91, then the matching columns of the movie distance matrix:
# rows = every movie, columns = movies this user rated (smaller value = more similar).
movies_watched = get_movies_watched_by_user(91)
similar_movies = movie_similarity[movies_watched]
similar_movies

Unnamed: 0,1,3,6,47,50,110,151,163,223,231,235,260,296,316,349,356,367,457,480,500,543,552,553,590,592,593,608,648,733,736,780,919,923,1073,1080,1089,1092,1097,1127,1136,...,4987,5165,5294,5346,5438,5479,5588,5640,5650,5749,6203,6219,6283,6327,6375,6395,6461,6620,6629,6638,6678,6731,6744,6820,6963,7118,7308,7492,7561,7650,7802,8016,8147,8266,8426,8501,8507,8581,27317,27873
1,0.000000,0.361097,0.335116,0.158582,0.238453,0.214072,0.598937,0.421685,0.231384,0.214509,0.444600,0.091282,0.164038,0.308674,0.453022,0.142121,0.124870,0.146102,0.100647,0.179798,0.417947,0.431507,0.441899,0.368657,0.188997,0.189098,0.171945,0.176853,0.239423,0.212061,0.139574,0.202814,0.315196,0.229591,0.335363,0.217045,0.429654,0.143217,0.351925,0.182577,...,0.772603,0.753467,0.608086,0.773400,0.639938,0.673643,0.753467,0.825675,0.639375,0.825675,0.784283,0.735802,0.503147,0.753467,0.674565,0.825675,0.813959,0.477594,0.742870,0.728693,0.825675,0.628226,0.727380,0.581806,0.753892,0.885206,0.825675,0.825675,0.825675,0.848643,0.546523,0.755370,0.825675,0.733920,0.825675,0.740205,0.656837,0.643207,0.681684,0.701350
2,0.197686,0.520185,0.438007,0.132538,0.338576,0.344115,0.566255,0.466045,0.276409,0.281577,0.491596,0.181831,0.213472,0.338891,0.465955,0.179339,0.226049,0.239056,0.147400,0.135430,0.568508,0.460290,0.506106,0.443040,0.303543,0.245820,0.234593,0.143019,0.225467,0.211982,0.128563,0.240110,0.395878,0.214907,0.355241,0.319987,0.614559,0.216913,0.433959,0.228023,...,0.806824,0.744754,0.717425,0.783439,0.640928,0.661147,0.744754,0.833398,0.689588,0.833398,0.784023,0.663342,0.621559,0.764389,0.683328,0.833398,0.773710,0.589304,0.800077,0.731518,0.833398,0.699112,0.748142,0.672393,0.764795,0.890292,0.833398,0.833398,0.833398,0.855349,0.593268,0.722372,0.833398,0.734074,0.833398,0.775497,0.614061,0.806385,0.778905,0.714581
3,0.361097,0.000000,0.673720,0.462628,0.505334,0.471818,0.580927,0.560809,0.509937,0.287829,0.571195,0.380054,0.439196,0.443470,0.509964,0.412685,0.377719,0.405253,0.416385,0.411419,0.312530,0.604348,0.318948,0.460966,0.453647,0.435494,0.403234,0.510847,0.434559,0.449321,0.461447,0.431763,0.469196,0.440355,0.571478,0.451166,0.311613,0.304746,0.427504,0.388654,...,0.671123,0.830294,0.633239,0.658321,0.482737,0.622804,0.830294,0.760000,0.460771,0.760000,0.745442,0.861436,0.711210,0.830294,0.592693,0.760000,0.869607,0.546869,0.616000,0.623833,0.760000,0.854750,0.737272,0.692127,0.840553,0.841959,0.760000,0.760000,0.760000,0.791622,0.795212,0.852654,0.760000,0.658419,0.760000,0.868444,0.923491,0.880466,0.827579,0.689826
4,0.891047,1.000000,1.000000,0.841014,1.000000,0.910780,0.716842,1.000000,0.876496,0.795353,1.000000,0.855827,0.839260,1.000000,0.828659,0.848435,0.830902,0.858618,0.872943,0.799119,0.820971,0.697646,1.000000,1.000000,0.877169,0.842317,0.834691,0.848089,1.000000,0.830872,0.904681,0.857466,0.794262,0.756676,1.000000,1.000000,1.000000,0.908879,0.770214,0.810010,...,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,0.774187,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,0.553696,1.000000,1.000000,1.000000
5,0.571606,0.652703,0.762871,0.579723,0.634723,0.576711,0.619519,0.747940,0.728858,0.613811,0.772505,0.571937,0.560271,0.657420,0.474007,0.480668,0.591007,0.542595,0.529842,0.511020,0.706884,0.410677,0.654016,0.651241,0.509422,0.581943,0.554037,0.580760,0.583307,0.611913,0.583243,0.532974,0.488032,0.512583,0.619424,0.679323,0.706786,0.536610,0.643732,0.553567,...,0.921554,1.000000,0.850029,0.907869,0.732672,0.759193,1.000000,1.000000,0.864059,1.000000,1.000000,1.000000,0.824098,0.868441,0.747399,1.000000,1.000000,0.698907,0.851158,0.868570,1.000000,1.000000,0.903014,0.824139,0.860943,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,0.755232,1.000000,0.826767,1.000000,1.000000,0.766709,0.845559,1.000000,0.725996
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
187593,0.801417,1.000000,0.784291,0.689527,0.677434,0.785114,0.754240,0.684100,0.794191,0.870592,0.814738,0.694991,0.720979,0.769255,1.000000,0.704018,0.743162,0.665617,0.751879,0.709054,1.000000,1.000000,1.000000,0.848176,0.862475,0.692071,0.720222,0.673678,0.628900,0.788622,0.661841,0.789694,1.000000,0.765582,0.729181,0.697866,1.000000,0.839850,0.897432,0.848844,...,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,0.842417,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,0.814854,1.000000,1.000000,1.000000,1.000000,0.812672,1.000000,1.000000,1.000000,1.000000
187595,0.721214,0.789279,0.707449,0.745746,0.726029,0.718831,0.689232,0.611952,0.688630,0.708476,0.833410,0.745700,0.747981,0.660978,0.830757,0.732662,0.639432,0.702964,0.729076,0.749611,0.811374,0.829342,0.809159,0.863478,0.759656,0.747829,0.739521,0.714188,0.753740,0.713617,0.730994,0.767586,0.830651,0.725320,0.633083,0.743382,0.800745,0.742841,0.672665,0.702121,...,1.000000,1.000000,0.800950,0.755438,0.645187,0.634727,1.000000,1.000000,0.639142,1.000000,1.000000,1.000000,0.805949,1.000000,1.000000,1.000000,1.000000,0.776952,1.000000,0.651116,1.000000,0.696355,1.000000,0.733243,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,0.643244,1.000000,1.000000,0.737228,1.000000,0.639040,1.000000,1.000000,1.000000,0.584372
188301,0.782093,1.000000,0.745094,0.801268,0.826128,0.821560,1.000000,0.696691,0.802394,0.766118,1.000000,0.819783,0.821400,0.704602,1.000000,0.810544,0.718170,0.811491,0.788238,0.799119,1.000000,1.000000,1.000000,1.000000,0.852603,0.802896,0.834691,0.797452,0.825481,0.797047,0.809362,0.857466,1.000000,0.805341,0.739975,0.837141,1.000000,0.817758,0.737387,0.788900,...,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,0.596527,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,0.525955,1.000000,1.000000,1.000000,1.000000,0.520368,1.000000,1.000000,1.000000,1.000000
188675,1.000000,1.000000,0.796075,0.841014,1.000000,1.000000,1.000000,1.000000,0.777693,1.000000,0.778641,0.891870,0.839260,1.000000,1.000000,0.829489,1.000000,1.000000,0.830591,0.824229,1.000000,1.000000,1.000000,0.818594,1.000000,0.842317,0.834691,0.848089,1.000000,1.000000,0.809362,1.000000,1.000000,1.000000,0.791980,0.837141,1.000000,0.840538,1.000000,0.852230,...,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000


#### MAKING SOME RECOMMENDATIONS

In [None]:
def get_similar_movies(movieId, k = 5):
  """Return the titles of the `k` movies most similar to `movieId`.

  `movie_similarity` stores correlation distances (0 = identical rating
  pattern), so the most similar movies are the smallest entries of the row.

  Differences from the previous version:
    * the query movie (distance ~0 to itself) is no longer part of the result;
    * titles come back ordered from most to least similar, where the earlier
      `isin` lookup returned them in df_movies row order;
    * the row is read with `.loc` instead of transposing the whole matrix;
    * the neighbour count is a parameter (default 5, as before).

  Args:
    movieId: Id of the query movie; must label a row of `movie_similarity`.
    k: Number of similar movies to return.

  Returns:
    pandas Series of up to `k` titles (indexed by df_movies row label), most
    similar first.
  """
  # Remove the movie's own entry so it cannot take one of the k slots.
  distances = movie_similarity.loc[movieId].drop(labels = movieId, errors = 'ignore')
  # sort_values() is ascending and places NaN (undefined correlation) last.
  nearest_ids = distances.sort_values().index[:k]

  # isin() yields rows in df_movies order, so reorder them by similarity rank.
  rank = {movie: position for position, movie in enumerate(nearest_ids)}
  matches = df_movies[df_movies['movieId'].isin(nearest_ids)]
  ordered = matches.iloc[matches['movieId'].map(rank).values.argsort()]
  return ordered['title']

# Demo: show the query movie's title, then the titles returned for it.
test_movie_id = 50
test_movie_title = df_movies.loc[df_movies['movieId'] == test_movie_id, 'title'].iloc[0]
print("Test Movie Name: ", test_movie_title)
print("Top 5 similar movies: \n")
movie_sims = get_similar_movies(test_movie_id)
# Each element is already a title string, so it can be printed directly.
for movie in movie_sims:
  print(movie)
# def get_recommendations(user_id):
#   user_vector = df_user_table[user_id]
#   for movies in user_vector:
#     if user_vector[movies] >= 4.0:


Test Movie Name:  Usual Suspects, The (1995)
Top 5 similar movies: 

Usual Suspects, The (1995)
Godfather, The (1972)
Reservoir Dogs (1992)
Goodfellas (1990)
Godfather: Part II, The (1974)
