In [1]:
import pandas as pd

In [2]:
# Directory holding the MovieLens "ml-latest-small" CSV files. Edit this single
# constant to run the notebook against a copy stored somewhere else.
DATA_DIR = "C:/Users/exman/datasets/ml-latest-small"

# Load only the listed columns from each file.
movies_df = pd.read_csv(DATA_DIR + "/movies.csv", usecols=['movieId', 'title'])
ratings_df = pd.read_csv(DATA_DIR + "/ratings.csv", usecols=['userId', 'movieId', 'rating'])

In [3]:
# Summarise the raw ratings: distinct users, distinct movies and the set of
# rating values that occur.
unique_user_count = ratings_df['userId'].nunique()
unique_movie_count = ratings_df['movieId'].nunique()
rating_column = ratings_df['rating']

print('The ratings dataset has', unique_user_count, 'unique users')
print('The ratings dataset has', unique_movie_count, 'unique movies')
print('The ratings dataset has', rating_column.nunique(), 'unique ratings')
print('The unique ratings are', sorted(rating_column.unique()))

The ratings dataset has 610 unique users
The ratings dataset has 9724 unique movies
The ratings dataset has 10 unique ratings
The unique ratings are [0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0]


In [4]:
# Attach each movie's title to its ratings. The inner join keeps only ratings
# whose movieId is also present in movies_df.
df = ratings_df.merge(movies_df, how='inner', on='movieId')
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 100836 entries, 0 to 100835
Data columns (total 4 columns):
 #   Column   Non-Null Count   Dtype  
---  ------   --------------   -----  
 0   userId   100836 non-null  int64  
 1   movieId  100836 non-null  int64  
 2   rating   100836 non-null  float64
 3   title    100836 non-null  object 
dtypes: float64(1), int64(2), object(1)
memory usage: 3.8+ MB


In [5]:
# Preview the first rows of the merged ratings/titles frame.
df.head()

Unnamed: 0,userId,movieId,rating,title
0,1,1,4.0,Toy Story (1995)
1,5,1,4.0,Toy Story (1995)
2,7,1,4.5,Toy Story (1995)
3,15,1,2.5,Toy Story (1995)
4,17,1,4.5,Toy Story (1995)


In [6]:
# Per-movie summary: the average rating and the number of ratings received.
ratings_by_movie = df.groupby('movieId')['rating']
agg_ratings = ratings_by_movie.agg(mean_rating='mean', number_of_ratings='count').reset_index()
agg_ratings

Unnamed: 0,movieId,mean_rating,number_of_ratings
0,1,3.920930,215
1,2,3.431818,110
2,3,3.259615,52
3,4,2.357143,7
4,5,3.071429,49
...,...,...,...
9719,193581,4.000000,1
9720,193583,3.500000,1
9721,193585,3.500000,1
9722,193587,3.500000,1


In [7]:
# Keep only the movies that received strictly more than 40 ratings.
has_enough_ratings = agg_ratings['number_of_ratings'].gt(40)
agg_ratings_GT40 = agg_ratings.loc[has_enough_ratings]
agg_ratings_GT40

Unnamed: 0,movieId,mean_rating,number_of_ratings
0,1,3.920930,215
1,2,3.431818,110
2,3,3.259615,52
4,5,3.071429,49
5,6,3.946078,102
...,...,...,...
8663,122882,3.819149,47
8665,122886,3.853659,41
8673,122904,3.833333,54
8861,134130,4.000000,48


In [8]:
# Restrict the individual ratings to the movies that passed the rating-count
# filter, by inner-joining on the surviving movieIds.
popular_movie_ids = agg_ratings_GT40[['movieId']]
df_GT40 = df.merge(popular_movie_ids, how='inner', on='movieId')
df_GT40.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 48748 entries, 0 to 48747
Data columns (total 4 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   userId   48748 non-null  int64  
 1   movieId  48748 non-null  int64  
 2   rating   48748 non-null  float64
 3   title    48748 non-null  object 
dtypes: float64(1), int64(2), object(1)
memory usage: 1.9+ MB


In [9]:
# Display the filtered ratings frame (userId, movieId, rating, title).
df_GT40

Unnamed: 0,userId,movieId,rating,title
0,1,1,4.0,Toy Story (1995)
1,5,1,4.0,Toy Story (1995)
2,7,1,4.5,Toy Story (1995)
3,15,1,2.5,Toy Story (1995)
4,17,1,4.5,Toy Story (1995)
...,...,...,...,...
48743,590,52722,3.0,Spider-Man 3 (2007)
48744,596,52722,2.5,Spider-Man 3 (2007)
48745,599,52722,2.5,Spider-Man 3 (2007)
48746,606,52722,2.0,Spider-Man 3 (2007)


In [10]:
# Same summary as for the raw ratings, now computed on the filtered frame.
filtered_user_count = df_GT40['userId'].nunique()
filtered_movie_count = df_GT40['movieId'].nunique()
filtered_rating_column = df_GT40['rating']

print('The ratings dataset has', filtered_user_count, 'unique users')
print('The ratings dataset has', filtered_movie_count, 'unique movies')
print('The ratings dataset has', filtered_rating_column.nunique(), 'unique ratings')
print('The unique ratings are', sorted(filtered_rating_column.unique()))

The ratings dataset has 608 unique users
The ratings dataset has 616 unique movies
The ratings dataset has 10 unique ratings
The unique ratings are [0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0]


In [11]:
# Popular movies with their titles, ordered from most to least rated, saved to
# 'top_films.csv'; the first 50 rows are displayed.
movie_titles = movies_df[['movieId', 'title']]
top_films = (
    agg_ratings_GT40
    .merge(movie_titles, on='movieId', how='inner')
    .sort_values('number_of_ratings', ascending=False)
    .reset_index(drop=True)
)
top_films.to_csv('top_films.csv')
top_films.head(50)

Unnamed: 0,movieId,mean_rating,number_of_ratings,title
0,356,4.164134,329,Forrest Gump (1994)
1,318,4.429022,317,"Shawshank Redemption, The (1994)"
2,296,4.197068,307,Pulp Fiction (1994)
3,593,4.16129,279,"Silence of the Lambs, The (1991)"
4,2571,4.192446,278,"Matrix, The (1999)"
5,260,4.231076,251,Star Wars: Episode IV - A New Hope (1977)
6,480,3.75,238,Jurassic Park (1993)
7,110,4.031646,237,Braveheart (1995)
8,589,3.970982,224,Terminator 2: Judgment Day (1991)
9,527,4.225,220,Schindler's List (1993)


In [12]:
# User x title rating matrix: one row per userId, one column per title.
# Cells for movies a user did not rate are NaN.
matrix = pd.pivot_table(df_GT40, values='rating', index='userId', columns='title')
matrix

title,(500) Days of Summer (2009),10 Things I Hate About You (1999),101 Dalmatians (1996),101 Dalmatians (One Hundred and One Dalmatians) (1961),12 Angry Men (1957),2001: A Space Odyssey (1968),28 Days Later (2002),300 (2007),"40-Year-Old Virgin, The (2005)",50 First Dates (2004),...,"Wolf of Wall Street, The (2013)","X-Files: Fight the Future, The (1998)",X-Men (2000),X-Men: First Class (2011),X-Men: The Last Stand (2006),X2: X-Men United (2003),You've Got Mail (1998),Young Frankenstein (1974),Zombieland (2009),Zoolander (2001)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,5.0,,,,,5.0,,
2,,,,,,,,,,,...,5.0,,,,,,,,3.0,
3,,,,,,,,,,,...,,,,,,,0.5,,,
4,,,,,5.0,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,,,,,,5.0,,,,,...,,,,,,,3.5,3.5,,
607,,,,,,,,,,,...,,,3.0,,,,,,,
608,,,,,,3.0,3.5,5.0,,,...,,3.5,4.0,,4.0,4.0,,,,3.0
609,,,,,,,,,,,...,,,,,,,,,,


In [13]:
# Centre every user's ratings on that user's own mean rating, then save the
# centred matrix to 'pivot_table_normalized.csv'.
user_mean_rating = matrix.mean(axis='columns')
matrix_norm = matrix.sub(user_mean_rating, axis='index')
matrix_norm.to_csv('pivot_table_normalized.csv')
matrix_norm

title,(500) Days of Summer (2009),10 Things I Hate About You (1999),101 Dalmatians (1996),101 Dalmatians (One Hundred and One Dalmatians) (1961),12 Angry Men (1957),2001: A Space Odyssey (1968),28 Days Later (2002),300 (2007),"40-Year-Old Virgin, The (2005)",50 First Dates (2004),...,"Wolf of Wall Street, The (2013)","X-Files: Fight the Future, The (1998)",X-Men (2000),X-Men: First Class (2011),X-Men: The Last Stand (2006),X2: X-Men United (2003),You've Got Mail (1998),Young Frankenstein (1974),Zombieland (2009),Zoolander (2001)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,0.626866,,,,,0.626866,,
2,,,,,,,,,,,...,1.138889,,,,,,,,-0.861111,
3,,,,,,,,,,,...,,,,,,,-1.250000,,,
4,,,,,1.6,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,,,,,,1.329392,,,,,...,,,,,,,-0.170608,-0.170608,,
607,,,,,,,,,,,...,,,-0.761062,,,,,,,
608,,,,,,-0.346966,0.153034,1.653034,,,...,,0.153034,0.653034,,0.653034,0.653034,,,,-0.346966
609,,,,,,,,,,,...,,,,,,,,,,


In [14]:
import pandas as pd
import numpy as np

In [15]:
# Reload the normalized rating matrix from disk, using the first CSV column
# as the row index.
matrix_norm = pd.read_csv("pivot_table_normalized.csv", index_col=[0])
matrix_norm

Unnamed: 0_level_0,(500) Days of Summer (2009),10 Things I Hate About You (1999),101 Dalmatians (1996),101 Dalmatians (One Hundred and One Dalmatians) (1961),12 Angry Men (1957),2001: A Space Odyssey (1968),28 Days Later (2002),300 (2007),"40-Year-Old Virgin, The (2005)",50 First Dates (2004),...,"Wolf of Wall Street, The (2013)","X-Files: Fight the Future, The (1998)",X-Men (2000),X-Men: First Class (2011),X-Men: The Last Stand (2006),X2: X-Men United (2003),You've Got Mail (1998),Young Frankenstein (1974),Zombieland (2009),Zoolander (2001)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,0.626866,,,,,0.626866,,
2,,,,,,,,,,,...,1.138889,,,,,,,,-0.861111,
3,,,,,,,,,,,...,,,,,,,-1.250000,,,
4,,,,,1.6,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,,,,,,1.329392,,,,,...,,,,,,,-0.170608,-0.170608,,
607,,,,,,,,,,,...,,,-0.761062,,,,,,,
608,,,,,,-0.346966,0.153034,1.653034,,,...,,0.153034,0.653034,,0.653034,0.653034,,,,-0.346966
609,,,,,,,,,,,...,,,,,,,,,,


In [16]:
# All movie titles available in the matrix, in column order.
films_names = matrix_norm.columns.tolist()
films_names

['(500) Days of Summer (2009)',
 '10 Things I Hate About You (1999)',
 '101 Dalmatians (1996)',
 '101 Dalmatians (One Hundred and One Dalmatians) (1961)',
 '12 Angry Men (1957)',
 '2001: A Space Odyssey (1968)',
 '28 Days Later (2002)',
 '300 (2007)',
 '40-Year-Old Virgin, The (2005)',
 '50 First Dates (2004)',
 '8 Mile (2002)',
 'A.I. Artificial Intelligence (2001)',
 'About a Boy (2002)',
 'Abyss, The (1989)',
 'Ace Ventura: Pet Detective (1994)',
 'Ace Ventura: When Nature Calls (1995)',
 'Adaptation (2002)',
 'Addams Family Values (1993)',
 'Air Force One (1997)',
 'Airplane! (1980)',
 'Aladdin (1992)',
 'Alien (1979)',
 'Alien: Resurrection (1997)',
 'Aliens (1986)',
 'Alien³ (a.k.a. Alien 3) (1992)',
 'Almost Famous (2000)',
 'Amadeus (1984)',
 "Amelie (Fabuleux destin d'Amélie Poulain, Le) (2001)",
 'American Beauty (1999)',
 'American Graffiti (1973)',
 'American History X (1998)',
 'American Pie (1999)',
 'American Pie 2 (2001)',
 'American President, The (1995)',
 'American P

In [17]:
# Ratings supplied by the new user, keyed by movie title. A title is only
# usable when it matches a column of matrix_norm exactly.
user_vector_dictionary = {'(500) Days of Summer (2009)': 5.0, '101 Dalmatians (1996)': 3.5}
# The new user receives the id following the largest id already in the matrix.
user_id = matrix_norm.index.max() + 1

# Report titles that are not columns of matrix_norm instead of silently
# dropping them (a misspelled title would otherwise go unnoticed).
unknown_titles = [title for title in user_vector_dictionary if title not in matrix_norm.columns]
if unknown_titles:
    print('Ignoring titles missing from the rating matrix:', unknown_titles)
if len(unknown_titles) == len(user_vector_dictionary):
    # Covers an empty dictionary too: an all-NaN row cannot be compared to anyone.
    raise ValueError('user_vector_dictionary has no title present in the rating matrix')

# One-row frame indexed by user_id with the same columns as matrix_norm;
# movies the new user did not rate are NaN.
user_vector = (
    pd.DataFrame([user_vector_dictionary], index=[user_id], dtype=float)
    .reindex(columns=matrix_norm.columns)
)
user_vector

Unnamed: 0,(500) Days of Summer (2009),10 Things I Hate About You (1999),101 Dalmatians (1996),101 Dalmatians (One Hundred and One Dalmatians) (1961),12 Angry Men (1957),2001: A Space Odyssey (1968),28 Days Later (2002),300 (2007),"40-Year-Old Virgin, The (2005)",50 First Dates (2004),...,"Wolf of Wall Street, The (2013)","X-Files: Fight the Future, The (1998)",X-Men (2000),X-Men: First Class (2011),X-Men: The Last Stand (2006),X2: X-Men United (2003),You've Got Mail (1998),Young Frankenstein (1974),Zombieland (2009),Zoolander (2001)
611,5.0,,3.5,,,,,,,,...,,,,,,,,,,


In [18]:
# Centre the new user's ratings on that user's own mean rating.
new_user_mean = user_vector.mean(axis='columns')
user_vector_norm = user_vector.sub(new_user_mean, axis='index')
user_vector_norm

Unnamed: 0,(500) Days of Summer (2009),10 Things I Hate About You (1999),101 Dalmatians (1996),101 Dalmatians (One Hundred and One Dalmatians) (1961),12 Angry Men (1957),2001: A Space Odyssey (1968),28 Days Later (2002),300 (2007),"40-Year-Old Virgin, The (2005)",50 First Dates (2004),...,"Wolf of Wall Street, The (2013)","X-Files: Fight the Future, The (1998)",X-Men (2000),X-Men: First Class (2011),X-Men: The Last Stand (2006),X2: X-Men United (2003),You've Got Mail (1998),Young Frankenstein (1974),Zombieland (2009),Zoolander (2001)
611,0.75,,-0.75,,,,,,,,...,,,,,,,,,,


In [19]:
# Append the new user's normalized row to the matrix.
# Any existing row with the same id is removed first, so re-running this cell
# replaces the row instead of stacking a duplicate index entry.
matrix_norm = pd.concat([
    matrix_norm.drop(index=user_vector_norm.index, errors='ignore'),
    user_vector_norm,
])
matrix_norm

Unnamed: 0,(500) Days of Summer (2009),10 Things I Hate About You (1999),101 Dalmatians (1996),101 Dalmatians (One Hundred and One Dalmatians) (1961),12 Angry Men (1957),2001: A Space Odyssey (1968),28 Days Later (2002),300 (2007),"40-Year-Old Virgin, The (2005)",50 First Dates (2004),...,"Wolf of Wall Street, The (2013)","X-Files: Fight the Future, The (1998)",X-Men (2000),X-Men: First Class (2011),X-Men: The Last Stand (2006),X2: X-Men United (2003),You've Got Mail (1998),Young Frankenstein (1974),Zombieland (2009),Zoolander (2001)
1,,,,,,,,,,,...,,,0.626866,,,,,0.626866,,
2,,,,,,,,,,,...,1.138889,,,,,,,,-0.861111,
3,,,,,,,,,,,...,,,,,,,-1.25,,,
4,,,,,1.6,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
607,,,,,,,,,,,...,,,-0.761062,,,,,,,
608,,,,,,-0.346966,0.153034,1.653034,,,...,,0.153034,0.653034,,0.653034,0.653034,,,,-0.346966
609,,,,,,,,,,,...,,,,,,,,,,
610,-0.633779,,,,,0.366221,0.866221,-0.133779,0.366221,-0.633779,...,0.366221,,-0.633779,-0.133779,-1.133779,-0.133779,,,-0.633779,-0.133779


In [20]:
# Persist the normalized matrix, which at this point also contains the new
# user's row.
# NOTE(review): this overwrites the same 'pivot_table_normalized.csv' that is
# read earlier in the notebook, so a later re-run would load the appended user
# as if it were original data - confirm this is intended.
matrix_norm.to_csv('pivot_table_normalized.csv')

In [21]:
# Pairwise correlation between users. corr() correlates columns, so the matrix
# is transposed first to put users on the columns.
ratings_by_user = matrix_norm.transpose()
user_similarity = ratings_by_user.corr()
user_similarity.head()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,602,603,604,605,606,607,608,609,610,611
1,1.0,,0.0,0.189059,0.268749,-0.31302,-0.118773,0.469668,0.918559,-0.037987,...,0.044987,-0.034864,-0.407556,-0.142637,0.082326,0.106843,0.213186,-0.175412,-0.02452,
2,,1.0,,,,,-0.991241,,,0.037796,...,,-1.0,,,0.583333,,-0.125,,0.694735,
3,0.0,,1.0,,,,,,,,...,,0.294174,,,-0.831704,-0.333333,-0.441248,,0.164122,
4,0.189059,,,1.0,-0.398485,0.359738,0.540406,0.117851,,0.485794,...,0.486818,0.08999,-0.148544,0.460566,0.09216,0.101666,-0.203607,,-0.055617,
5,0.268749,,,-0.398485,1.0,0.036846,0.328889,0.028347,,-0.777714,...,0.174285,0.215664,0.087482,-0.364156,0.295459,0.23108,0.005242,0.384111,0.040582,


In [22]:
# Remove the target user's own row so the user is never returned as a
# neighbour. The user's column is kept because it holds the similarities
# looked up afterwards.
# Non-inplace with errors='ignore', so re-running this cell does not raise.
user_similarity = user_similarity.drop(index=user_id, errors='ignore')
user_similarity

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,602,603,604,605,606,607,608,609,610,611
1,1.000000,,0.000000,0.189059,0.268749,-0.313020,-0.118773,0.469668,0.918559,-0.037987,...,0.044987,-0.034864,-0.407556,-0.142637,0.082326,0.106843,0.213186,-0.175412,-0.024520,
2,,1.000000,,,,,-0.991241,,,0.037796,...,,-1.000000,,,0.583333,,-0.125000,,0.694735,
3,0.000000,,1.000000,,,,,,,,...,,0.294174,,,-0.831704,-0.333333,-0.441248,,0.164122,
4,0.189059,,,1.000000,-0.398485,0.359738,0.540406,0.117851,,0.485794,...,0.486818,0.089990,-0.148544,0.460566,0.092160,0.101666,-0.203607,,-0.055617,
5,0.268749,,,-0.398485,1.000000,0.036846,0.328889,0.028347,,-0.777714,...,0.174285,0.215664,0.087482,-0.364156,0.295459,0.231080,0.005242,0.384111,0.040582,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,0.082326,0.583333,-0.831704,0.092160,0.295459,-0.250393,0.146169,0.277241,0.266534,-0.371705,...,0.256465,0.312162,0.739957,0.135500,1.000000,0.020921,0.237897,0.654289,0.352711,
607,0.106843,,-0.333333,0.101666,0.231080,0.428048,0.402792,0.251280,,-0.241121,...,0.233237,0.187283,-0.021810,-0.267339,0.020921,1.000000,0.140465,0.190117,0.037454,
608,0.213186,-0.125000,-0.441248,-0.203607,0.005242,-0.048990,-0.010131,0.434423,-0.033928,-0.560574,...,0.357980,0.111669,0.099892,-0.030778,0.237897,0.140465,1.000000,0.523692,0.116122,
609,-0.175412,,,,0.384111,0.193649,0.420288,0.141860,,,...,0.188512,0.287348,0.656488,-0.550000,0.654289,0.190117,0.523692,1.000000,-0.521773,


In [23]:
# Neighbour selection: at most n users whose similarity to the target user is
# strictly above the threshold, most similar first.
n = 10
user_similarity_threshold = 0.3

similarity_to_user = user_similarity[user_id]
above_threshold = similarity_to_user > user_similarity_threshold
similar_users = user_similarity.loc[above_threshold, user_id].sort_values(ascending=False).iloc[:n]
similar_users

68     1.0
92     1.0
177    1.0
213    1.0
318    1.0
483    1.0
249    1.0
477    1.0
Name: 611, dtype: float64

In [24]:
# The target user's row, reduced to the movies that user actually rated.
is_target_user = matrix_norm.index == user_id
user_id_watched = matrix_norm.loc[is_target_user].dropna(axis='columns', how='all')
user_id_watched

Unnamed: 0,(500) Days of Summer (2009),101 Dalmatians (1996)
611,0.75,-0.75


In [25]:
# Rows of the similar users, keeping only movies rated by at least one of them.
is_similar_user = matrix_norm.index.isin(similar_users.index)
similar_user_movies = matrix_norm.loc[is_similar_user].dropna(axis='columns', how='all')
similar_user_movies

Unnamed: 0,(500) Days of Summer (2009),10 Things I Hate About You (1999),101 Dalmatians (1996),101 Dalmatians (One Hundred and One Dalmatians) (1961),12 Angry Men (1957),2001: A Space Odyssey (1968),28 Days Later (2002),300 (2007),"40-Year-Old Virgin, The (2005)",50 First Dates (2004),...,"Wolf of Wall Street, The (2013)","X-Files: Fight the Future, The (1998)",X-Men (2000),X-Men: First Class (2011),X-Men: The Last Stand (2006),X2: X-Men United (2003),You've Got Mail (1998),Young Frankenstein (1974),Zombieland (2009),Zoolander (2001)
68,0.667647,1.167647,-0.832353,-0.332353,,-1.832353,1.167647,0.667647,1.167647,-0.832353,...,-0.332353,,1.167647,1.167647,-1.332353,1.167647,0.167647,0.667647,1.667647,0.667647
92,-0.142857,0.857143,-1.142857,-0.642857,,,,0.357143,0.357143,,...,,,,,,,,,,
177,-0.052083,0.947917,-1.052083,1.447917,0.947917,-2.052083,,-1.552083,-2.052083,0.447917,...,,,0.947917,0.447917,0.447917,-0.052083,-0.552083,,1.447917,
213,0.434211,,-0.565789,,-0.065789,,,-0.565789,,,...,,,,,,,,,,
249,0.028443,-0.471557,-0.471557,,1.028443,0.528443,0.028443,1.028443,0.028443,,...,1.028443,,0.028443,0.528443,-0.471557,-0.471557,-0.971557,,0.528443,0.028443
318,0.333333,,-0.666667,,0.333333,-0.166667,-0.166667,-0.166667,-0.166667,,...,,,,-0.666667,-0.166667,,,,0.333333,
477,1.16436,,-0.83564,0.16436,0.16436,0.66436,0.16436,0.66436,0.16436,,...,,0.66436,0.16436,,-0.33564,0.16436,0.16436,0.16436,0.66436,
483,0.876694,-1.123306,0.376694,,1.376694,,,0.376694,-0.623306,0.376694,...,,,-0.623306,,,,-0.123306,0.376694,-0.123306,1.376694


In [26]:
# Exclude movies the target user has already rated from the candidates.
# errors='ignore' tolerates watched titles that none of the similar users
# rated. The result is rebound rather than mutated in place, so the frame
# shown by the previous cell is not altered behind the reader's back.
similar_user_movies = similar_user_movies.drop(columns=user_id_watched.columns, errors='ignore')
similar_user_movies

Unnamed: 0,10 Things I Hate About You (1999),101 Dalmatians (One Hundred and One Dalmatians) (1961),12 Angry Men (1957),2001: A Space Odyssey (1968),28 Days Later (2002),300 (2007),"40-Year-Old Virgin, The (2005)",50 First Dates (2004),8 Mile (2002),A.I. Artificial Intelligence (2001),...,"Wolf of Wall Street, The (2013)","X-Files: Fight the Future, The (1998)",X-Men (2000),X-Men: First Class (2011),X-Men: The Last Stand (2006),X2: X-Men United (2003),You've Got Mail (1998),Young Frankenstein (1974),Zombieland (2009),Zoolander (2001)
68,1.167647,-0.332353,,-1.832353,1.167647,0.667647,1.167647,-0.832353,0.667647,,...,-0.332353,,1.167647,1.167647,-1.332353,1.167647,0.167647,0.667647,1.667647,0.667647
92,0.857143,-0.642857,,,,0.357143,0.357143,,,,...,,,,,,,,,,
177,0.947917,1.447917,0.947917,-2.052083,,-1.552083,-2.052083,0.447917,,,...,,,0.947917,0.447917,0.447917,-0.052083,-0.552083,,1.447917,
213,,,-0.065789,,,-0.565789,,,,,...,,,,,,,,,,
249,-0.471557,,1.028443,0.528443,0.028443,1.028443,0.028443,,0.528443,-0.471557,...,1.028443,,0.028443,0.528443,-0.471557,-0.471557,-0.971557,,0.528443,0.028443
318,,,0.333333,-0.166667,-0.166667,-0.166667,-0.166667,,,,...,,,,-0.666667,-0.166667,,,,0.333333,
477,,0.16436,0.16436,0.66436,0.16436,0.66436,0.16436,,,-0.33564,...,,0.66436,0.16436,,-0.33564,0.16436,0.16436,0.16436,0.66436,
483,-1.123306,,1.376694,,,0.376694,-0.623306,0.376694,-2.623306,0.376694,...,,,-0.623306,,,,-0.123306,0.376694,-0.123306,1.376694


In [27]:
# Score every candidate movie: each similar user's normalized rating is
# multiplied by that user's similarity to the target user, and the products
# are averaged over the users who actually rated the movie (NaN = not rated).
weighted_ratings = similar_user_movies.mul(similar_users, axis='index')
# A movie without any usable rating has no score; drop it instead of dividing by zero.
movie_scores = weighted_ratings.mean(axis='index', skipna=True).dropna()

# Two-column frame ('movie', 'movie_score') with a positional index.
item_score = movie_scores.rename_axis('movie').reset_index(name='movie_score')

# Keep the highest-scoring movies as the recommendations.
n_recommendations = 20
ranked_item_score = item_score.sort_values(by='movie_score', ascending=False).head(n_recommendations)
ranked_item_score

Unnamed: 0,movie,movie_score
408,"Princess Bride, The (1987)",1.497419
233,"Great Escape, The (1963)",1.376694
370,Natural Born Killers (1994),1.333333
278,Interstellar (2014),1.246157
157,Desperado (1995),1.167647
330,M*A*S*H (a.k.a. MASH) (1970),1.16436
559,Wallace & Gromit: A Close Shave (1995),1.16436
442,Scarface (1983),1.098045
271,Inception (2010),1.086399
239,Guardians of the Galaxy (2014),1.048002


In [28]:
# Recommended titles as a plain list, best score first.
final = ranked_item_score['movie'].tolist()
final

['Princess Bride, The (1987)',
 'Great Escape, The (1963)',
 'Natural Born Killers (1994)',
 'Interstellar (2014)',
 'Desperado (1995)',
 'M*A*S*H (a.k.a. MASH) (1970)',
 'Wallace & Gromit: A Close Shave (1995)',
 'Scarface (1983)',
 'Inception (2010)',
 'Guardians of the Galaxy (2014)',
 'Twelve Monkeys (a.k.a. 12 Monkeys) (1995)',
 'Psycho (1960)',
 'Forrest Gump (1994)',
 "Wayne's World (1992)",
 'WALL·E (2008)',
 'Million Dollar Baby (2004)',
 "Singin' in the Rain (1952)",
 'Good Will Hunting (1997)',
 'Unforgiven (1992)',
 'Harry Potter and the Deathly Hallows: Part 1 (2010)']