In [1]:
import pandas as pd
import numpy as np

# Movie metadata; the first CSV column becomes the index of `movies`.
movies = pd.read_csv('datasets/ml-latest-small/movies.csv', index_col=0)

# Long-format ratings (one row per rating), with the four columns relabelled
# positionally to snake_case names.
ratings_df = pd.read_csv('datasets/ml-latest-small/ratings.csv')
ratings_df.columns = ['user_id', 'movie_id', 'rating', 'timestamp']

# Reshape to a wide table: one row per movie_id, one column per user_id,
# NaN wherever a user has not rated a movie.
user_ratings = (
    ratings_df
    .set_index(['movie_id', 'user_id'])['rating']
    .unstack('user_id')
)

In [2]:
# Work on a private dense copy (rows = movies, columns = users) so the pivot
# table `user_ratings` itself is never modified.
ui_matrix = np.copy(user_ratings.values)

# Number of finite (i.e. present) ratings per movie.
popularity = np.isfinite(ui_matrix).sum(axis=1, dtype=int)

# Mean rating per movie, ignoring the NaN cells.
means = np.nanmean(ui_matrix, axis=1)

# Center every movie row on its own mean, then replace the remaining NaNs
# (missing ratings) with 0 so they sit exactly at the movie mean.
ui_matrix = np.nan_to_num(ui_matrix - means[:, np.newaxis])

In [3]:
from scipy.sparse.linalg import svds

# Requested number of latent factors for the truncated SVD.
N_FACTORS = 500

# With its default solver, `svds` requires k < min(ui_matrix.shape); clamp the
# requested rank so the cell keeps working on datasets with fewer movies or
# users than N_FACTORS + 1. When 500 is already valid, nothing changes.
n_factors = min(N_FACTORS, min(ui_matrix.shape) - 1)

# NOTE(review): a rank this close to the matrix's smaller dimension will
# reproduce the zero-filled cells almost exactly, pulling predictions towards
# the per-movie mean -- confirm that 500 is the intended rank.

# u:  (n_movies, k) left singular vectors (movie factors)
# s:  (k,)          singular values
# vt: (k, n_users)  right singular vectors (user factors)
u, s, vt = svds(ui_matrix, k=n_factors)

In [4]:
# Fixed seed so that Restart & Run All always selects the same user and the
# tables displayed below stay reproducible.
SEED = 42

# Draw one random column of the movie x user table, i.e. one user's ratings.
user = user_ratings.sample(axis=1, random_state=SEED)
user_id = user.columns[0]

# Attach movie titles. `reindex` leaves NaN for a rated movie that is missing
# from the metadata instead of raising KeyError like label-based indexing.
user = user.assign(title=movies['title'].reindex(user_ratings.index))
user.columns = ['rating', 'title']

# The selected user's 20 highest-rated movies.
user.sort_values(by='rating', ascending=False).head(20)

Unnamed: 0_level_0,rating,title
movie_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,5.0,Toy Story (1995)
592,5.0,Batman (1989)
2858,5.0,American Beauty (1999)
2797,5.0,Big (1988)
3114,5.0,Toy Story 2 (1999)
3176,5.0,"Talented Mr. Ripley, The (1999)"
1639,5.0,Chasing Amy (1997)
3481,5.0,High Fidelity (2000)
3578,5.0,Gladiator (2000)
1356,5.0,Star Trek: First Contact (1996)


In [5]:
# One row of latent factors per user, labelled with the user ids that head the
# columns of `user_ratings`.
theta_df = pd.DataFrame(vt.T, index=user_ratings.columns)

# Latent factor vector of the user selected earlier.
user_theta = theta_df.loc[user_id]

# Movie factors scaled by the singular values, one row per movie.
scaled_item_factors = s * u

# Rebuild this user's centered ratings for every movie, then add the per-movie
# means back to return to the original rating scale.
centered_pred = user_theta.values @ scaled_item_factors.T
pred = centered_pred + means

In [6]:
# Store the reconstructed rating next to the user's actual ratings.
user['predicted'] = pred

# Ranking score: predicted rating weighted by how many ratings the movie has.
user['relevance'] = user['predicted'] * popularity

# Recommend only movies the user has not rated yet, most relevant first.
not_rated = user['rating'].isna()
user.loc[not_rated].sort_values(by='relevance', ascending=False).head(20)

Unnamed: 0_level_0,rating,title,predicted,relevance
movie_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
296,,Pulp Fiction (1994),4.248962,1376.663713
356,,Forrest Gump (1994),4.032926,1375.227797
527,,Schindler's List (1993),4.304056,1050.189678
1196,,Star Wars: Episode V - The Empire Strikes Back...,4.237824,991.650929
608,,Fargo (1996),4.257189,953.610249
589,,Terminator 2: Judgment Day (1991),4.005997,949.421358
1270,,Back to the Future (1985),4.014198,907.208763
110,,Braveheart (1995),3.937018,897.640189
858,,"Godfather, The (1972)",4.465236,893.047173
1210,,Star Wars: Episode VI - Return of the Jedi (1983),4.057057,880.381442
