In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (12,9)

import pandas as pd
import numpy as np
from scipy.sparse.linalg import svds

In [2]:
users = pd.read_csv('data/users.dat', 
                    sep='::',
                    names=['UserID', 'gender', 'age', 'occupation', 'zip code'],
                    index_col='UserID',
                    engine='python')

movies = pd.read_csv('data/movies.dat', 
                     sep='::',
                     names=['MovieID', 'title', 'genre'],
                     index_col='MovieID',
                     engine='python')

ratings = pd.read_csv('data/ratings.dat', 
                      sep='::',
                      names=['UserID', 'MovieID', 'Rating', 'Timestamp'],
                      engine='python')

In [3]:
df = ratings.pivot(index='UserID',
                   columns='MovieID',
                   values='Rating').fillna(0)

In [4]:
user_means = df.mean(axis=1)
meaned_df = df.sub(user_means, axis=0)

In [5]:
U, S, Vt = svds(meaned_df)

In [6]:
US = np.dot(U, np.diag(S))
predictions = np.dot(US, Vt) + np.array(user_means).reshape(-1, 1)

predictions = pd.DataFrame(predictions, columns=df.columns)

In [7]:
def recommend_movies(UserID, n=10):
    '''
    returns a sorted list of recommended movies for a user
    
    Parameters
    ----------
    UserID : int (required)
        the user for whom we want to create a set of recommendations
    n : int (optional)
        the number of recommendations to return
        default = 10

    Returns
    -------
    recommendations : pd.DataFrame
        a sorted list of recommendations for the user
    '''
    user_predictions = (movies
                        .reset_index()
                        .join(predictions.loc[UserID],
                              how='left',
                              on='MovieID')
                        .sort_values(by=UserID, 
                                     ascending=False)
                        .set_index('MovieID'))

    already_rated = ratings[ratings['UserID'] == UserID]['MovieID'].values
    recommendations = user_predictions[~user_predictions.index.isin(already_rated)]
    return recommendations[:n]
    

In [16]:
recommend_movies(3)

Unnamed: 0_level_0,title,genre,year,3
MovieID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
589,Terminator 2: Judgment Day (1991),Action|Sci-Fi|Thriller,1991,1.105739
2571,"Matrix, The (1999)",Action|Sci-Fi|Thriller,1999,1.10428
1214,Alien (1979),Action|Horror|Sci-Fi|Thriller,1979,1.055385
1240,"Terminator, The (1984)",Action|Sci-Fi|Thriller,1984,1.030181
1200,Aliens (1986),Action|Sci-Fi|Thriller|War,1986,0.942758
2028,Saving Private Ryan (1998),Action|Drama|War,1998,0.928184
858,"Godfather, The (1972)",Action|Crime|Drama,1972,0.875477
110,Braveheart (1995),Action|Drama|War,1995,0.851724
2916,Total Recall (1990),Action|Adventure|Sci-Fi|Thriller,1990,0.850696
541,Blade Runner (1982),Film-Noir|Sci-Fi,1982,0.846561


In [12]:
recommend_movies(5, n=15)

Unnamed: 0_level_0,title,genre,year,5
MovieID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2396,Shakespeare in Love (1998),Comedy|Romance,1998,1.644283
1,Toy Story (1995),Animation|Children's|Comedy,1995,1.343243
588,Aladdin (1992),Animation|Children's|Comedy|Musical,1992,1.234262
595,Beauty and the Beast (1991),Animation|Children's|Musical,1991,1.232816
539,Sleepless in Seattle (1993),Comedy|Romance,1993,1.191353
1028,Mary Poppins (1964),Children's|Comedy|Musical,1964,1.185496
3114,Toy Story 2 (1999),Animation|Children's|Comedy,1999,1.1475
2081,"Little Mermaid, The (1989)",Animation|Children's|Comedy|Musical|Romance,1989,1.141329
1265,Groundhog Day (1993),Comedy|Romance,1993,1.107401
364,"Lion King, The (1994)",Animation|Children's|Musical,1994,1.083212
