## Import packages

In [1]:
import pandas as pd
from CFModel import CFModel

Using Theano backend.


## Define constants


In [2]:
# Input tables; each is read below with pandas as a tab-separated file.
RATINGS_CSV_FILE = 'ml1m_ratings.csv'
USERS_CSV_FILE = 'ml1m_users.csv'
MOVIES_CSV_FILE = 'ml1m_movies.csv'
# Weights file that is loaded into the CFModel instance via load_weights().
MODEL_WEIGHTS_FILE = 'ml1m_weights.h5'
# Third argument to the CFModel constructor; presumably the number of latent
# factors per user/movie -- TODO confirm against CFModel.
K_FACTORS = 20
# userid of the user this notebook inspects and produces recommendations for.
TEST_USER = 3000

## Load MovieLens 1M data

In [3]:
# Read only the three columns this notebook uses from the tab-separated ratings file.
ratings = pd.read_csv(RATINGS_CSV_FILE, sep='\t', encoding='latin-1', usecols=['userid', 'movieid', 'rating'])
# max() is unaffected by repeated values, so no drop_duplicates() pass is needed.
# Both maxima are later handed to the CFModel constructor.
max_userid = ratings['userid'].max()
max_movieid = ratings['movieid'].max()
# A single-argument print(...) emits the same text under Python 2 and Python 3.
print('{} ratings loaded.'.format(len(ratings)))

1000209 ratings loaded.


In [4]:
# Read the user description columns from the tab-separated users file.
users = pd.read_csv(USERS_CSV_FILE, sep='\t', encoding='latin-1', usecols=['userid', 'gender', 'zipcode', 'age_desc', 'occ_desc'])
# Compare the number of description rows with the largest userid seen in the ratings.
# A single-argument print(...) emits the same text under Python 2 and Python 3.
print('{} descriptions of {} users loaded.'.format(len(users), max_userid))

6040 descriptions of 6040 users loaded.


In [5]:
# Read the movie description columns from the tab-separated movies file.
movies = pd.read_csv(MOVIES_CSV_FILE, sep='\t', encoding='latin-1', usecols=['movieid', 'title', 'genre'])
# Compare the number of description rows with the largest movieid seen in the ratings;
# the two can differ when some ids in the range have no description row.
# A single-argument print(...) emits the same text under Python 2 and Python 3.
print('{} descriptions of {} movies loaded.'.format(len(movies), max_movieid))

3883 descriptions of 3952 movies loaded.


## Make recommendations for a given user

In [6]:
# Build the model from the largest user/movie ids found in the ratings and K_FACTORS.
# Presumably the first two arguments size the user and movie embeddings -- TODO confirm against CFModel.
trained_model = CFModel(max_userid, max_movieid, K_FACTORS)

In [7]:
# Load the weights stored in MODEL_WEIGHTS_FILE into the model built above.
trained_model.load_weights(MODEL_WEIGHTS_FILE)

In [8]:
# Display the description row(s) of the user under test (rows whose userid equals TEST_USER).
users[users['userid'] == TEST_USER]

Unnamed: 0,userid,gender,zipcode,age_desc,occ_desc
2999,3000,M,55408,25-34,college/grad student


In [9]:
def predict_rating(userid, movieid):
    """Return the rating `trained_model` predicts for a user/movie pair.

    Both ids are decremented by one before being passed to
    `trained_model.rate`; presumably the model indexes from 0 while the
    dataset ids start at 1 -- TODO confirm against CFModel.

    Args:
        userid: user id as it appears in the ratings table.
        movieid: movie id as it appears in the ratings table.

    Returns:
        Whatever `trained_model.rate` returns for the shifted pair.
    """
    user_index = userid - 1
    movie_index = movieid - 1
    return trained_model.rate(user_index, movie_index)

In [10]:
# Select TEST_USER's ratings with a single .loc call and take an explicit copy,
# so that adding the 'prediction' column below writes to an independent frame
# and cannot trigger pandas' SettingWithCopyWarning.
user_ratings = ratings.loc[ratings['userid'] == TEST_USER, ['userid', 'movieid', 'rating']].copy()
# Predict a rating for every movie the user actually rated.
user_ratings['prediction'] = user_ratings.apply(lambda x: predict_rating(TEST_USER, x['movieid']), axis=1)
# Show the first ten rows, ordered by actual rating (highest first) and joined
# with the movie descriptions, so actual and predicted ratings can be compared.
user_ratings.sort_values(by='rating', ascending=False).merge(
    movies, on='movieid', how='inner', suffixes=['_u', '_m']).head(10)

Unnamed: 0,userid,movieid,rating,prediction,title,genre
0,3000,590,5,3.506141,Dances with Wolves (1990),Adventure|Drama|Western
1,3000,3552,5,3.434246,Caddyshack (1980),Comedy
2,3000,2858,5,4.038718,American Beauty (1999),Comedy|Drama
3,3000,3358,5,3.360126,Defending Your Life (1991),Comedy|Romance
4,3000,2968,5,3.327013,Time Bandits (1981),Adventure|Fantasy|Sci-Fi
5,3000,1307,5,3.632926,When Harry Met Sally... (1989),Comedy|Romance
6,3000,144,5,3.016357,"Brothers McMullen, The (1995)",Comedy
7,3000,1193,5,4.001684,One Flew Over the Cuckoo's Nest (1975),Drama
8,3000,1265,5,3.614451,Groundhog Day (1993),Comedy|Romance
9,3000,733,5,3.301279,"Rock, The (1996)",Action|Adventure|Thriller


In [11]:
# Candidate movies: every movieid that occurs in the ratings table but not among
# TEST_USER's own ratings. `~mask` is the idiomatic negation of a boolean mask
# (instead of `mask == False`). Movies that nobody rated never become candidates.
recommendations = ratings.loc[~ratings['movieid'].isin(user_ratings['movieid']), ['movieid']].drop_duplicates()
# Predict TEST_USER's rating for each candidate movie.
recommendations['prediction'] = recommendations.apply(lambda x: predict_rating(TEST_USER, x['movieid']), axis=1)
# Show the ten candidates with the highest predicted rating, joined with the movie descriptions.
recommendations.sort_values(by='prediction', ascending=False).merge(
    movies, on='movieid', how='inner', suffixes=['_u', '_m']).head(10)

Unnamed: 0,movieid,prediction,title,genre
0,2905,4.221931,Sanjuro (1962),Action|Adventure
1,745,4.123325,"Close Shave, A (1995)",Animation|Comedy|Thriller
2,2019,4.123102,Seven Samurai (The Magnificent Seven) (Shichin...,Action|Drama
3,668,4.11767,Pather Panchali (1955),Drama
4,2351,4.108316,Nights of Cabiria (Le Notti di Cabiria) (1957),Drama
5,3022,4.104285,"General, The (1927)",Comedy
6,1212,4.097769,"Third Man, The (1949)",Mystery|Thriller
7,1178,4.097549,Paths of Glory (1957),Drama|War
8,670,4.082615,"World of Apu, The (Apur Sansar) (1959)",Drama
9,922,4.073469,Sunset Blvd. (a.k.a. Sunset Boulevard) (1950),Film-Noir
