## Import packages

In [1]:
import pandas as pd
from CFModel import CFModel

Using Theano backend.
Using gpu device 0: GRID K520 (CNMeM is disabled, cuDNN 5006)


## Define constants


In [2]:
# Tab-separated MovieLens 1M input files read by the loading cells below.
RATINGS_CSV_FILE = 'ml1m_ratings.csv'
USERS_CSV_FILE = 'ml1m_users.csv'
MOVIES_CSV_FILE = 'ml1m_movies.csv'
# Weights file handed to the model's load_weights() call.
MODEL_WEIGHTS_FILE = 'ml1m_weights.h5'
# Third argument to CFModel -- presumably the latent-factor dimension, which
# would have to match the saved weights (TODO confirm against CFModel).
K_FACTORS = 120
# userid whose ratings are inspected and for whom recommendations are produced.
TEST_USER = 3000

## Load MovieLens 1M data

In [3]:
# Load the (userid, movieid, rating) triples from the tab-separated ratings file.
ratings = pd.read_csv(RATINGS_CSV_FILE, sep='\t', encoding='latin-1', usecols=['userid', 'movieid', 'rating'])
# Largest ids present in the ratings; later passed to CFModel as its first two
# arguments. De-duplicating first cannot change a maximum, so .max() alone is enough.
max_userid = ratings['userid'].max()
max_movieid = ratings['movieid'].max()
# print(<single string>) emits the same text on Python 2 and Python 3,
# whereas the bare print statement is a SyntaxError on Python 3.
print('%d ratings loaded.' % len(ratings))

1000209 ratings loaded.


In [4]:
# Load the per-user description columns from the tab-separated users file.
users = pd.read_csv(USERS_CSV_FILE, sep='\t', encoding='latin-1', usecols=['userid', 'gender', 'zipcode', 'age_desc', 'occ_desc'])
# Report the row count next to the largest userid seen in the ratings, so a
# mismatch between the two is visible at a glance.
# print(<single string>) emits the same text on Python 2 and Python 3.
print('%d descriptions of %d users loaded.' % (len(users), max_userid))

6040 descriptions of 6040 users loaded.


In [5]:
# Load movie id, title and genre from the tab-separated movies file.
movies = pd.read_csv(MOVIES_CSV_FILE, sep='\t', encoding='latin-1', usecols=['movieid', 'title', 'genre'])
# Report the row count next to the largest movieid seen in the ratings; the
# two can differ when some movie ids have no description row.
# print(<single string>) emits the same text on Python 2 and Python 3.
print('%d descriptions of %d movies loaded.' % (len(movies), max_movieid))

3883 descriptions of 3952 movies loaded.


## Make recommendations for a given user

In [6]:
# Build a CFModel from the largest userid, the largest movieid and K_FACTORS.
# Despite its name, the object only carries trained parameters once
# load_weights() has been called on it.
trained_model = CFModel(max_userid, max_movieid, K_FACTORS)

In [7]:
# Load saved weights from MODEL_WEIGHTS_FILE into the model -- presumably
# written by a separate training run that is not part of this notebook.
trained_model.load_weights(MODEL_WEIGHTS_FILE)

In [8]:
# Display the description row(s) whose userid equals TEST_USER.
users.loc[users['userid'].eq(TEST_USER)]

Unnamed: 0,userid,gender,zipcode,age_desc,occ_desc
2999,3000,M,55408,25-34,college/grad student


In [9]:
def predict_rating(userid, movieid):
    """Return ``trained_model``'s rating estimate for one user/movie pair.

    Both ids are decremented by one before being passed to
    ``trained_model.rate`` -- presumably because dataset ids start at 1 while
    the model indexes from 0 (TODO confirm against CFModel).

    Args:
        userid: user id as it appears in the ratings data.
        movieid: movie id as it appears in the ratings data.

    Returns:
        Whatever ``trained_model.rate`` returns for the shifted pair.
    """
    user_index = userid - 1
    movie_index = movieid - 1
    return trained_model.rate(user_index, movie_index)

In [10]:
# Every rating given by TEST_USER. Selecting rows and columns in a single .loc
# call and taking an explicit .copy() yields an independent frame, so adding the
# 'prediction' column below neither triggers pandas' SettingWithCopyWarning nor
# risks writing into a temporary slice of `ratings`.
user_ratings = ratings.loc[ratings['userid'] == TEST_USER, ['userid', 'movieid', 'rating']].copy()
# Model estimate for each movie the user already rated. Only the movieid is
# needed, so map over that column instead of building a row object per rating;
# this also stays well-defined when the selection is empty.
user_ratings['prediction'] = user_ratings['movieid'].apply(lambda movieid: predict_rating(TEST_USER, movieid))
# Ten highest actual ratings, shown with the prediction and the movie
# title/genre joined in on movieid.
user_ratings.sort_values(by='rating',
                         ascending=False).merge(movies,
                                                on='movieid',
                                                how='inner',
                                                suffixes=['_u', '_m']).head(10)

Unnamed: 0,userid,movieid,rating,prediction,title,genre
0,3000,590,5,3.153018,Dances with Wolves (1990),Adventure|Drama|Western
1,3000,3552,5,3.684154,Caddyshack (1980),Comedy
2,3000,2858,5,5.018598,American Beauty (1999),Comedy|Drama
3,3000,3358,5,3.419102,Defending Your Life (1991),Comedy|Romance
4,3000,2968,5,3.388493,Time Bandits (1981),Adventure|Fantasy|Sci-Fi
5,3000,1307,5,3.841095,When Harry Met Sally... (1989),Comedy|Romance
6,3000,144,5,3.05876,"Brothers McMullen, The (1995)",Comedy
7,3000,1193,5,4.014853,One Flew Over the Cuckoo's Nest (1975),Drama
8,3000,1265,5,3.823973,Groundhog Day (1993),Comedy|Romance
9,3000,733,5,3.066499,"Rock, The (1996)",Action|Adventure|Thriller


In [11]:
# Candidate movies: every movieid that occurs in the ratings but that TEST_USER
# has not rated. `~mask` is the idiomatic negation of a boolean Series
# (instead of `== False`), and one .loc call replaces the chained
# `ratings[mask][['movieid']]` lookup. .copy() makes the frame independent
# before a column is added to it.
recommendations = ratings.loc[~ratings['movieid'].isin(user_ratings['movieid']), ['movieid']].drop_duplicates().copy()
# Model estimate of how TEST_USER would rate each candidate movie.
recommendations['prediction'] = recommendations['movieid'].apply(lambda movieid: predict_rating(TEST_USER, movieid))
# Ten candidates with the highest predicted rating, with title/genre joined in
# on movieid.
recommendations.sort_values(by='prediction',
                            ascending=False).merge(movies,
                                                   on='movieid',
                                                   how='inner',
                                                   suffixes=['_u', '_m']).head(10)

Unnamed: 0,movieid,prediction,title,genre
0,2959,4.624823,Fight Club (1999),Drama
1,296,4.522243,Pulp Fiction (1994),Crime|Drama
2,3160,4.459617,Magnolia (1999),Drama
3,1136,4.3717,Monty Python and the Holy Grail (1974),Comedy
4,50,4.365819,"Usual Suspects, The (1995)",Crime|Thriller
5,2395,4.29095,Rushmore (1998),Comedy
6,2762,4.255275,"Sixth Sense, The (1999)",Thriller
7,745,4.232957,"Close Shave, A (1995)",Animation|Comedy|Thriller
8,593,4.197603,"Silence of the Lambs, The (1991)",Drama|Thriller
9,608,4.188972,Fargo (1996),Crime|Drama|Thriller
