In [4]:
import numpy as np
import pandas as pd
import random
from surprise import Dataset, Reader

In [5]:
seed_constant = 27
np.random.seed(seed_constant)
random.seed(seed_constant)

## Load data and model

In [6]:
DATAPATH = "../data/interim/"

In [7]:
test = pd.read_csv(DATAPATH + 'test.csv')
test.head()

Unnamed: 0,user_id,movie_title,rating
0,880,Leaving Las Vegas (1995),4.0
1,543,Courage Under Fire (1996),3.0
2,393,"Thin Blue Line, The (1988)",3.0
3,267,Supercop (1992),5.0
4,297,My Fellow Americans (1996),3.0


In [8]:
reader = Reader(rating_scale=(1, 5))
testset = Dataset.load_from_df(test, reader)
testset_full = testset.construct_testset(testset.raw_ratings)

In [15]:
import pickle

with open('../models/final_model.pkl', 'rb') as f:
    classifier = pickle.load(f)

## Load additional description data

In [None]:
DATAPATH = "../data/raw/ml-100k/"


In [69]:
#Load the Ratings data
data = pd.read_csv(DATAPATH + 'u.data', sep="\t", header=None)
data.columns = ['user_id', 'movie_id', 'rating', 'timestamp']
data.head()

Unnamed: 0,user_id,movie_id,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [67]:
genre = pd.read_csv(DATAPATH + 'u.genre', sep="|", encoding='latin-1', header=None)
genre.drop(genre.columns[1], axis=1, inplace=True)
genre.columns = ['Genres']
genre_list = list(genre['Genres'])

columns = ['movie_id', 'movie_title' ,'release_date','video_release_date', 'IMDb_URL']
columns.extend(genre_list)

item = pd.read_csv(DATAPATH + 'u.item', sep="|", encoding='latin-1', header=None)
item.columns = columns

In [68]:
def get_genre(row):
    s = ''
    for c in item.columns:
        if row[c]==1 and c!='movie_id':
            s += c + '|'
    return s

item['genres'] = item.apply(get_genre, axis=1)
item = item.drop(['video_release_date'], axis=1)
item = item.drop(genre_list, axis=1)
item.head()

Unnamed: 0,movie_id,movie_title,release_date,IMDb_URL,genres
0,1,Toy Story (1995),01-Jan-1995,http://us.imdb.com/M/title-exact?Toy%20Story%2...,Animation|Children's|Comedy|
1,2,GoldenEye (1995),01-Jan-1995,http://us.imdb.com/M/title-exact?GoldenEye%20(...,Action|Adventure|Thriller|
2,3,Four Rooms (1995),01-Jan-1995,http://us.imdb.com/M/title-exact?Four%20Rooms%...,Thriller|
3,4,Get Shorty (1995),01-Jan-1995,http://us.imdb.com/M/title-exact?Get%20Shorty%...,Action|Comedy|Drama|
4,5,Copycat (1995),01-Jan-1995,http://us.imdb.com/M/title-exact?Copycat%20(1995),Crime|Drama|Thriller|


## Get top predictions

In [71]:
from collections import defaultdict


def get_top_n(predictions, n=10):
    # First map the predictions to each user.
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))

    # Then sort the predictions for each user and retrieve the k highest ones.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n

In [116]:
def print_movies(movies):
    for _, row  in movies.iterrows():
        # movie_id, movie_title, release_date, IMDb_URL, genres
        print('Title: {}, genres: {}'.format(row['movie_title'], row['genres']))

In [117]:
def print_prediction(uid, user_ratings):
    print('User id: {}'.format(uid))

    temp = data[data['user_id'] == uid].sort_values("rating", ascending = False)
    best_movies = temp['movie_id'][:5]
    user_history = item[item['movie_id'].isin(best_movies)]
    print('Best user ratings: ')
    print_movies(user_history)

    print('Best recomendations: ')
    predicted_movies = [title for (title, _) in user_ratings]
    recommended_movies = item[item['movie_title'].isin(predicted_movies)]
    print_movies(recommended_movies)

In [119]:
predictions = classifier.test(testset_full)
top_n = get_top_n(predictions, n=5)

# Print the recommended items for each user
iterator = iter(top_n.items())
for i in range(3):
    uid, user_ratings = next(iterator)
    print()
    print_prediction(uid, user_ratings)


User id: 880
Best user ratings: 
Title: Taxi Driver (1976), genres: Drama|Thriller|
Title: Shawshank Redemption, The (1994), genres: Drama|
Title: GoodFellas (1990), genres: Crime|Drama|
Title: Sting, The (1973), genres: Comedy|Crime|
Title: Wag the Dog (1997), genres: Comedy|Drama|
Best recomendations: 
Title: Star Wars (1977), genres: Action|Adventure|Romance|Sci-Fi|War|
Title: Empire Strikes Back, The (1980), genres: Action|Adventure|Drama|Romance|Sci-Fi|War|
Title: Apocalypse Now (1979), genres: Drama|War|
Title: Schindler's List (1993), genres: Drama|War|
Title: Rear Window (1954), genres: Mystery|Thriller|

User id: 543
Best user ratings: 
Title: Usual Suspects, The (1995), genres: Crime|Thriller|
Title: Citizen Kane (1941), genres: Drama|
Title: Henry V (1989), genres: Drama|War|
Title: Bridges of Madison County, The (1995), genres: Drama|Romance|
Title: Rear Window (1954), genres: Mystery|Thriller|
Best recomendations: 
Title: Citizen Kane (1941), genres: Drama|
Title: Henry V