In [1]:
import numpy as np
import pandas as pd
import os
import sys
import pickle
import time
import datetime
import matplotlib.pyplot as plt
import seaborn as sns
from importlib import reload
%matplotlib inline
# Import the display helpers from the public IPython.display module;
# importing them from IPython.core.display has been deprecated since IPython 7.14.
from IPython.display import display, HTML, clear_output
# Widen the classic-notebook page container to 80% of the browser window.
display(HTML("<style>.container { width:80% !important; }</style>"))

In [2]:
# Raise pandas' display limits so wide/long frames are shown without truncation.
for _option in ("display.max_rows", "display.max_columns", "display.max_colwidth"):
    pd.set_option(_option, 999)
del _option

## Import PMF class

In [3]:
# Make the sibling ../src directory importable from this notebook.
# The path is normalized so the entry added to sys.path has no dangling '..'
# segment, and re-running the cell never appends a duplicate.
cwd = os.getcwd()
path = os.path.normpath(os.path.join(cwd, '..', 'src'))
if path not in sys.path:
    sys.path.append(path)
# Temporary names only; keep the notebook namespace clean.
del cwd, path

In [4]:
from MovieRecommender import PMF

## Load ratings data

In [5]:
# Read ../data/ratings.csv (relative to the working directory) and discard the
# 'timestamp' column, which is not used afterwards in this notebook.
cwd = os.getcwd()
ratings = (
    pd.read_csv(os.path.join(cwd, "..", "data", "ratings.csv"))
    .drop('timestamp', axis=1)
)

In [8]:
# Preview the first rows to confirm the columns left after dropping 'timestamp'.
ratings.head()

Unnamed: 0,userId,movieId,rating
0,1,2,3.5
1,1,29,3.5
2,1,32,3.5
3,1,47,3.5
4,1,50,3.5


## Define Adam's userid

In [9]:
# Adam is added as a brand-new user: his id is one past the largest userId
# already present in the ratings data, so it cannot collide with an existing user.
max_userId = ratings['userId'].max()
adam_id = max_userId + 1
print('Adam userid: ' + str(adam_id))

## Load Adam's ratings profile and union it with the ratings data

In [7]:
# Load Adam's personal ratings (first CSV column is the index), tag every row
# with his user id, and order the columns the same way as the `ratings` frame.
adam = (
    pd.read_csv('adam_profile.csv', index_col=0)
    .assign(userId=adam_id)
    [['userId', 'movieId', 'rating']]
)

In [16]:
# Preview Adam's profile.
# NOTE(review): the saved output lists the columns as movieId, rating, userId,
# which does not match the column order selected when `adam` was built — the
# output looks stale; re-run this cell after the load cell.
adam.head()

Unnamed: 0,movieId,rating,userId
0,1,4,138494
1,33794,5,138494
2,3755,5,138494
3,2053,2,138494
4,102407,5,138494


In [20]:
# Expected number of rows after the union; compare with ratings_combined.shape.
len(ratings) + len(adam)

20000875

In [21]:
# Union the full ratings data with Adam's ratings.
# ignore_index=True gives the result a fresh 0..n-1 index; without it Adam's
# row labels (starting at 0) would duplicate labels already used by `ratings`,
# making any label-based lookup on the combined frame ambiguous.
ratings_combined = pd.concat([ratings, adam], ignore_index=True)

In [22]:
# Sanity check: the row count should equal len(ratings) + len(adam), with 3 columns.
ratings_combined.shape

(20000875, 3)

In [23]:
# Preview the combined frame; the first rows come from the original ratings data.
ratings_combined.head()

Unnamed: 0,userId,movieId,rating
0,1,2,3.5
1,1,29,3.5
2,1,32,3.5
3,1,47,3.5
4,1,50,3.5


In [24]:
# Count missing values per column; a non-zero count would indicate that the two
# frames did not share the same column names when they were concatenated.
ratings_combined.isnull().sum()

userId     0
movieId    0
rating     0
dtype: int64

## Fit PMF using all data, including Adam's ratings

In [25]:
# Instantiate the project's PMF model.
# NOTE(review): presumably rank is the latent dimension, lamd the regularization
# strength and sig2 the noise variance, and verbose=True enables the timestamped
# progress log — confirm against MovieRecommender.PMF.
pmf = PMF(rank=10, lamd=7.5, sig2=0.5, verbose=True)

In [26]:
# Train on the combined ratings (about 20 million rows).
# WARNING: long-running — the recorded log shows roughly 7h45m wall-clock from
# "building omega" to "iteration 50 of 50". Avoid re-running casually.
pmf.fit(ratings_combined)

force_refresh is set to: True
persisting user mapping
persisting movie mapping
building omega | 2017-08-08 17:02:12.512816
building omega_u | 2017-08-08 17:02:35.681375
building omega_v | 2017-08-08 19:05:12.426757
building the matrix dictionary M | 2017-08-08 19:29:09.337894
beginning training epochs | 2017-08-08 19:30:49.966994
iteration 10 of 50 | 2017-08-08 20:28:57.254689
iteration 20 of 50 | 2017-08-08 21:34:10.580610
iteration 30 of 50 | 2017-08-08 22:38:33.584332
iteration 40 of 50 | 2017-08-08 23:43:07.185445
iteration 50 of 50 | 2017-08-09 00:47:45.841756


## Persist model

In [26]:
# Serialize the trained model into the working directory.
# The context manager guarantees the file is flushed and closed even if
# pickling raises, so a multi-hour training run is not lost to a half-written
# or still-open file.
with open('adam_pmf_model_trained.pkl', "wb") as model_file:
    pickle.dump(pmf, model_file)