In [1]:
import os
import pandas as pd
import numpy as np
import surprise

In [2]:
# Folder that holds the "ml-latest-small" CSV files used by this notebook.
# Written as a raw string so the Windows backslashes stay literal: in a normal
# string, sequences such as "\W" or "\M" are invalid escapes that Python warns
# about (and is slated to reject).
# NOTE(review): machine-specific absolute path - adjust for your environment.
data_dir=r'E:\Work\Machine Learning Course\Python\Module 7 Reccomendation Engines\Data\ml-latest-small'

In [3]:
# Make the data folder the process working directory so that the relative
# "ratings.csv" read in the next cell resolves against it.
os.chdir(data_dir)

In [4]:
# Load the ratings table from the current working directory and preview
# its first five rows.
mr = pd.read_csv(filepath_or_buffer="ratings.csv")
mr.head(n=5)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205


In [5]:
# Drop the unused timestamp column and switch to the user/item/rating column
# names used in the rest of the notebook.
# Done without `inplace=True` and with `errors='ignore'` so that re-running
# this cell after it has already run is a no-op instead of raising KeyError on
# the now-missing 'timestamp' column. `rename` already ignores absent keys.
mr = (
    mr.drop('timestamp', axis=1, errors='ignore')
      .rename(columns={'userId': 'user', 'movieId': 'item'})
)

In [6]:
# Columns are user, item, rating. The ratings preview contains half-star
# values (e.g. 2.5), and the ml-latest-small ratings run from 0.5 to 5.0, so
# the scale must start at 0.5: with a lower bound of 1, estimates are clipped
# to [1, 5] and ratings of 0.5 can never be predicted.
# NOTE(review): confirm the bounds with mr['rating'].min() / .max().
reader=surprise.Reader(line_format='user item rating', rating_scale=(0.5,5))

In [7]:
# Wrap the ratings frame in a Surprise dataset using the reader defined above,
# then build a training set that contains every rating (no hold-out).
mr_train = surprise.dataset.Dataset.load_from_df(df=mr, reader=reader)
mr_trainset = mr_train.build_full_trainset()

In [8]:
## Create a matrix-factorization (SVD) collaborative filtering model
## (SVD is a latent-factor model, not a neighbourhood-based user/item kNN model)
from surprise import SVD

In [9]:
# SVD model with 20 factors; every other hyperparameter keeps its library default.
model=SVD(n_factors=20)

In [10]:
# Fit the SVD model on the training set built from all ratings.
# NOTE(review): `train` appears to be the older Surprise spelling; later
# releases document `fit` for this step - confirm against the installed version.
model.train(mr_trainset)

In [11]:
# Preview the renamed frame to pick a known (user, item, rating) row for the
# prediction check in the next cell.
mr.head()

Unnamed: 0,user,item,rating
0,1,31,2.5
1,1,1029,3.0
2,1,1061,3.0
3,1,1129,2.0
4,1,1172,4.0


In [12]:
# Estimate the rating for user 1 / item 31 (the first row of the preview).
# The known rating 2.5 is passed as `r_ui` so it is echoed in the returned
# Prediction next to the estimate. This pair is part of the training data, so
# this is an in-sample sanity check, not a held-out evaluation.
model.predict(uid=1,iid=31,r_ui=2.5)

Prediction(uid=1, iid=31, r_ui=2.5, est=2.3443625859847432, details={'was_impossible': False})

In [13]:
from surprise import NMF

In [14]:
# NMF model with 20 factors and the biased variant switched on;
# all other hyperparameters keep their library defaults.
model1 = NMF(n_factors=20, biased=True)

In [15]:
# Fit the NMF model on the same full training set used for the SVD model.
# NOTE(review): `train` appears to be the older Surprise spelling; later
# releases document `fit` for this step - confirm against the installed version.
model1.train(mr_trainset)

In [16]:
# Same in-sample check as for the SVD model: user 1 / item 31, with the known
# rating 2.5 passed as `r_ui` for reference.
model1.predict(uid=1,iid=31,r_ui=2.5)

Prediction(uid=1, iid=31, r_ui=2.5, est=2.1048622051257442, details={'was_impossible': False})

In [17]:
## Evaluate Model performance at current values of hyperparameters
import random

# Seed both random number generators before splitting and fitting so that a
# fresh-kernel run of the notebook gives repeatable scores. Surprise's FAQ on
# reproducible experiments recommends seeding `random` and NumPy's global RNG.
random.seed(0)
np.random.seed(0)

# 3-fold cross-validation of a fresh SVD model with 20 factors.
# NOTE(review): `split` / `surprise.evaluate` look like the legacy evaluation
# API; later releases document `surprise.model_selection.cross_validate` -
# confirm against the installed version.
mr_train.split(n_folds=3)
surprise.evaluate(SVD(n_factors=20),mr_train)

Evaluating RMSE, MAE of algorithm SVD.

------------
Fold 1
RMSE: 0.8933
MAE:  0.6884
------------
Fold 2
RMSE: 0.8987
MAE:  0.6911
------------
Fold 3
RMSE: 0.8962
MAE:  0.6906
------------
------------
Mean RMSE: 0.8961
Mean MAE : 0.6900
------------
------------


CaseInsensitiveDefaultDict(list,
                           {'mae': [0.68835448921016751,
                             0.69105985166343509,
                             0.69059494820623635],
                            'rmse': [0.8933343452726894,
                             0.89869348267164051,
                             0.89621495316505861]})

In [18]:
import random

# Seed both random number generators before splitting and fitting so that a
# fresh-kernel run of the notebook gives repeatable scores. Surprise's FAQ on
# reproducible experiments recommends seeding `random` and NumPy's global RNG.
random.seed(0)
np.random.seed(0)

# 3-fold cross-validation of a fresh biased NMF model with 20 factors.
# NOTE(review): the recorded scores for this configuration are much worse and
# less stable across folds than the SVD run; Surprise's NMF documentation warns
# that the biased variant is prone to overfitting - consider fewer factors,
# stronger regularization, or biased=False.
mr_train.split(n_folds=3)
surprise.evaluate(NMF(n_factors=20,biased=True),mr_train)

Evaluating RMSE, MAE of algorithm NMF.

------------
Fold 1
RMSE: 1.1884
MAE:  0.8837
------------
Fold 2
RMSE: 1.8503
MAE:  1.5032
------------
Fold 3
RMSE: 1.5274
MAE:  1.1921
------------
------------
Mean RMSE: 1.5220
Mean MAE : 1.1930
------------
------------


CaseInsensitiveDefaultDict(list,
                           {'mae': [0.88372354158847177,
                             1.5031689453788133,
                             1.192095085475755],
                            'rmse': [1.1883606365743693,
                             1.8503010994692384,
                             1.5273984162931631]})

In [19]:
## Grid search for the SVD model over the number of factors
# Candidate values for `n_factors`; the algorithm is passed as a class
# (not an instance) to the search set up in the next cell.
param_grid = dict(n_factors=[15, 20, 25, 30])
algo = SVD

In [20]:
# Set up a grid search over `param_grid` for the `algo` class, scoring every
# parameter combination on RMSE and MAE.
# NOTE(review): `surprise.GridSearch` looks like the legacy search API; later
# releases document `surprise.model_selection.GridSearchCV` instead - confirm
# against the installed version before re-running.
grid_search=surprise.GridSearch(algo,param_grid=param_grid,measures=["RMSE","MAE"])

[{'n_factors': 15}, {'n_factors': 20}, {'n_factors': 25}, {'n_factors': 30}]


In [21]:
# Run the search on the dataset object; the mean RMSE/MAE of each parameter
# combination is printed as it completes.
# presumably reuses the fold setup from the last `mr_train.split(...)` call - TODO confirm
grid_search.evaluate(mr_train)

------------
Parameters combination 1 of 4
params:  {'n_factors': 15}
------------
Mean RMSE: 0.8958
Mean MAE : 0.6906
------------
------------
Parameters combination 2 of 4
params:  {'n_factors': 20}
------------
Mean RMSE: 0.8964
Mean MAE : 0.6911
------------
------------
Parameters combination 3 of 4
params:  {'n_factors': 25}
------------
Mean RMSE: 0.8964
Mean MAE : 0.6909
------------
------------
Parameters combination 4 of 4
params:  {'n_factors': 30}
------------
Mean RMSE: 0.8979
Mean MAE : 0.6919
------------


In [22]:
# Show the winning parameter combination for each metric, RMSE first.
for metric in ("RMSE", "MAE"):
    print(grid_search.best_params[metric])

{'n_factors': 15}
{'n_factors': 15}


In [23]:
# Show the best (lowest) mean score reached for each metric, RMSE first.
for metric in ("RMSE", "MAE"):
    print(grid_search.best_score[metric])

0.895781492707
0.690593314114
