In [25]:
import numpy as np
import pandas as pd
from __future__ import (absolute_import, division, print_function, unicode_literals)
import os
from surprise import SVD
from surprise import Dataset
from surprise import Reader
from surprise.model_selection import cross_validate

## Data Load

In [26]:
file_path = os.path.expanduser('~/.surprise_data/ml-100k/ml-100k/u.data')
reader = Reader(line_format = 'user item rating timestamp', sep = '\t')
data = Dataset.load_from_file(file_path, reader = reader)

In [27]:
algo = SVD()
cross_validate(algo, data, measures = ['RMSE', 'MAE'], cv = 5, verbose = True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9347  0.9423  0.9330  0.9471  0.9310  0.9376  0.0061  
MAE (testset)     0.7380  0.7410  0.7351  0.7480  0.7324  0.7389  0.0054  
Fit time          12.40   13.51   12.61   11.04   13.83   12.68   0.98    
Test time         0.47    0.38    0.31    0.34    0.37    0.37    0.05    


{'test_rmse': array([0.9346969 , 0.9422943 , 0.932964  , 0.94709733, 0.93098753]),
 'test_mae': array([0.73804087, 0.74097557, 0.73513271, 0.74801885, 0.73238158]),
 'fit_time': (12.401796102523804,
  13.508845567703247,
  12.608235120773315,
  11.044529438018799,
  13.834606647491455),
 'test_time': (0.4650845527648926,
  0.37822699546813965,
  0.31382083892822266,
  0.3357243537902832,
  0.3719980716705322)}

In [28]:
df = pd.DataFrame(data.raw_ratings, columns = ['uid', 'iid', 'rate', 'timestamp'])

In [29]:
df

Unnamed: 0,uid,iid,rate,timestamp
0,196,242,3.0,881250949
1,186,302,3.0,891717742
2,22,377,1.0,878887116
3,244,51,2.0,880606923
4,166,346,1.0,886397596
5,298,474,4.0,884182806
6,115,265,2.0,881171488
7,253,465,5.0,891628467
8,305,451,3.0,886324817
9,6,86,3.0,883603013


In [30]:
user = list(set(df.uid))
item = list(set(df.iid))

## Basic CF algorithm

In [31]:
from surprise import KNNBasic
trainset = data.build_full_trainset()
sim_options = {'name' : 'cosine', 'user_based' : True}

In [32]:
algo = KNNBasic(k = 40, min_k = 1, simoptions = sim_options)
algo.fit(trainset)

Computing the msd similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x149b3829668>

In [33]:
pred_rating_basic = np.zeros((len(user) + 1, len(item) + 1))
for u in user:
    iids = df[df.uid == u]
    for i in range(1, len(iids) + 1):
        iid = iids[i - 1:i].iid.values[0]
        r_ui = iids[i - 1:i].rate.values[0]
        pred = algo.predict(u, iid, r_ui, verbose = False)
        pred_rating_basic[int(u)][int(iid)] = pred.est

In [34]:
pred_rating_basic # pred_rating_basic[uid][iid]

array([[0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 4.12997131, 3.1930005 , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 4.17483517, 0.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 4.49353944, 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 3.50017794, ..., 0.        , 0.        ,
        0.        ]])

## CF algorithm with mean

In [35]:
from surprise import KNNWithMeans

In [36]:
algo = KNNWithMeans(k = 40, min_k = 1, simoptions = sim_options)
algo.fit(trainset)

Computing the msd similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNWithMeans at 0x149b38295f8>

In [37]:
pred_rating_mean = np.zeros((len(user) + 1, len(item) + 1))
for u in user:
    iids = df[df.uid == u]
    for i in range(1, len(iids) + 1):
        iid = iids[i - 1:i].iid.values[0]
        r_ui = iids[i - 1:i].rate.values[0]
        pred = algo.predict(u, iid, r_ui, verbose = False)
        pred_rating_mean[int(u)][int(iid)] = pred.est

In [50]:
pred_rating_mean # pred_rating[uid][iid]

array([[0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 3.890355  , 3.17539145, ..., 0.        , 0.        ,
        0.        ],
       [0.        , 4.0140942 , 0.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 4.75242209, 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 3.30818473, ..., 0.        , 0.        ,
        0.        ]])

## CF algorithm with z-score

In [38]:
from surprise import KNNWithZScore

In [39]:
algo = KNNWithZScore(k = 40, min_k = 1, simoptions = sim_options)
algo.fit(trainset)

Computing the msd similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNWithZScore at 0x149b3829978>

In [40]:
pred_rating_z = np.zeros((len(user) + 1, len(item) + 1))
for u in user:
    iids = df[df.uid == u]
    for i in range(1, len(iids) + 1):
        iid = iids[i - 1:i].iid.values[0]
        r_ui = iids[i - 1:i].rate.values[0]
        pred = algo.predict(u, iid, r_ui, verbose = False)
        pred_rating_z[int(u)][int(iid)] = pred.est

In [52]:
pred_rating_z # pred_rating[uid][iid]

array([[0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 3.96878304, 3.03329263, ..., 0.        , 0.        ,
        0.        ],
       [0.        , 4.04859899, 0.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 4.68476278, 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 3.20540185, ..., 0.        , 0.        ,
        0.        ]])

## SVD

In [41]:
from surprise import SVD

In [42]:
algo = SVD(n_factors = 100, n_epochs = 20, biased = False, lr_all = 0.005, reg_all = 0)
algo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x149b3829f98>

In [43]:
pred_rating_svd = np.zeros((len(user) + 1, len(item) + 1))
for u in user:
    iids = df[df.uid == u]
    for i in range(1, len(iids) + 1):
        iid = iids[i - 1:i].iid.values[0]
        r_ui = iids[i - 1:i].rate.values[0]
        pred = algo.predict(u, iid, r_ui, verbose = False)
        pred_rating_svd[int(u)][int(iid)] = pred.est

In [53]:
pred_rating_svd # pred_rating[uid][iid]

array([[0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 5.        , 3.17588084, ..., 0.        , 0.        ,
        0.        ],
       [0.        , 3.76016444, 0.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 4.61737491, 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 3.76791589, ..., 0.        , 0.        ,
        0.        ]])

## PMF

In [44]:
from surprise import SVD

In [45]:
algo = SVD(n_factors = 100, n_epochs = 20, biased = False, lr_all = 0.005, reg_all = 0.02)
algo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x149b3829710>

In [46]:
pred_rating_pmf = np.zeros((len(user) + 1, len(item) + 1))
for u in user:
    iids = df[df.uid == u]
    for i in range(1, len(iids) + 1):
        iid = iids[i - 1:i].iid.values[0]
        r_ui = iids[i - 1:i].rate.values[0]
        pred = algo.predict(u, iid, r_ui, verbose = False)
        pred_rating_pmf[int(u)][int(iid)] = pred.est

In [54]:
pred_rating_pmf # pred_rating[uid][iid]

array([[0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 4.11393133, 3.07647591, ..., 0.        , 0.        ,
        0.        ],
       [0.        , 3.84283827, 0.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 4.30428769, 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 3.30187663, ..., 0.        , 0.        ,
        0.        ]])

## PMF with biased

In [47]:
from surprise import SVD

In [48]:
algo = SVD(n_factors = 100, n_epochs = 20, biased = True, lr_all = 0.005, reg_all = 0.02)
algo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x149b8ba0240>

In [49]:
pred_rating_pmf_bi = np.zeros((len(user) + 1, len(item) + 1))
for u in user:
    iids = df[df.uid == u]
    for i in range(1, len(iids) + 1):
        iid = iids[i - 1:i].iid.values[0]
        r_ui = iids[i - 1:i].rate.values[0]
        pred = algo.predict(u, iid, r_ui, verbose = False)
        pred_rating_pmf_bi[int(u)][int(iid)] = pred.est

In [55]:
pred_rating_pmf_bi # pred_rating[uid][iid]

array([[0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 3.9945954 , 3.38047108, ..., 0.        , 0.        ,
        0.        ],
       [0.        , 4.14281664, 0.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 4.40220499, 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 3.82112415, ..., 0.        , 0.        ,
        0.        ]])