In [1]:
import numpy as np
import pandas as pd
from __future__ import (absolute_import, division, print_function, unicode_literals)
from RatingMatrix import MovieLens_pivot
import os
from surprise import SVD
from surprise import Dataset
from surprise import Reader
from surprise.model_selection import cross_validate
from surprise.model_selection import KFold
from surprise import KNNWithMeans
from surprise import SVD
sim_options = {'name' : 'cosine', 'user_based' : True}
import warnings
warnings.filterwarnings('ignore')

In [None]:
data = Dataset.load_builtin('ml-1m')
kf = KFold(n_splits = 5) 

In [27]:
pred_rating_mean = pd.read_csv('pred_rating_mean.csv')
pred_rating_svd = pd.read_csv('pred_rating_svd.csv')
pred_rating_pmf = pd.read_csv('pred_rating_pmf.csv')
pred_rating_pmf_bi = pd.read_csv('pred_rating_pmf_bi.csv')

## 1) Precision&Recall&F1-measure

### CF with mean

In [4]:
algo = KNNWithMeans(k = 40, min_k = 1, simoptions = sim_options)

In [5]:
for trainset, testset, i in kf.split(data):
    algo.fit(trainset)
    predictions = algo.test(testset)
    precision, recalls = precision_recall_at_k(predictions, k = 5, threshold = 4)
    
    P = sum(prec for prec in precisions.values()) / len(precisons)
    R = sum(prec for rec in recalls.values()) / len(recalls)
    F1 = 2 * P * R / (P + R)
    
    print('precision : ', P)
    print('recall : ', R)
    print('F1 : '  , F1)

ValueError: too many values to unpack (expected 3)

### SVD

In [6]:
algo = SVD(n_factors = 100, n_epochs = 20, biased = False, lr_all = 0.005, reg_all = 0)

In [None]:
for trainset, testset, i in kf.split(data):
    algo.fit(trainset)
    predictions = algo.test(testset)
    precision, recalls = precision_recall_at_k(predictions, k = 5, threshold = 4)
    
    P = sum(prec for prec in precisions.values()) / len(precisons)
    R = sum(prec for rec in recalls.values()) / len(recalls)
    F1 = 2 * P * R / (P + R)
    
    print('precision : ', P)
    print('recall : ', R)
    print('F1 : '  , F1)

### PMF

In [7]:
algo = SVD(n_factors = 100, n_epochs = 20, biased = False, lr_all = 0.005, reg_all = 0.02)

In [None]:
for trainset, testset, i in kf.split(data):
    algo.fit(trainset)
    predictions = algo.test(testset)
    precision, recalls = precision_recall_at_k(predictions, k = 5, threshold = 4)
    
    P = sum(prec for prec in precisions.values()) / len(precisons)
    R = sum(prec for rec in recalls.values()) / len(recalls)
    F1 = 2 * P * R / (P + R)
    
    print('precision : ', P)
    print('recall : ', R)
    print('F1 : '  , F1)

### PMF with biased

In [8]:
algo = SVD(n_factors = 100, n_epochs = 20, biased = True, lr_all = 0.005, reg_all = 0.02)

In [None]:
for trainset, testset, i in kf.split(data):
    algo.fit(trainset)
    predictions = algo.test(testset)
    precision, recalls = precision_recall_at_k(predictions, k = 5, threshold = 4)
    
    P = sum(prec for prec in precisions.values()) / len(precisons)
    R = sum(prec for rec in recalls.values()) / len(recalls)
    F1 = 2 * P * R / (P + R)
    
    print('precision : ', P)
    print('recall : ', R)
    print('F1 : '  , F1)

## 2) NDCG

In [204]:
df = pd.DataFrame(data.raw_ratings, columns = ['uid', 'iid', 'rate', 'timestamp'])
del df['timestamp']
user = list(set(df.uid))

In [271]:
def addRating(uid, method):
    add = df[df.uid == uid]
    add['pred'] = 0.0
    for iid, i in zip(add.iid, add.index):
        add.set_value(i, 'pred', pred_rating_mean.iloc[int(uid) - 1][int(iid) + 1])
    
    return add

def DCG(data):
    pred_sort = list(data.sort_values(by = ['pred'], axis = 0, ascending = False).rate)
    dcg = pred_sort[0]
    for i in range(1, len(pred_sort)):
        dcg += pred_sort[i] / np.log2(i + 1)
        
    return dcg

def IDCG(data):
    rate_sort = list(data.sort_values(by = ['rate'], axis = 0, ascending = False).rate)
    idcg = rate_sort[0]
    for i in range(1, len(rate_sort)):
        idcg += rate_sort[i] / np.log2(i + 1)
        
    return idcg

In [272]:
def NDCG(uid, method):
    table = addRating(uid, method)
    dcg = DCG(table)
    idcg = IDCG(table)
    
    return dcg / idcg

### CF with mean

In [270]:
NDCG_CF_mean = {}

In [281]:
for u, i in zip(user, range(10)):
    value = NDCG(u, pred_rating_mean)      # 오래 걸려서 일단 10개만 돌림
    NDCG_CF_mean[u] = value
    if i == 9:
        break

In [282]:
NDCG_CF_mean

{'5352': 0.9964681842034485,
 '3704': 0.9982952348008067,
 '3604': 1.0,
 '1156': 0.9978229154359466,
 '2428': 0.9944994359395674,
 '4768': 0.9971608295198146,
 '3388': 1.0,
 '3579': 0.9935187253321537,
 '804': 0.9994919576116145,
 '4843': 0.9980309658977566}

### SVD

In [283]:
NDCG_SVD = {}

In [None]:
for u, i in zip(user, range(10)):
    value = NDCG(u, pred_rating_svd)
    NDCG_SVD[u] = value
    if i == 9:
        break

In [None]:
NDCG_SVD

### PMF

In [None]:
NDCG_PMF = {}

In [None]:
for u, i in zip(user, range(10)):
    value = NDCG(u, pred_rating_pmf)
    NDCG_PMF[u] = value
    if i == 9:
        break

In [None]:
NDCG_PMF

### PMF with biased

In [None]:
NDCG_PMF_bi = {}

In [None]:
for u, i in zip(user, range(10)):
    value = NDCG(u, pred_rating_pmf_bi)
    NDCG_PMF_bi[u] = value
    if i == 9:
        break

In [None]:
NDCG_PMF_bi