## SVDpp
作为SVD的升级算法，考虑到了用户对所有有过评分行为的商品的隐性反馈。  
The prediction $\hat{r}_{ui}$ is set as:  
$$
\hat{r}_{ui}=\mu+b_u+b_i+q_i^T\left(p_u+|I_u|^{-1/2}\sum_{j\in I_u}y_j\right)
$$
这里 $I_u$ 是用户 $u$ 评过分的物品集合；$y_j$ 是一组新的物品因子，用于捕捉隐式反馈，即“用户 $u$ 对物品 $j$ 评过分”这一行为本身（与评分高低无关）所反映的个人偏好。

In [1]:
from surprise import SVDpp, accuracy, Dataset
from surprise.model_selection import train_test_split

In [2]:
# Load the built-in MovieLens 100k ratings, then hold out 20% of them as a test set.
# The shuffle is seeded so the split is identical on every run.
data = Dataset.load_builtin("ml-100k")
trainset, testset = train_test_split(
    data,
    test_size=0.2,
    shuffle=True,
    random_state=10,
)

In [4]:
# Hyper-parameters for SVD++, collected in one place before building the model.
svdpp_params = dict(
    n_factors=100,     # number of latent factors (the k of the matrix factorization)
    n_epochs=20,       # number of training epochs
    init_mean=0,       # mean of the normal distribution used to initialise the factor vectors (default 0)
    init_std_dev=0.1,  # standard deviation of that normal distribution (default 0.1)
    lr_all=0.005,      # learning rate applied to all parameters
    reg_all=0.002,     # regularization term applied to all parameters
    # Per-parameter learning rates; when set, each takes precedence over lr_all.
    lr_bi=None,
    lr_bu=None,
    lr_qi=None,
    lr_pu=None,
    lr_yj=None,
    # Per-parameter regularization terms; when set, each takes precedence over reg_all.
    reg_bi=None,
    reg_bu=None,
    reg_qi=None,
    reg_pu=None,
    reg_yj=None,
    random_state=10,   # fixed seed for reproducible training
    verbose=True,      # print a progress line for every epoch
)
model = SVDpp(**svdpp_params)
model.fit(trainset)

 processing epoch 0
 processing epoch 1
 processing epoch 2
 processing epoch 3
 processing epoch 4
 processing epoch 5
 processing epoch 6
 processing epoch 7
 processing epoch 8
 processing epoch 9
 processing epoch 10
 processing epoch 11
 processing epoch 12
 processing epoch 13
 processing epoch 14
 processing epoch 15
 processing epoch 16
 processing epoch 17
 processing epoch 18
 processing epoch 19


<surprise.prediction_algorithms.matrix_factorization.SVDpp at 0x2597f6dfba8>

In [5]:
# Learned user factors (the p_u term of the SVD++ prediction formula); show their shape.
pu = model.pu
pu.shape

(943, 100)

In [6]:
# Pull the remaining learned parameters off the fitted model:
# item factors (qi), item / user biases (bi, bu) and the implicit-feedback item factors (yj).
qi, bi, bu, yj = model.qi, model.bi, model.bu, model.yj
# Global mean taken from the training set (the mu term of the prediction formula).
global_mean = trainset.global_mean
yj.shape, global_mean

((1653, 100), 3.5282375)

In [7]:
# Predict every entry of the held-out test set and preview the first five predictions.
pred = model.test(testset)
pred[:5]

[Prediction(uid='154', iid='302', r_ui=4.0, est=4.266729334909859, details={'was_impossible': False}),
 Prediction(uid='896', iid='484', r_ui=4.0, est=4.029826950256732, details={'was_impossible': False}),
 Prediction(uid='230', iid='371', r_ui=4.0, est=3.245403002099773, details={'was_impossible': False}),
 Prediction(uid='234', iid='294', r_ui=3.0, est=2.095114885396975, details={'was_impossible': False}),
 Prediction(uid='25', iid='729', r_ui=4.0, est=3.91871829855566, details={'was_impossible': False})]

In [8]:
# RMSE of the test-set predictions; the call prints the score and also returns it.
accuracy.rmse(pred)

RMSE: 0.9315


0.9315074547799508

In [9]:
# Map the raw ids of the first test prediction (item '302', user '154')
# to the inner ids used to index the model's parameter arrays.
inner_iid = trainset.to_inner_iid('302')
inner_uid = trainset.to_inner_uid('154')
inner_iid, inner_uid

(171, 738)

In [10]:
# Training-set ratings of inner user 738 as (inner item id, rating) pairs; preview the first five.
ur = trainset.ur[738]
ur[:5]

[(61, 5.0), (5, 4.0), (418, 5.0), (232, 4.0), (245, 4.0)]

In [11]:
import numpy as np

# Sum the implicit-feedback factors y_j over every item that inner user 738 rated,
# i.e. the sum_{j in I_u} y_j term of the SVD++ prediction formula.
# The accumulator is sized from yj itself instead of a hard-coded 100, so this cell
# keeps working when n_factors is changed.
yj_u738 = np.zeros(yj.shape[1])
# Accumulate row by row (rather than a vectorised sum) to keep the summation order fixed.
for iid, rating in ur:  # ur holds (inner item id, rating); the rating value is not needed here
    yj_u738 += yj[iid]
yj_u738, len(ur)

(array([ 0.46591679, -0.01760601,  0.33893417, -0.59497736,  0.01154657,
         0.27853249,  0.60461401,  0.31332899,  0.95245207,  0.66316093,
        -0.28455223, -0.24311912, -0.23802693,  0.04298593,  0.86705005,
        -0.28870749, -0.09216296,  0.09597773,  0.27308644, -0.40378002,
        -0.05781717, -0.05917021,  0.04575827, -0.0539812 ,  0.18512899,
         0.19859737,  0.32728435,  0.61882183,  0.22455721,  0.75694698,
         0.28479897, -0.08267368, -0.11139986,  0.34141078,  0.01849795,
         0.31446014, -0.05205146, -0.3735532 ,  0.19307094,  0.48020824,
        -0.01358934, -0.61465379,  0.10138678,  0.00735017, -0.7757618 ,
        -0.03654485,  0.73888832, -0.6899228 , -0.03353955,  0.46378631,
        -1.01907517, -0.63383956, -0.02812417, -0.47145491, -0.20166856,
        -0.07703023,  1.16442911, -1.01568158, -0.47485544,  0.39311605,
         0.76041676,  0.27449021,  0.175515  , -0.41883673, -0.02689833,
         0.057071  , -1.2081172 ,  0.42112661, -0.5

In [13]:
# Re-derive the first test prediction (raw user '154', raw item '302') by hand from the
# SVD++ formula: mu + b_u + b_i + q_i^T (p_u + |I_u|^{-1/2} * sum_{j in I_u} y_j).
inner_uid, inner_iid = 738, 171  # inner ids of raw user '154' and raw item '302'
n_rated = len(ur)                # |I_u|: taken from the data instead of a hard-coded 40
est_manual = (
    global_mean
    + bu[inner_uid]
    + bi[inner_iid]
    + np.dot(qi[inner_iid], pu[inner_uid] + yj_u738 / (n_rated ** .5))
)
# The hand-computed value must agree with the estimate the model produced for that pair.
assert np.isclose(est_manual, pred[0].est)
est_manual

4.266729334909859