In [1]:
import numpy as np
import pandas as pd

In [3]:
# 用户评分矩阵
R = np.array([[4,0,2,0,1],
             [0,2,3,0,0],
             [1,0,2,4,0],
             [5,0,0,3,1],
             [0,0,1,5,1],
             [0,3,2,4,1],])

In [29]:
def LFM_grad_desc(R, K=2, max_iter=1000, alpha=0.0001, lamda=0.002 ):
    """
    LFM随机梯度下降求解
    R 为用户~评分矩阵
    K 隐含特征向量维度
    max_iter 最大学习次数
    alpha  步长 学习率
    lamda 正则化系数
    """
    M = R.shape[0]
    N = R.shape[1]
    
    # 随机生成
    P = np.random.rand(M, K)
    Q = np.random.rand(N, K)
    Q = Q.T
    
    # 开始迭代
    for step in range(max_iter):
        # 对所有的用户和物品进行遍历
        for u in range(M):
            for i in range(N):
                # 取出当前用户有评分的数据 计算偏差
                if R[u][i] > 0:
                    # 预测值和真实值的偏差
                    error_ui = np.dot(P[u, :], Q[:, i]) - R[u][i]
                    # 按照随机梯度下降算法更新 Pu Qi
                    for k in range(K):
                        P[u][k] = P[u][k] - alpha *(2 * error_ui * Q[k][i] + 2 * lamda * P[u][k]) 
                        Q[k][i] = Q[k][i] - alpha * (2 * error_ui * P[u][k] + 2 * lamda * Q[k][i])
        # 所有的u,i遍历完, 所有的特征向量更新完成, 得到P, Q,计算预测评分矩阵
#         pred_r = np.dot(P, Q)
        cost = 0
        # 计算当前损失
        for u in range(M):
            for i in range(N):
                if R[u][i] > 0:
                    cost += (np.dot(P[u, :], Q[:, i]) - R[u][i]) ** 2
                    # 加上正则项
                    for k in range(K):
                        cost += lamda * (P[u][k] ** 2 + Q[k][i] ** 2)
        if cost < 0.0001:
            break
                # 计算当前损失函数
    return P, Q.T, cost
    

In [26]:
# 给定超参数

K = 5
max_iter = 5000
alpha = 0.0002
lamda = 0.004

P, Q, cost = LFM_grad_desc(R, K, max_iter, alpha, lamda)
cost

0.5620410875527074

In [30]:
# 预测值
pred_r = np.dot(P, Q.T)
pred_r

array([[4.00317344, 2.5609118 , 1.99820875, 4.0881203 , 0.94033135],
       [3.48130085, 2.01114171, 2.98635932, 3.85264815, 1.01698779],
       [1.00221031, 1.1391706 , 2.00185156, 3.98988605, 0.53883581],
       [4.9862868 , 2.80056786, 1.32466519, 3.01322878, 0.96207941],
       [5.44870669, 3.57049707, 1.02547399, 4.97626743, 1.00917979],
       [4.60455032, 2.97437932, 1.97259598, 4.00246548, 1.07184879]])

In [31]:
P

array([[ 0.76857183,  0.85574357,  0.72694329,  0.77816719,  0.66023326],
       [-0.01840529,  1.10777752,  0.84940372,  0.74088171,  1.12995235],
       [ 0.19013119,  1.67210952,  0.75868741,  0.12486243, -0.08200108],
       [ 0.84899843, -0.08243641,  0.78097054,  0.93404336,  0.98227355],
       [ 1.52171146,  0.38571054,  1.2092196 ,  1.00782098,  0.1800176 ],
       [ 1.2399479 ,  0.82012406,  0.37491222,  0.60080662,  1.05284866]])

In [32]:
Q

array([[ 1.53600905, -0.12107087,  1.09068628,  1.58044972,  1.36849297],
       [ 1.18838861,  0.21285954,  0.67455845,  0.75218142,  0.59025801],
       [-0.33319138,  1.13094501,  0.22267458,  0.65403211,  0.93250863],
       [ 1.12660252,  1.5569562 ,  1.42647761,  0.88467383,  0.24915074],
       [ 0.23070751,  0.18141495,  0.26344633,  0.19616059,  0.39927572]])