# 隐语义模型的梯度下降求解

### 1.算法实现

In [35]:
import numpy as np

"""
@输入参数：
R：M*N 的评分矩阵（0 表示未评分）
K：隐特征向量个数
steps: 最大迭代次数
alpha：步长
lamda：正则化系数

@函数内部随机初始化（不是输入参数）：
P：用户特征矩阵 M*K
Q：物品特征矩阵 N*K

@输出：
分解之后的 P（M*K），Q（N*K）
"""

def LFM_grad_desc(R, K=5, steps=3000, alpha=0.0002, lamda=0.004):
    """Factorize a rating matrix with a latent factor model trained by SGD.

    Only entries of ``R`` that are strictly greater than 0 are treated as
    observed ratings; every other entry is ignored during training and in
    the loss.

    Args:
        R: M x N rating matrix (array-like); values <= 0 mean "no rating".
        K: Number of latent factors.
        steps: Maximum number of passes over the observed ratings.
        alpha: Learning rate (step size).
        lamda: L2 regularization coefficient.

    Returns:
        A tuple ``(P, Q)`` where ``P`` is the M x K user-factor matrix and
        ``Q`` is the N x K item-factor matrix, so ``P.dot(Q.T)`` approximates
        ``R`` on the observed entries.

    Raises:
        ValueError: If ``R`` is not two-dimensional.
    """
    R = np.asarray(R)
    if R.ndim != 2:
        raise ValueError("R must be a 2-D rating matrix")
    M, N = R.shape

    # Random initialization; Q is kept as N x K throughout, so no transposes
    # are needed on the way in or out.
    P = np.random.rand(M, K)
    Q = np.random.rand(N, K)

    # The observed positions never change, so locate them once.
    # np.nonzero returns them in row-major order.
    rows, cols = np.nonzero(R > 0)
    observed = list(zip(rows.tolist(), cols.tolist()))
    ratings = R[rows, cols]

    for _ in range(steps):
        for i, j in observed:
            err = R[i, j] - np.dot(P[i], Q[j])
            # Snapshot P[i] so that both factor updates are computed from the
            # values of the current round (a simultaneous gradient step).
            p_old = P[i].copy()
            P[i] += alpha * (2 * err * Q[j] - 2 * lamda * p_old)
            Q[j] += alpha * (2 * err * p_old - 2 * lamda * Q[j])

        # Regularized squared error over the observed ratings; the penalty is
        # accumulated once per observed (i, j) pair.
        residual = ratings - np.sum(P[rows] * Q[cols], axis=1)
        penalty = np.sum(P[rows] ** 2) + np.sum(Q[cols] ** 2)
        loss = np.sum(residual ** 2) + lamda * penalty

        if loss < 0.001:
            break
    return P, Q

输入

$$
R: M \times N \\
P: M \times K \\
Q: K \times N
$$

损失函数：

$$
L = \sum (R - PQ)^2 + \lambda {\vert\vert P \vert\vert}^2 + \lambda {\vert\vert Q \vert\vert}^2
$$

当然这里的R是评分矩阵的**已有评分**

也就是说

$$
\widehat{R}_{ij} = P_{i1}Q_{1j} + \dots + P_{iK}Q_{Kj} = \sum_{k=1}^{K} P_{ik}Q_{kj}
$$

上面公式中的 $k$ 取遍 $1, 2, \dots, K$

对P求偏导数

$$
\frac {\partial L}{\partial P} = \sum 2 \times (R-PQ) \times (-Q^T) + 2\lambda P
$$

梯度下降迭代公式：

$$
P_{下一轮} = P_{当前轮} + \alpha (2\sum(R-PQ)Q^T-2\lambda P)
$$

Q的求解同理

这里有一个矩阵求导的技巧：

$$
\frac {\partial PQ}{\partial P} = Q^T
$$

假设

$$
P = [P_1, ... ,P_K] \\
Q = [Q_1, ... ,Q_K]^T \\
PQ = P_1Q_1 + ... + P_KQ_K \\
\frac {\partial PQ}{\partial P} = [Q_1, ... , Q_K] = Q^T
$$

标量针对向量进行求导

$$
\frac {\partial x}{\partial Y} = [\frac {\partial x}{\partial y_1}, ..., \frac {\partial x}{\partial y_K}]
$$

### 2. 测试

In [36]:
# Toy rating matrix for a smoke test: one row per user, one column per item.
# A 0 entry means the user has not rated that item.
R = np.array(
    [
        [5, 0, 4, 0, 0],
        [0, 3, 0, 0, 4],
        [5, 0, 0, 2, 0],
        [0, 0, 4, 0, 0],
    ]
)

# Factorize with the default hyperparameters.
nP, nQ = LFM_grad_desc(R)

# Echo the original ratings so they can be compared with nP.dot(nQ.T) below.
R

array([[5, 0, 4, 0, 0],
       [0, 3, 0, 0, 4],
       [5, 0, 0, 2, 0],
       [0, 0, 4, 0, 0]])

In [37]:
nP

array([[1.14996471, 1.03800869, 0.60747143, 0.51926041, 0.86216595],
       [0.73573355, 0.82487915, 0.60479086, 0.88928859, 1.40618051],
       [1.31745151, 0.67773341, 0.82413723, 0.62437012, 0.65523668],
       [0.73347945, 0.87307918, 0.56632902, 1.06282693, 1.0151266 ]])

In [38]:
nQ

array([[1.42037719, 0.80543388, 1.14908875, 1.17826826, 1.3825335 ],
       [0.23688063, 0.20246003, 0.75195393, 0.87447126, 1.01474268],
       [1.11432086, 1.13599154, 0.53317698, 0.87893668, 0.91611041],
       [0.16566253, 0.28504231, 0.78099022, 0.77765127, 0.69004162],
       [0.5433344 , 0.77167095, 0.44603318, 0.67279315, 1.48461224]])

In [39]:
nP.dot(nQ.T)

array([[4.97127098, 2.26830505, 4.03072476, 1.95954562, 3.32610592],
       [5.39627533, 3.00062963, 4.1492045 , 2.49122417, 3.99198148],
       [5.00571651, 2.27989624, 3.82642507, 1.99276096, 2.99924079],
       [5.05152912, 2.73586793, 3.97522123, 2.33966031, 3.54698804]])