# 隐语义模型的梯度下降求解

### 1.算法实现

In [12]:
import numpy as np

"""
@输入参数：
R：M*N 的评分矩阵
P：初始化用户特征矩阵M*K
Q：初始化物品特征矩阵N*K
K：隐特征向量个数
steps: 最大迭代次数
alpha：步长
lamda：正则化系数

@输出：
分解之后的 P，Q
"""

def LFM_grad_desc(R, K=2, steps=3000, alpha=0.0002, lamda=0.004):
    M = len(R)         # R的行数M
    N = len(R[0])      # R的列数N
    P = np.random.rand(M,K) # 产生一个M x K的随机矩阵
    Q = np.random.rand(N,K) # 产生一个N x K的随机矩阵
    Q = Q.T # Q做转置
    
    for step in range(steps):
        for i in range(M):
            for j in range(N):
                # 如果评分大于0，表示有评分，才考虑误差
                if R[i][j] > 0:
                    eij = R[i][j] - np.dot(P[i,:],Q[:,j]) # np.dot是点乘的意思, P[i,:]: 1 x k; Q[:,j]: k x 1
                    for k in range(0, K):
                        P[i][k] = P[i][k] + alpha * (2 * eij * Q[k][j] - 2 * lamda * P[i][k])
                        Q[k][j] = Q[k][j] + alpha * (2 * eij * P[i][k] - 2 * lamda * Q[k][j])

        # 根据更新之后的P、Q计算预测评分矩阵
        eR = np.dot(P,Q)
        # 计算当前损失函数
        e = 0
        for i in range(M):
            for j in range(N):
                if R[i][j] > 0:
                    e += (R[i][j]-np.dot(P[i,:],Q[:,j]))**2
                    for k in range(K):
                        e += lamda * (P[i][k]**2 + Q[k][j]**2) 
        
        if e < 0.001:
            break
    return P, Q.T

输入

$$
R: M \times N \\
P: M \times K \\
Q: K \times N
$$

损失函数：

$$
L = \sum (R - PQ)^2 + \lambda {\vert\vert P \vert\vert}^2 + \lambda {\vert\vert Q \vert\vert}^2
$$

当然这里的R是评分矩阵的**已有评分**

也就是说

$$
\widehat{R}_{ij} = P_{i1}Q_{1j} + ... + P_{iK}Q_{Kj}  = \sum P_{ik}Q_{kj}
$$

上面的公式$k$要循环$(1, K)$

对P求偏导数

$$
\frac {\partial L}{\partial P} = \sum 2 \times (R-PQ) \times (-Q^T) + 2\lambda\vert\vert P \vert\vert
$$

梯度下降迭代公式：

$$
P_{下一轮} = P_{当前轮} + \alpha (2\sum(R-PQ)Q^T-2\lambda\vert\vert P \vert\vert)
$$

Q的求解同理

这里有一个矩阵求导的技巧：

$$
\frac {\partial PQ}{\partial P} = Q^T
$$

假设

$$
P = [P_1, ... ,P_K] \\
Q = [Q_1, ... ,Q_K]^T \\
PQ = P_1Q_1 + ... + P_KQ_K \\
\frac {\partial PQ}{\partial P} = [Q_1, ... , Q_K] = Q^T
$$

标量针对向量进行求导


$$
\frac {\partial x}{\partial Y} = [\frac {\partial x}{\partial y_1}, ..., \frac {\partial x}{\partial y_K}]
$$

### ppt矩阵求导详细推导


$$
P_u^T = (p_1, ..., p_k)
$$

$$
Q_i = \left(
  \begin{array}{c}
          q_{1} \\
          \vdots \\
          q_{k}
 \end{array}
 \right)
$$

所以

$$
P_u^TQ_i = (p_1, ..., p_k)\left(
  \begin{array}{c}
          q_{1} \\
          \vdots \\
          q_{k}
 \end{array}
 \right) = p_1q_1 + ... + p_kq_k
$$

所以对$Q_i$求偏导为：

$$
\frac {\partial P_u^TQ_i}{\partial P_u} = \frac {\partial (p_1q_1 + ... + p_kq_k)}{\partial \left(
  \begin{array}{c}
          p_{1} \\
          \vdots \\
          p_{k}
 \end{array}
 \right)} = \left(
  \begin{array}{c}
          q_{1} \\
          \vdots \\
          q_{k}
 \end{array}
 \right) = Q_i
$$

$$
\sum_i Q_iQ_i^T = Q_1Q_1^T + ... + Q_NQ_N^T = (Q_1, ..., Q_N)\left(
  \begin{array}{c}
          Q_{1}^T \\
          \vdots \\
          Q_{N}^T
 \end{array}
 \right) = (Q_1, ..., Q_N)(Q_1^T, ..., Q_N^T)^T
$$

### 2. 测试

In [21]:
R = np.array([[5,1,4,2,3],
              [1,0,3,3,4],
              [5,2,0,2,5],
              [1,2,4,3,5]])

nP,nQ = LFM_grad_desc(R)
R

array([[5, 1, 4, 2, 3],
       [1, 0, 3, 3, 4],
       [5, 2, 0, 2, 5],
       [1, 2, 4, 3, 5]])

In [22]:
nP

array([[2.10832007, 0.48256027],
       [0.36871684, 1.69232023],
       [2.1340747 , 1.14319922],
       [0.38806583, 2.10348715]])

In [23]:
nQ

array([[2.32864436, 0.06061983],
       [0.37287366, 0.88648014],
       [1.50706114, 1.56332958],
       [0.41702182, 1.41702996],
       [1.0472418 , 2.18330051]])

In [24]:
nP.dot(nQ.T)

array([[4.93878036, 1.21391712, 3.93176799, 1.56301783, 3.26149498],
       [0.96119857, 1.63769307, 3.20133309, 2.55183143, 4.08097931],
       [5.03880155, 1.80916365, 5.00337819, 2.50990326, 4.73083966],
       [1.03118034, 2.00939911, 3.87328261, 3.14253623, 4.99894333]])