# 隐语义模型的梯度下降求解

### 1.算法实现

In [1]:
import numpy as np

"""
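@Parameters:
R: M*N rating matrix (entries <= 0 are treated as "not rated" and skipped)
K: number of latent factors
steps: maximum number of iterations (passes over the rated entries)
alpha: step size (learning rate)
lamda: regularization coefficient

Note: P (M*K user factors) and Q (N*K item factors) are NOT arguments;
they are randomly initialized inside the function.

@Returns:
The factor matrices after decomposition: P (M*K) and Q (N*K)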
"""

def LFM_grad_desc(R, K=2, steps=3000, alpha=0.0002, lamda=0.004):
    """Factorize a rating matrix with a latent factor model (LFM).

    Finds P (M x K) and Q (N x K) such that ``P @ Q.T`` approximates ``R`` on
    its rated entries, by per-rating gradient descent on the L2-regularized
    squared error.

    Parameters
    ----------
    R : array-like, shape (M, N)
        Rating matrix. Entries ``<= 0`` are treated as "not rated" and do not
        contribute to the updates or to the loss.
    K : int
        Number of latent factors.
    steps : int
        Maximum number of passes over the rated entries.
    alpha : float
        Step size (learning rate).
    lamda : float
        Regularization coefficient.

    Returns
    -------
    (P, Q) : tuple of numpy.ndarray
        ``P`` has shape (M, K) and ``Q`` has shape (N, K).

    Raises
    ------
    ValueError
        If ``R`` is not two-dimensional.

    Notes
    -----
    P and Q are initialized with ``np.random.rand`` (global NumPy RNG), so
    call ``np.random.seed(...)`` beforehand for reproducible results.
    """
    R = np.asarray(R, dtype=float)
    if R.ndim != 2:
        raise ValueError("R must be a 2-D rating matrix")
    M, N = R.shape

    P = np.random.rand(M, K)      # user factors, M x K
    Q = np.random.rand(N, K).T    # item factors, kept as K x N inside the loop

    # The set of rated cells never changes, so locate it once instead of
    # rescanning all M*N cells on every pass. argwhere yields row-major order,
    # i.e. the same (i, j) visiting order as nested i/j loops.
    observed = np.argwhere(R > 0)
    rows, cols = observed[:, 0], observed[:, 1]

    for _ in range(steps):
        for i, j in observed:
            # Snapshot both factor vectors so that P and Q are updated
            # simultaneously from the same point; updating Q with the
            # already-updated P would not follow the true gradient.
            p_old = P[i, :].copy()
            q_old = Q[:, j].copy()
            eij = R[i, j] - np.dot(p_old, q_old)
            P[i, :] = p_old + alpha * (2 * eij * q_old - 2 * lamda * p_old)
            Q[:, j] = q_old + alpha * (2 * eij * p_old - 2 * lamda * q_old)

        # Regularized squared error over the rated entries only.
        P_obs = P[rows, :]        # (n_rated, K)
        Q_obs = Q[:, cols].T      # (n_rated, K)
        residual = R[rows, cols] - np.sum(P_obs * Q_obs, axis=1)
        loss = np.sum(residual ** 2) + lamda * (np.sum(P_obs ** 2) + np.sum(Q_obs ** 2))

        # Early stop once the fit is essentially exact.
        if loss < 0.001:
            break

    return P, Q.T

各矩阵的维度（注意：这里的 $Q$ 指转置之后的 $K \times N$ 矩阵，对应代码中的 `Q = Q.T`）

$$
R: M \times N \\
P: M \times K \\
Q: K \times N
$$

损失函数：

$$
L = \sum (R - PQ)^2 + \lambda {\vert\vert P \vert\vert}^2 + \lambda {\vert\vert Q \vert\vert}^2
$$

注意：这里的求和只在评分矩阵 $R$ 中**已有评分**（大于 0）的位置上进行，未评分的位置不计入损失。

也就是说

$$
\widehat{R}_{ij} = P_{i1}Q_{1j} + ... + P_{iK}Q_{Kj}  = \sum P_{ik}Q_{kj}
$$

上面公式中的 $k$ 从 $1$ 取到 $K$

对P求偏导数

$$
\frac {\partial L}{\partial P} = \sum 2 \times (R-PQ) \times (-Q^T) + 2\lambda P
$$

梯度下降迭代公式：

$$
P_{下一轮} = P_{当前轮} + \alpha (2\sum(R-PQ)Q^T-2\lambda P)
$$

Q的求解同理

这里有一个矩阵求导的技巧：

$$
\frac {\partial PQ}{\partial P} = Q^T
$$

假设

$$
P = [P_1, ... ,P_K] \\
Q = [Q_1, ... ,Q_K]^T \\
PQ = P_1Q_1 + ... + P_KQ_K \\
\frac {\partial PQ}{\partial P} = [Q_1, ... , Q_K] = Q^T
$$

标量针对向量进行求导


$$
\frac {\partial x}{\partial Y} = [\frac {\partial x}{\partial y_1}, ..., \frac {\partial x}{\partial y_K}]
$$

### ppt矩阵求导详细推导


$$
P_u^T = (p_1, ..., p_k)
$$

$$
Q_i = \left(
  \begin{array}{c}
          q_{1} \\
          \vdots \\
          q_{k}
 \end{array}
 \right)
$$

所以

$$
P_u^TQ_i = (p_1, ..., p_k)\left(
  \begin{array}{c}
          q_{1} \\
          \vdots \\
          q_{k}
 \end{array}
 \right) = p_1q_1 + ... + p_kq_k
$$

所以对$P_u$求偏导为：

$$
\frac {\partial P_u^TQ_i}{\partial P_u} = \frac {\partial (p_1q_1 + ... + p_kq_k)}{\partial \left(
  \begin{array}{c}
          p_{1} \\
          \vdots \\
          p_{k}
 \end{array}
 \right)} = \left(
  \begin{array}{c}
          q_{1} \\
          \vdots \\
          q_{k}
 \end{array}
 \right) = Q_i
$$

$$
\sum_i Q_iQ_i^T = Q_1Q_1^T + ... + Q_NQ_N^T = (Q_1, ..., Q_N)\left(
  \begin{array}{c}
          Q_{1}^T \\
          \vdots \\
          Q_{N}^T
 \end{array}
 \right) = (Q_1, ..., Q_N)(Q_1^T, ..., Q_N^T)^T
$$

$$
P_u = \left(
  \begin{array}{c}
          p_{1} \\
          \vdots \\
          p_{k}
 \end{array}
 \right)
$$

所以（记误差 $e_{ui} = R_{ui} - P_u^TQ_i$），沿负梯度方向更新：

$$
\left(
  \begin{array}{c}
          p_{u1} \\
          \vdots \\
          p_{uk}
 \end{array}
 \right)
:=
\left(
  \begin{array}{c}
          p_{u1} \\
          \vdots \\
          p_{uk}
 \end{array}
 \right)
 +
2 \times e_{ui} \times \alpha \times
\left(
  \begin{array}{c}
          q_{1i} \\
          \vdots \\
          q_{ki}
 \end{array}
 \right)
 -
 2 \times \alpha \times \lambda
 \left(
  \begin{array}{c}
          p_{u1} \\
          \vdots \\
          p_{uk}
 \end{array}
 \right)
$$

### 2. 测试

In [2]:
# LFM_grad_desc initializes P and Q with np.random.rand, so fix the global
# NumPy seed to make this cell reproducible under Restart & Run All.
np.random.seed(42)

# Small rating matrix: 4 users x 5 items; 0 marks a missing rating.
R = np.array([[5,1,4,2,3],
              [1,0,3,3,4],
              [5,2,0,2,5],
              [1,2,4,3,5]])

nP, nQ = LFM_grad_desc(R)
R

array([[5, 1, 4, 2, 3],
       [1, 0, 3, 3, 4],
       [5, 2, 0, 2, 5],
       [1, 2, 4, 3, 5]])

In [4]:
# Learned user-factor matrix P returned by LFM_grad_desc: one row per user (M x K).
nP

array([[1.81934903, 0.95791858],
       [0.64127237, 1.33964577],
       [1.40524923, 1.84553705],
       [0.09605722, 2.11453667]])

In [5]:
# Learned item-factor matrix Q returned by LFM_grad_desc: one row per item (N x K).
nQ

array([[1.99870812, 0.72355862],
       [0.08831666, 0.951575  ],
       [1.10031007, 1.86652159],
       [0.74830888, 1.04585253],
       [0.72814274, 2.23043893]])

In [6]:
# Reconstructed (predicted) rating matrix; compare with R on its non-zero entries.
nP @ nQ.T

array([[4.32945793, 1.07221021, 3.78982379, 2.36327662, 3.4613247 ],
       [2.25102854, 1.33140845, 3.2060762 , 1.88094173, 3.45493589],
       [4.14403729, 1.88027384, 4.99094464, 2.98172008, 5.13957971],
       [1.72198157, 2.02062368, 4.05252108, 2.283374  , 4.78628828]])