# LFM梯度下降

### 0.引入依赖


In [2]:
import numpy as np
import pandas as pd

### 1.数据预处理

In [5]:
# Rating matrix R: rows are users, columns are items.
# A 0 entry means "no rating"; the algorithm below skips entries that are not > 0.
R = np.array(
    [
        [4, 0, 2, 0, 1],
        [0, 2, 3, 0, 0],
        [1, 0, 2, 4, 0],
        [5, 0, 0, 3, 1],
        [0, 0, 1, 5, 1],
        [0, 3, 2, 4, 1],
    ]
)

### 2.算法实现

In [16]:
"""
输入参数：
R：M*N 的评分矩阵
K：隐特征向量维度
max_iter：最大迭代次数
alpha：步长
lamda：正则化系数

输出：
分解后的P、Q，以及最终的损失cost
P：学习得到的用户特征矩阵M*K
Q：学习得到的物品特征矩阵N*K
cost：最后一次迭代结束时的损失函数值
"""

# Hyperparameters used for the test run.

# Latent-feature dimension and maximum number of iterations.
K, max_iter = 2, 5000
# Step size (learning rate) and regularization coefficient.
alpha, lamda = 0.0005, 0.004

#核心算法
def _lfm_cost(R, P, Q, lamda, observed):
    """Return the regularized squared error over the observed (u, i) entries.

    ``Q`` is expected in K*N layout (already transposed), so ``P @ Q`` is the
    predicted rating matrix.
    """
    pred_R = np.dot(P, Q)
    cost = 0.0
    for u, i in observed:
        cost += (pred_R[u, i] - R[u, i]) ** 2
        # Regularization term, added once per observed rating.
        cost += lamda * (np.dot(P[u, :], P[u, :]) + np.dot(Q[:, i], Q[:, i]))
    return cost


def LFM_grad_desc(R, K, max_iter=1000, alpha=0.0001, lamda=0.004):
    """Factorize a rating matrix into latent factors with gradient descent.

    Only entries with ``R[u, i] > 0`` are treated as observed ratings; every
    other entry is ignored by both the updates and the cost.

    Args:
        R: M*N rating matrix (array-like; converted with ``np.asarray``).
        K: Dimension of the latent feature vectors.
        max_iter: Maximum number of passes over the observed ratings.
        alpha: Step size.
        lamda: Regularization coefficient.

    Returns:
        A tuple ``(P, Q, cost)`` where ``P`` is the M*K user feature matrix,
        ``Q`` is the N*K item feature matrix and ``cost`` is the regularized
        squared error after the last pass (or of the random initial factors
        when ``max_iter`` is 0).

    Raises:
        ValueError: If ``R`` is not two-dimensional.
    """
    R = np.asarray(R)
    if R.ndim != 2:
        raise ValueError("R must be a 2-D rating matrix")
    M, N = R.shape

    # Random initial factors. Q is kept transposed (K*N) while iterating so
    # that P @ Q directly yields the predicted rating matrix.
    P = np.random.rand(M, K)
    Q = np.random.rand(N, K).T

    # The set of observed ratings never changes, so locate it once
    # (row-major order, same visiting order as a nested u/i loop).
    observed = [(int(u), int(i)) for u, i in np.argwhere(R > 0)]

    # Defined up front so the return value exists even when max_iter == 0.
    cost = _lfm_cost(R, P, Q, lamda, observed)

    for _ in range(max_iter):
        for u, i in observed:
            err = np.dot(P[u, :], Q[:, i]) - R[u, i]

            # Both gradients must be evaluated at the pre-update values, so
            # snapshot the user vector before overwriting it.
            p_old = P[u, :].copy()
            P[u, :] -= alpha * (2 * err * Q[:, i] + 2 * lamda * P[u, :])
            Q[:, i] -= alpha * (2 * err * p_old + 2 * lamda * Q[:, i])

        cost = _lfm_cost(R, P, Q, lamda, observed)
        if cost < 0.0001:
            break

    return P, Q.T, cost

### 3.测试

In [18]:
# Run the factorization on the sample rating matrix with the hyperparameters above.
P, Q, cost = LFM_grad_desc(R, K, max_iter=max_iter, alpha=alpha, lamda=lamda)

# Show the learned factors, the final cost and the original ratings for comparison.
for shown in (P, Q, cost, R):
    print(shown)

# Reconstructed (predicted) rating matrix; the bare expression is the cell's displayed value.
pred_R = P @ Q.T
pred_R

[[ 1.18032406  1.80108974]
 [ 1.87169436  0.04069066]
 [ 1.34454949  0.64866209]
 [-0.32530181  1.99587288]
 [ 0.59855195  2.0413426 ]
 [ 1.15788657  0.98785643]]
[[-0.38124337  2.44540091]
 [ 1.04208287  1.75710764]
 [ 1.58201154  0.04878276]
 [ 2.00337612  1.83629104]
 [ 0.23985352  0.47729201]]
0.7340236057597367
[[4 0 2 0 1]
 [0 2 3 0 0]
 [1 0 2 4 0]
 [5 0 0 3 1]
 [0 0 1 5 1]
 [0 3 2 4 1]]


array([[ 3.95439577,  4.39470405,  1.95514841,  5.671958  ,  1.14275061],
       [-0.61406608,  2.02195851,  2.96302708,  3.82442768,  0.4683538 ],
       [ 1.0736383 ,  2.54090112,  2.15873634,  3.88477074,  0.63209616],
       [ 5.00472851,  3.16797206, -0.41726703,  3.01330162,  0.87458939],
       [ 4.7637071 ,  4.21059943,  1.0464984 ,  4.94762381,  1.1178813 ],
       [ 1.97426843,  2.94238384,  1.87998028,  4.13367421,  0.74921914]])