In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Compute the loss value
def loss(R, P, Q, K, beta):
    """Regularized squared-error loss over the observed ratings.

    Only entries with ``R[i][j] > 0`` are treated as observed. For each of
    them the loss adds the squared reconstruction error
    ``(R[i][j] - P[i, :] . Q[:, j]) ** 2`` plus the penalty
    ``(beta / 2) * sum_k (P[i][k] ** 2 + Q[k][j] ** 2)`` for ``k < K``.

    Parameters
    ----------
    R : 2-D array-like
        Rating matrix; values <= 0 are skipped.
    P : 2-D array-like
        Row-factor matrix, indexed as ``P[i, k]``.
    Q : 2-D array-like
        Column-factor matrix, indexed as ``Q[k, j]`` (i.e. already transposed).
    K : int
        Number of factor components included in the regularization term.
    beta : float
        Regularization weight.

    Returns
    -------
    float
        The accumulated loss; ``0.0`` when no entry of ``R`` is observed.
    """
    R = np.asarray(R, dtype=float)
    P = np.asarray(P, dtype=float)
    Q = np.asarray(Q, dtype=float)

    observed = R > 0
    if not observed.any():
        return 0.0

    # Squared reconstruction error restricted to the observed cells.
    residual = (R - np.dot(P, Q))[observed]
    squared_error = np.sum(residual ** 2)

    # Every observed (i, j) contributes ||P[i, :K]||^2 + ||Q[:K, j]||^2.
    p_sq = np.sum(P[:, :K] ** 2, axis=1)
    q_sq = np.sum(Q[:K, :] ** 2, axis=0)
    penalty = (beta / 2) * np.sum((p_sq[:, None] + q_sq[None, :])[observed])

    return float(squared_error + penalty)

# matrix_factorization: returns the P and Q matrices
def matrix_factorization(R, P, Q, K, steps=5000, alpha=0.0002, beta=0.02):
    """Factorize ``R`` into ``P`` and ``Q`` with per-entry gradient descent.

    Entries with ``R[i][j] > 0`` are treated as observed; each pass over the
    data visits them in row-major order and applies one gradient step to the
    corresponding row of ``P`` and column of ``Q``. After every pass the
    current loss is appended to the returned history.

    Parameters
    ----------
    R : 2-D array-like
        Rating matrix; values <= 0 are skipped.
    P : 2-D array-like
        Initial row factors, indexed as ``P[i, k]``.
    Q : 2-D array-like
        Initial column factors, indexed as ``Q[j, k]`` (transposed internally).
    K : int
        Number of factor components to update.
    steps : int
        Number of full passes over the observed entries.
    alpha : float
        Learning rate.
    beta : float
        Regularization weight.

    Returns
    -------
    (P, Q, Loss)
        Trained factor matrices in the same orientation as the inputs, and a
        list with one loss value per pass. The input arrays are not modified;
        training runs on copies.
    """
    R = np.asarray(R, dtype=float)
    # Work on float copies so the caller's initial factors are left untouched.
    P = np.array(P, dtype=float)
    Q = np.array(Q, dtype=float).T

    # R never changes, so locate the observed cells once (row-major order).
    observed = np.argwhere(R > 0)

    Loss = []
    for _ in range(steps):
        for i, j in observed:
            eij = R[i, j] - np.dot(P[i, :], Q[:, j])
            # Compute both updates from the pre-update values before writing
            # either of them, so the Q step does not see the new P.
            p_new = P[i, :K] + alpha * (2 * eij * Q[:K, j] - beta * P[i, :K])
            q_new = Q[:K, j] + alpha * (2 * eij * P[i, :K] - beta * Q[:K, j])
            P[i, :K] = p_new
            Q[:K, j] = q_new
        Loss.append(loss(R, P, Q, K, beta))
    return P, Q.T, Loss

In [None]:
# Read both rating splits into memory
# (the original notes give their shapes as 80000 x 4 and 20000 x 4)
R_train, R_test = (
    np.loadtxt(split_path)
    for split_path in ('ml-100k/u1.base', 'ml-100k/u1.test')
)

In [None]:
# Data preprocessing
user_number = 943   # rows allocated in the rating matrix
item_number = 1682  # columns allocated in the rating matrix
K = 3               # width of the factor matrices P and Q
## Drop the timestamp feature (keep only the first three columns)
R_train = R_train[:, :3]  # expected 80000 x 3
R_test = R_test[:, :3]    # expected 20000 x 3
## Fill the raw rating matrix; cells without a rating stay 0
R = np.zeros([user_number, item_number])

for user_id, item_id, rating in R_train:
    # Ids in the data are 1-based, matrix indices are 0-based.
    R[int(user_id) - 1, int(item_id) - 1] = rating

## Seed the initial factors so that a fresh run reproduces the same result
SEED = 0
rng = np.random.RandomState(SEED)
P = rng.random_sample((user_number, K))
Q = rng.random_sample((item_number, K))

In [None]:
# Train for 10 passes; P and Q are rebound to the trained factors and Loss
# receives one value per pass.
P, Q, Loss = matrix_factorization(R, P, Q, K, steps=10)

In [None]:
# Predict and compute the accuracy
R_pre = np.dot(P, Q.T)
threshold = 0.5

# Ids in the test data are 1-based, matrix indices are 0-based.
test_users = R_test[:, 0].astype(int) - 1
test_items = R_test[:, 1].astype(int) - 1

# A prediction counts as correct when it lies within `threshold` of the true
# rating in either direction, hence the absolute error.
abs_errors = np.abs(R_test[:, 2] - R_pre[test_users, test_items])
acc = float(np.mean(abs_errors < threshold)) if R_test.shape[0] else 0.0

In [None]:
# Draw the per-pass loss curve, then report the accuracy
plt.rcParams['figure.figsize'] = (10.0, 8.0)  # default figure size
fig, ax = plt.subplots()
ax.plot(Loss, label="MF")
ax.set_xlabel('iteration')
ax.set_ylabel('loss')
ax.set_title('Loss history')
ax.legend(loc='best')
ax.grid()
plt.show()
print("准确率为", acc)