In [1]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from tqdm import tqdm, trange
from math import sqrt
import pandas as pd
import numpy as np
import heapq

In [2]:
n,m=943,1682
cols = ['user_id', 'item_id', 'rating', 'ts']
base = pd.read_csv('../../ml-100k/ua.base',sep='\t',names=cols)
test = pd.read_csv('../../ml-100k/ua.test',sep='\t',names=cols)

In [3]:
# 创建uesr-item矩阵
train_matrix = np.zeros((n, m))
test_matrix = np.zeros((n, m))
y_ui = np.zeros((n, m))
for i in base.itertuples():
    train_matrix[i[1] - 1, i[2] - 1] = i[3]

for i in test.itertuples():
    test_matrix[i[1] - 1, i[2] - 1] = i[3]

# 创建y矩阵
for i in range(n):
    for j in range(m):
        if train_matrix[i][j] != 0:
            y_ui[i][j] = 1

In [4]:
# RSVD
#Init
global_average_rating = train_matrix.sum()/y_ui.sum() 
avg_rating_user = np.array([0.0]*n)
avg_rating_item = np.array([0.0]*m)
for i in range(n):
    if y_ui[i].sum()==0:
        avg_rating_user[i] = global_average_rating
    else:
        avg_rating_user[i] = train_matrix[i].sum()/y_ui[i].sum()
for i in range(m):
    if y_ui[:,i].sum()==0:
        avg_rating_item[i] = global_average_rating
    else:
        avg_rating_item[i] = train_matrix[:,i].sum()/y_ui[:,i].sum()
bias_u = np.array([0.0]*n)  
bias_i = np.array([0.0]*m)
for i in range(n):
    if sum(y_ui[i]) == 0:
        bias_u[i] = 0
    else:
        t = 0
        for j in range(m):
            t += (train_matrix[i,j]-avg_rating_item[j])*y_ui[i,j]
        bias_u[i] = t/sum(y_ui[i])
for i in range(m):
    if sum(y_ui[:,i]) == 0:
        bias_i[i] = 0
    else:
        t = 0
        for j in range(n):
            t += (train_matrix[j,i]-avg_rating_user[j])*y_ui[j,i]
        bias_i[i] = t/sum(y_ui[:,i])

In [5]:
def rsvd(R, U, V, T, d, bias_u, bias_i, global_avg_rating, alpha_u=0.01, alpha_v=0.01,bet_u=0.01, bet_v=0.01, gama=0.01):
    minf = 9999999
    Uc = U.copy()
    Vc = V.copy()
    bias_uc = bias_u.copy()
    bias_ic = bias_i.copy()
    miu = global_avg_rating
    for step in tqdm(range(T)):
        fui = 0
        Ut = np.zeros(U.shape)
        Vt = np.zeros(V.shape)
        bias_ut = np.zeros(bias_u.shape)
        bias_it = np.zeros(bias_i.shape)
        gar = 0
        count = 0
        countU = np.zeros(bias_u.shape)
        countV = np.zeros(bias_i.shape)
        for u in range(n):
            for i in range(m):
                if R[u,i] > 0:
                    count += 1
                    countU[u] += 1
                    countV[i] += 1
                    rui = np.dot(U[u,:],V[i,:].T)+bias_u[u]+bias_i[i]+global_avg_rating
                    if rui > 5:
                        rui = 5
                    elif rui < 1:
                        rui = 1
                    eui = R[u,i] - rui
                    fui += (np.power(eui, 2)+alpha_u*np.dot(U[u,:], U[u,:].T)+ alpha_v*np.dot(V[i,:], V[i,:].T)+bet_u*np.power(bias_u[u],2)+bet_v*np.power(bias_i[i],2))/2
                    gar -= eui
                    bias_ut -= (eui-bet_u* bias_u[u])
                    bias_it -= (eui-bet_v* bias_i[i])
                    Ut[u,:] -= (eui*V[i,:]-alpha_u*U[u,:])
                    Vt[i,:] -= (eui*U[u,:]-alpha_v*V[i,:])
        global_avg_rating -= gama*gar/count
        for u in range(n):
            if countU[u] != 0:
                bias_u[u] -= gama*bias_ut[u]/countU[u]
                U[u,:] -= gama*Ut[u,:]/countU[u]
        for i in range(m):
            if countV[i] != 0:
                bias_i[i] -= gama*bias_it[i]/countV[i]
                V[i,:] -= gama*Vt[i,:]/countV[i]
        if minf > fui:
            minf = fui
            Uc = U.copy()
            Vc = V.copy()
            bias_uc = bias_u.copy()
            bias_ic = bias_i.copy()
            miu = global_avg_rating
        gama = gama*0.9
    return Uc,Vc,bias_uc,bias_ic,miu

In [6]:
# 评估函数
def eva(prediction, truth):
    prediction = prediction[truth.nonzero()].flatten()
    truth = truth[truth.nonzero()].flatten()
    return sqrt(mean_squared_error(prediction, truth)),mean_absolute_error(prediction, truth)

In [7]:
if __name__ == '__main__':
    d = 20
    T = 100
    U = (np.random.rand(n, d)-0.5)*0.01
    V = (np.random.rand(m, d)-0.5)*0.01
    R = train_matrix.copy()
    Uf,Vf,bias_uf,bias_if,miuf = rsvd(R, U, V, T, d, bias_u, bias_i, global_average_rating)
    RSVD = ((np.dot(Uf,Vf.T)+bias_if+miuf).T+bias_uf).T

100%|██████████| 100/100 [07:14<00:00,  4.34s/it]


In [8]:
print("RSVD RMSE、MAE:"+str(eva(RSVD, test_matrix)))

RSVD RMSE、MAE:(0.9651131280964326, 0.7610691528277747)
