In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

  from ._conv import register_converters as _register_converters


# 1. 数据预处理

In [2]:
# Load the full rating table plus the pre-split training and test tables.
ratings_df = pd.read_csv('ratingsProcessed.csv')
train_rating_df = pd.read_csv('train_rating.csv')
test_rating_df = pd.read_csv('test_rating.csv')

# Ids are used directly as matrix indices further down, so each dimension
# is the largest id in the full table plus one.
userNo = ratings_df['userId'].max() + 1
movieNo = ratings_df['movieId'].max() + 1

# Number of rows in the test table; used as the denominator of the metric.
testNo = test_rating_df.shape[0]

In [3]:
# Display the matrix dimensions and the number of test rows.
userNo,movieNo,testNo

(611, 9742, 20168)

In [4]:
#ratings_df

In [5]:
# Mean of every rating in the training table; the biased models below use
# it as their constant global offset.
train_global_rating_mean = train_rating_df["rating"].mean()

In [6]:
# Display the global mean of the training ratings.
train_global_rating_mean

3.502076411960133

 # 创建训练集的用户评分矩阵train_rating和测试集用户评分矩阵test_rating

In [7]:
def _dense_rating_matrix(rating_df):
    """Scatter a rating table into a dense (userNo, movieNo) float matrix.

    Cell [userId, movieId] receives the row's rating; every other cell stays
    at 0. Ids are truncated to integers, as the previous row-by-row loop did
    with ``int(...)``. The scatter is one vectorised assignment instead of a
    Python-level ``iterrows()`` loop, which gives the same matrix when each
    (userId, movieId) pair appears once in the table.
    """
    dense = np.zeros((userNo, movieNo))
    user_idx = rating_df['userId'].values.astype(int)
    movie_idx = rating_df['movieId'].values.astype(int)
    dense[user_idx, movie_idx] = rating_df['rating'].values
    return dense


# Dense user x movie rating matrices for the training and test splits.
train_rating = _dense_rating_matrix(train_rating_df)
test_rating = _dense_rating_matrix(test_rating_df)

# 创建训练集的用户记录矩阵train_record和测试集用户记录矩阵test_record

In [8]:
# 0/1 indicator matrices: 1 where a rating greater than zero is stored,
# 0 everywhere else.
train_record = (train_rating > 0).astype(int)
test_record = (test_rating > 0).astype(int)

# 2. 构建模型





In [9]:
# Number of latent factors per user / item vector (second dimension of the
# factor matrices created by the model builders below).
f = 100
# Weight applied to the regularization term in each model's loss.
lamda = 0.01

   # 2.1 隐语义模型

In [10]:
def createLFM():
    """Build the latent factor model (LFM) graph.

    Creates a (userNo, f) user-factor variable and a (movieNo, f)
    item-factor variable, and predicts every rating as their inner product.
    Reads the notebook globals ``userNo``, ``movieNo``, ``f``, ``lamda``,
    ``train_rating`` and ``train_record``.

    Returns:
        (loss, predict_rating): the scalar training loss tensor and the
        (userNo, movieNo) tensor of predicted ratings.
    """
    init_std = 1 / (f ** 0.5)
    LFM_P = tf.Variable(tf.random_normal([userNo, f], stddev=init_std, dtype=tf.float32))
    LFM_Q = tf.Variable(tf.random_normal([movieNo, f], stddev=init_std, dtype=tf.float32))
    LFM_predict_rating = tf.matmul(LFM_P, LFM_Q, transpose_b=True)

    # Squared error restricted to the cells flagged in train_record.
    squared_error = tf.reduce_sum(((train_rating - LFM_predict_rating) * train_record) ** 2)

    # L2 penalty is the sum of squared entries. The previous form,
    # reduce_sum(X)**2, squared the *sum* of the entries, which is near zero
    # whenever positive and negative entries cancel and so did not restrain
    # the parameter magnitudes.
    l2_penalty = tf.reduce_sum(LFM_P ** 2) + tf.reduce_sum(LFM_Q ** 2)

    LFM_loss = squared_error + lamda * l2_penalty
    return LFM_loss, LFM_predict_rating

# 2.2 BiasSVD模型

In [11]:
def createBSVD():
    """Build the BiasSVD graph: global mean + user bias + item bias + P.Q^T.

    Reads the notebook globals ``train_global_rating_mean``, ``userNo``,
    ``movieNo``, ``f``, ``lamda``, ``train_rating`` and ``train_record``.

    Returns:
        (loss, predict_rating): the scalar training loss tensor and the
        (userNo, movieNo) tensor of predicted ratings.
    """
    init_std = 1 / (f ** 0.5)
    BIAS_u = train_global_rating_mean
    # Biases are shaped (userNo, 1) and (1, movieNo) so that adding them
    # broadcasts over the full user x movie grid.
    BIAS_bu = tf.Variable(tf.zeros((userNo, 1), dtype=tf.float32))
    BIAS_bi = tf.Variable(tf.zeros((1, movieNo), dtype=tf.float32))
    BIAS_P = tf.Variable(tf.random_normal([userNo, f], stddev=init_std, dtype=tf.float32))
    BIAS_Q = tf.Variable(tf.random_normal([movieNo, f], stddev=init_std, dtype=tf.float32))
    BIAS_predict_rating = BIAS_u + BIAS_bu + BIAS_bi + tf.matmul(BIAS_P, BIAS_Q, transpose_b=True)

    # Squared error restricted to the cells flagged in train_record.
    squared_error = tf.reduce_sum(((train_rating - BIAS_predict_rating) * train_record) ** 2)

    # L2 penalty: sum of squared entries of every trainable tensor. The
    # previous reduce_sum(X)**2 squared the sum instead, which lets large
    # entries of opposite sign cancel and escape the penalty.
    l2_penalty = (
        tf.reduce_sum(BIAS_P ** 2)
        + tf.reduce_sum(BIAS_Q ** 2)
        + tf.reduce_sum(BIAS_bu ** 2)
        + tf.reduce_sum(BIAS_bi ** 2)
    )

    BIAS_loss = squared_error + lamda * l2_penalty
    return BIAS_loss, BIAS_predict_rating

# 2.3 SVD++模型

In [12]:
def createSVD():
    """Build the SVD++ graph.

    Prediction for user u and item i:

        mu + b_u + b_i + q_i . (p_u + |N(u)|^-0.5 * sum_{j in N(u)} y_j)

    where N(u) is the set of items flagged for u in ``train_record``.
    Reads the notebook globals ``train_global_rating_mean``, ``userNo``,
    ``movieNo``, ``f``, ``lamda``, ``train_rating`` and ``train_record``.

    Returns:
        (loss, predict_rating): the scalar training loss tensor and the
        (userNo, movieNo) tensor of predicted ratings.
    """
    init_std = 1 / (f ** 0.5)
    SVD_u = train_global_rating_mean
    SVD_bu = tf.Variable(tf.zeros((userNo, 1), dtype=tf.float32))
    SVD_bi = tf.Variable(tf.zeros((1, movieNo), dtype=tf.float32))
    SVD_P = tf.Variable(tf.random_normal([userNo, f], stddev=init_std, dtype=tf.float32))
    SVD_Q = tf.Variable(tf.random_normal([movieNo, f], stddev=init_std, dtype=tf.float32))
    SVD_y = tf.Variable(tf.random_normal([movieNo, f], stddev=init_std, dtype=tf.float32))

    # sqrt(|N(u)|) as a (userNo, 1) column that broadcasts across the factor
    # axis. Users with no training ratings get a count of 1 to avoid
    # dividing by zero (their implicit sum is zero anyway).
    rated_counts = np.maximum(np.sum(train_record, axis=1), 1)
    SVD_Nu = np.sqrt(rated_counts.astype(np.float32)).reshape(-1, 1)

    SVD_tr = tf.constant(train_record, dtype=tf.float32)

    # Normalised sum of y_j over each user's rated items: shape (userNo, f).
    # The earlier code multiplied train_record by (Q . y^T), which pairs
    # q_j with y_i (roles swapped relative to the formula above) and
    # materialises a movieNo x movieNo intermediate.
    implicit_profile = tf.matmul(SVD_tr, SVD_y) / SVD_Nu

    SVD_predict_rating = (
        SVD_u + SVD_bu + SVD_bi
        + tf.matmul(SVD_P + implicit_profile, SVD_Q, transpose_b=True)
    )

    # Squared error restricted to the cells flagged in train_record.
    squared_error = tf.reduce_sum(((train_rating - SVD_predict_rating) * train_record) ** 2)

    # L2 penalty: sum of squared entries (previously the square of the sum,
    # which does not penalise magnitude).
    l2_penalty = (
        tf.reduce_sum(SVD_P ** 2)
        + tf.reduce_sum(SVD_Q ** 2)
        + tf.reduce_sum(SVD_bu ** 2)
        + tf.reduce_sum(SVD_bi ** 2)
        + tf.reduce_sum(SVD_y ** 2)
    )

    SVD_loss = squared_error + lamda * l2_penalty
    return SVD_loss, SVD_predict_rating

# 2.4 ASVD模型

In [13]:
def createASVD():
    """Build the Asymmetric-SVD (ASVD) graph.

    Prediction for user u and item i, with baseline b_uj = mu + b_u + b_j:

        b_ui + q_i . ( |R(u)|^-0.5 * sum_{j in R(u)} (r_uj - b_uj) * x_j
                     + |N(u)|^-0.5 * sum_{j in N(u)} y_j )

    Both R(u) and N(u) are taken from the *training* data
    (``train_rating`` / ``train_record``). The earlier version built the
    x-term from ``test_rating`` and ``test_record``, which fed the held-out
    labels into the very predictions they are later scored against; it also
    multiplied the residual elementwise after the matrix product instead of
    weighting each x_j inside the sum.

    Reads the notebook globals ``train_global_rating_mean``, ``userNo``,
    ``movieNo``, ``f``, ``lamda``, ``train_rating`` and ``train_record``.

    Returns:
        (loss, predict_rating): the scalar training loss tensor and the
        (userNo, movieNo) tensor of predicted ratings.
    """
    init_std = 1 / (f ** 0.5)
    ASVD_u = train_global_rating_mean
    ASVD_bu = tf.Variable(tf.zeros((userNo, 1), dtype=tf.float32))
    ASVD_bi = tf.Variable(tf.zeros((1, movieNo), dtype=tf.float32))
    ASVD_Q = tf.Variable(tf.random_normal([movieNo, f], stddev=init_std, dtype=tf.float32))
    ASVD_x = tf.Variable(tf.random_normal([movieNo, f], stddev=init_std, dtype=tf.float32))
    ASVD_y = tf.Variable(tf.random_normal([movieNo, f], stddev=init_std, dtype=tf.float32))

    ASVD_train_record = tf.constant(train_record, dtype=tf.float32)
    ASVD_train_rating = tf.constant(train_rating, dtype=tf.float32)

    # sqrt of each user's number of training ratings, as a (userNo, 1)
    # column. A count of 0 is replaced by 1 to avoid division by zero; the
    # corresponding sums are zero in that case.
    rated_counts = np.maximum(np.sum(train_record, axis=1), 1)
    ASVD_Nu = np.sqrt(rated_counts.astype(np.float32)).reshape(-1, 1)

    # Baseline estimate for every (user, item) cell.
    ASVD_baseline = ASVD_u + ASVD_bu + ASVD_bi

    # Residual r_uj - b_uj, zeroed wherever no training rating exists.
    ASVD_residual = (ASVD_train_rating - ASVD_baseline) * ASVD_train_record

    # Per-user factor profiles, both of shape (userNo, f).
    explicit_profile = tf.matmul(ASVD_residual, ASVD_x) / ASVD_Nu
    implicit_profile = tf.matmul(ASVD_train_record, ASVD_y) / ASVD_Nu

    ASVD_predict_rating = ASVD_baseline + tf.matmul(
        explicit_profile + implicit_profile, ASVD_Q, transpose_b=True
    )

    # Squared error restricted to the cells flagged in train_record.
    squared_error = tf.reduce_sum(((train_rating - ASVD_predict_rating) * train_record) ** 2)

    # L2 penalty: sum of squared entries (previously the square of the sum,
    # which does not penalise magnitude).
    l2_penalty = (
        tf.reduce_sum(ASVD_x ** 2)
        + tf.reduce_sum(ASVD_y ** 2)
        + tf.reduce_sum(ASVD_bu ** 2)
        + tf.reduce_sum(ASVD_bi ** 2)
        + tf.reduce_sum(ASVD_Q ** 2)
    )

    ASVD_loss = squared_error + lamda * l2_penalty
    return ASVD_loss, ASVD_predict_rating



# 3. 模型训练

In [14]:
# Model selection: exactly one of the builders below should be active.
# Each returns the (loss, predicted-rating) tensor pair used by the
# optimizer and metric cells that follow.
#loss,predict = createLFM()
#loss,predict = createBSVD()
#loss,predict = createSVD()
loss,predict = createASVD()

In [15]:
# Step size passed to the optimizer created in the next cell.
learning_rate = 1e-4

In [16]:
# Optimizer choice. The commented lines are alternatives that were tried;
# the active one is plain gradient descent with the step size set above.
#optimizer = tf.train.AdamOptimizer(learning_rate)
#optimizer = tf.train.AdamOptimizer()
#optimizer = tf.train.GradientDescentOptimizer(1e-5)
optimizer = tf.train.GradientDescentOptimizer(learning_rate)

In [17]:
# Training op: one optimizer step on the selected model's loss.
# The commented lines are earlier variants kept for reference.
#train = optimizer.minimize(loss,global_step= step)
#train = optimizer.minimize(BIAS_loss)
train = optimizer.minimize(loss)

In [18]:
# Alternative metric (disabled): root-mean-square error over the test ratings.
#RMSE = (tf.reduce_sum(((test_rating-predict)*test_record)**2)/testNo)**0.5

# Mean absolute error over the test ratings: absolute differences are masked
# by test_record so only cells with a test rating contribute, then the total
# is divided by the number of test rows.
test_abs_error = tf.abs(test_rating - predict) * test_record
MAE = tf.reduce_sum(test_abs_error) / testNo

In [19]:
# Register the metric as a scalar summary under the tag "MAE"
# (the RMSE variant is disabled together with its tensor above).
#tf.summary.scalar("RMSE",RMSE)
tf.summary.scalar("MAE",MAE)

<tf.Tensor 'MAE:0' shape=() dtype=string>

In [20]:
# Single op that evaluates every summary registered so far.
summaryMerged = tf.summary.merge_all()

In [21]:
# Path handed to the summary writer in the next cell.
filename='tensorboard'

In [22]:
# Summary writer targeting the path above; no graph is passed, so only the
# summaries added explicitly in the training loop are written.
writer = tf.summary.FileWriter(filename)

In [23]:
# Session used to run the initializer and the training loop below.
sess = tf.Session()

In [24]:
# Op that initializes every tf.Variable created by the selected model builder.
init = tf.global_variables_initializer()

In [25]:
# Run the initializer so the variables hold their starting values.
sess.run(init)

In [26]:
# Run 500 optimizer steps. Each step also evaluates the merged summaries and
# records them against the step index; the index is printed every 100 steps
# as a coarse progress indicator.
for i in range(500):
    if i % 100 == 0:
        print(i)
    _, movie_summary = sess.run([train, summaryMerged])
    writer.add_summary(movie_summary, i)

# The writer buffers events; flush so the final steps are on disk as soon as
# the loop ends rather than whenever the writer next flushes on its own.
writer.flush()



0
100
200
300
400
