In [2]:
import numpy as np
import tensorflow as tf
import os
import random
from collections import defaultdict
import pandas as pd

def load_data_train():
    user_movie = defaultdict(set)
    data=pd.read_csv('BRP_datas\\BRP_common_user_book\\common_user_book_19_4VS5.csv')
    num_user=len(pd.unique(data['user_id']))
    num_book=len(pd.unique(data['book_id']))
    print('训练集借阅记录数：{}'.format(data.shape[0]))
    for row,val in data.iterrows():
        u = int(val['user_id'])
        i = int(val['book_id'])
        user_movie[u].add(i)   #

    print("num_user:", num_user)
    print("num_book", num_book)

    return num_user, num_book, user_movie


def load_data_test():
    user_movie = defaultdict(set)
    data=pd.read_csv('BRP_datas\\BRP_common_user_book\\common_user_book_19_5VS4.csv')
    num_user=len(pd.unique(data['user_id']))
    num_book=len(pd.unique(data['book_id']))
    print('测试集借阅记录数：{}'.format(data.shape[0]))
    for row,val in data.iterrows():
        u = int(val['user_id'])
        i = int(val['book_id'])
        user_movie[u].add(i)

    print("num_user:", num_user)
    print("num_book", num_book)

    return num_user, num_book, user_movie

def generate_test(user_movie_pair_test):
    """
    对每一个用户u，在user_movie_pair_test中随机找到他借阅过的一本书,保存在user_ratings_test，
    后面构造训练集和测试集需要用到。
    """
    user_test = dict()
    for u,i_list in user_movie_pair_test.items():
        user_test[u] = random.sample(user_movie_pair_test[u],1)[0]
    return user_test


def generate_train_batch(user_movie_pair_train,item_count,batch_size=50):
    t = []
    for b in range(batch_size):
        u = random.sample(user_movie_pair_train.keys(),1)[0]
        i = random.sample(user_movie_pair_train[u],1)[0]
        j = random.randint(0,item_count)
        while j in user_movie_pair_train[u]:
            j = random.randint(0,item_count)

        t.append([u,i,j])

    return np.asarray(t)


def generate_test_batch(user_ratings_test,user_movie_pair_test,item_count):
    """
    对于每个用户u，它的评分图书i是我们在user_ratings_test中随机抽取的，它的j是用户u所有没有借阅过的图书集合，
    比如用户u有1000本书没有借阅，那么这里该用户的测试集样本就有1000个
    """
    for u in user_movie_pair_test.keys():
        t = []
        i = user_ratings_test[u]
        for j in range(0,item_count):
            if not(j in user_movie_pair_test[u]):
                t.append([u,i,j])
        yield np.asarray(t)


def bpr_mf(user_count,item_count,hidden_dim):
    u = tf.placeholder(tf.int32,[None])
    i = tf.placeholder(tf.int32,[None])
    j = tf.placeholder(tf.int32,[None])

    user_emb_w = tf.get_variable("user_emb_w", [user_count+1, hidden_dim],
                                 initializer=tf.random_normal_initializer(0, 0.1))
    item_emb_w = tf.get_variable("item_emb_w", [item_count+1, hidden_dim],
                                 initializer=tf.random_normal_initializer(0, 0.1))

    u_emb = tf.nn.embedding_lookup(user_emb_w, u)
    i_emb = tf.nn.embedding_lookup(item_emb_w, i)
    j_emb = tf.nn.embedding_lookup(item_emb_w, j)


    x = tf.reduce_sum(tf.multiply(u_emb,(i_emb-j_emb)),1,keep_dims=True)

    mf_auc = tf.reduce_mean(tf.to_float(x>0))

    l2_norm = tf.add_n([
        tf.reduce_sum(tf.multiply(u_emb, u_emb)),
        tf.reduce_sum(tf.multiply(i_emb, i_emb)),
        tf.reduce_sum(tf.multiply(j_emb, j_emb))
    ])

    regulation_rate = 0.0001
    bprloss = regulation_rate * l2_norm - tf.reduce_mean(tf.log(tf.sigmoid(x)))

    train_op = tf.train.GradientDescentOptimizer(0.01).minimize(bprloss)
    return u, i, j, mf_auc, bprloss, train_op


user_count,item_count,user_movie_pair_train = load_data_train()
test_user_count,test_item_count,user_movie_pair_test = load_data_test()
user_ratings_test = generate_test(user_movie_pair_test)

print('user_ratings_test的值为：{}'.format(user_ratings_test))
with tf.Session() as sess:
    u,i,j,mf_auc,bprloss,train_op = bpr_mf(user_count,item_count,20)
    sess.run(tf.global_variables_initializer())

    for epoch in range(1,6):
        print('epoch的值为{}'.format(epoch))
        _batch_bprloss = 0
        for k in range(1,5000):

            uij = generate_train_batch(user_movie_pair_train,item_count)
            _bprloss,_train_op = sess.run([bprloss,train_op],
                                          feed_dict={u:uij[:,0],i:uij[:,1],j:uij[:,2]})

            _batch_bprloss += _bprloss

        print("epoch:",epoch)
        print("bpr_loss:",_batch_bprloss / k)
        print("_train_op")

        user_count = 0
        _auc_sum = 0.0

        for t_uij in generate_test_batch(user_ratings_test,user_movie_pair_test,item_count):
            _auc, _test_bprloss = sess.run([mf_auc, bprloss],
                                              feed_dict={u: t_uij[:, 0], i: t_uij[:, 1], j: t_uij[:, 2]}
                                              )
            user_count += 1
            _auc_sum += _auc
        print("test_loss: ", _test_bprloss, "test_auc: ", _auc_sum / user_count)
        print("")
    variable_names = [v.name for v in tf.trainable_variables()]
    values = sess.run(variable_names)
    for k, v in zip(variable_names, values):
        print("Variable: ", k)
        print("Shape: ", v.shape)
        print(v)


session1 = tf.Session()
u1_all = tf.matmul(values[0], values[1],transpose_b=True)
result_1 = session1.run(u1_all)
print (result_1)


p = np.squeeze(result_1)
# np.argsort(p)，将元素从小到大排列，提取对应的索引。找到了索引就是找到了书
ind = np.argsort(p)[:,-5:]
print('top5对应的索引为{}'.format(ind))

num=0
all_num_user_item=0
for ii in range(len(user_movie_pair_test)):
    num_user_item=0
    for jj in user_movie_pair_test[ii]:
        num_user_item+=1
        if jj in (ind[ii]):
            num+=1
    all_num_user_item+=num_user_item
print('num的值为:{}'.format(num))
print('用户的数目为{}'.format(len(user_movie_pair_test)))
print('用户喜欢的物品的数目为：{}'.format(all_num_user_item))
print('召回率为{}'.format(num/all_num_user_item))
print('准确率为{}'.format(num/(len(user_movie_pair_test)*5)))

W0717 09:21:50.828836  9044 deprecation.py:506] From <ipython-input-2-5e7103bdff04>:95: calling reduce_sum_v1 (from tensorflow.python.ops.math_ops) with keep_dims is deprecated and will be removed in a future version.
Instructions for updating:
keep_dims is deprecated, use keepdims instead
W0717 09:21:50.836815  9044 deprecation.py:323] From <ipython-input-2-5e7103bdff04>:97: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.cast` instead.


训练集借阅记录数：88
num_user: 66
num_book 88
测试集借阅记录数：89
num_user: 66
num_book 88
user_ratings_test的值为：{37: 10, 60: 28, 58: 60, 49: 85, 36: 76, 25: 15, 47: 41, 16: 77, 23: 65, 27: 23, 0: 47, 31: 74, 48: 61, 30: 31, 43: 50, 28: 5, 7: 38, 34: 22, 21: 52, 29: 58, 26: 7, 50: 78, 10: 57, 40: 59, 39: 8, 18: 19, 32: 11, 59: 83, 65: 13, 1: 33, 6: 20, 44: 64, 63: 4, 19: 37, 55: 17, 17: 14, 54: 55, 42: 84, 56: 2, 57: 12, 11: 46, 61: 26, 53: 29, 64: 27, 2: 18, 41: 82, 35: 0, 12: 45, 46: 87, 15: 53, 51: 9, 52: 42, 20: 1, 9: 66, 22: 43, 13: 25, 62: 73, 3: 6, 45: 86, 8: 39, 4: 51, 24: 79, 38: 48, 5: 49, 14: 44, 33: 32}
epoch的值为1
epoch: 1
bpr_loss: 0.662916138508864
_train_op
test_loss:  0.6159963 test_auc:  0.9760988264372854

epoch的值为2
epoch: 2
bpr_loss: 0.595301103916233
_train_op
test_loss:  0.5231968 test_auc:  0.9968120726672086

epoch的值为3
epoch: 3
bpr_loss: 0.5135010426570044
_train_op
test_loss:  0.41771066 test_auc:  0.9976867554765759

epoch的值为4
epoch: 4
bpr_loss: 0.42177647992500567
_train_op
test