In [1]:
import os
import sys
import pandas as pd
import numpy as np
import random
import time
import tensorflow as tf
import math
from IPython.display import clear_output

In [2]:
# Helper function
def newPath(path):
    if not os.path.isdir(path):
        os.mkdir(path)
        
def writeProgress(msg, count, total):
    sys.stdout.write(msg + "{:.2%}\r".format(count/total))
    sys.stdout.flush()

In [3]:
def relu(x):
    return np.maximum(0,x)  

def softmax(x):
    exp_x = np.exp(x)
    softmax_x = exp_x / np.sum(exp_x)
    return softmax_x 

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

### Load numpy array

In [4]:
all_npy = np.load('./npy/all_4876.npy')
movie_genre = np.load('./npy/movie_genre.npy')
usr_following = np.load('./npy/user_followings.npy')
usr_genre = np.load('./npy/user_genre.npy')

print('All features:', all_npy.shape)
print('Movie genre:', movie_genre.shape)
print('User following:', usr_following.shape)
print('User genre:', usr_genre.shape)

All features: (165, 4876)
Movie genre: (165, 20)
User following: (1582, 165)
User genre: (1582, 20)


### Normalize usr_genre

In [5]:
usr_genre_norm = np.zeros(usr_genre.shape)
for i in range(len(usr_genre)):
    usr_genre_norm[i] = usr_genre[i]/np.max(usr_genre[i])
print(usr_genre_norm.shape)

(1582, 20)


In [6]:
print('Before:', usr_genre)
print('After:', usr_genre_norm)

Before: [[2 1 0 ... 1 0 0]
 [4 8 4 ... 0 0 0]
 [2 2 2 ... 1 0 0]
 ...
 [5 3 0 ... 1 1 0]
 [2 2 0 ... 0 1 0]
 [3 2 0 ... 1 1 0]]
After: [[0.22222222 0.11111111 0.         ... 0.11111111 0.         0.        ]
 [0.44444444 0.88888889 0.44444444 ... 0.         0.         0.        ]
 [0.4        0.4        0.4        ... 0.2        0.         0.        ]
 ...
 [0.26315789 0.15789474 0.         ... 0.05263158 0.05263158 0.        ]
 [0.28571429 0.28571429 0.         ... 0.         0.14285714 0.        ]
 [0.33333333 0.22222222 0.         ... 0.11111111 0.11111111 0.        ]]


# Training & testing split

## Prepare

In [7]:
#The number of followers for each movie
moive_followers = np.sum(usr_following, axis=0)
# print(moive_followers)

print('Min number of followers:', np.min(moive_followers))
print('Max number of followers:', np.max(moive_followers))

asc = np.sort(moive_followers)
# print(each_user)
# print(asc)
desc = np.flip(asc)
print(desc)

over5 = 0
for num in moive_followers:
    if num >= 5:
        over5 += 1
print('The num of followers over 5:', over5)

Min number of followers: 1
Max number of followers: 520
[520 487 442 431 394 393 384 384 382 368 357 348 342 341 340 331 327 320
 317 307 300 291 288 287 278 277 275 273 260 256 254 239 232 230 230 218
 217 217 210 208 207 200 193 192 183 182 178 173 172 170 169 168 167 164
 162 159 152 150 149 148 146 146 145 143 142 138 136 134 134 132 126 126
 125 125 124 123 123 122 122 119 118 117 116 114 110 110 109 107 105  97
  97  96  93  90  89  89  83  82  82  81  80  78  78  78  77  76  75  73
  73  72  72  71  70  68  66  64  64  63  61  61  61  59  58  58  52  49
  49  48  46  44  43  41  41  40  40  39  37  37  37  35  33  32  31  31
  29  28  26  25  24  23  23  22  20  20  19  19  18  18  14  12  11  11
   9   4   1]
The num of followers over 5: 163


In [8]:
#The number of following movie for each user
each_user = np.sum(usr_following, axis=1)
# print(each_user)

print('Min number of followings:', np.min(each_user))
print('Max number of followings:', np.max(each_user))

asc = np.sort(each_user)
# print(each_user)
# print(asc)
desc = np.flip(asc)
np.set_printoptions(threshold=sys.maxsize)
print(desc)

Min number of followings: 10
Max number of followings: 133
[133  88  86  61  59  55  52  52  48  47  45  45  44  44  44  43  43  43
  43  42  42  42  42  42  42  41  41  41  41  41  41  40  40  39  39  39
  38  37  36  36  35  35  35  34  34  34  34  34  33  33  32  32  32  32
  32  31  31  31  31  31  31  31  31  31  31  30  30  30  30  30  30  30
  30  29  29  29  29  29  29  28  27  27  27  27  27  27  27  26  26  26
  26  26  26  26  26  26  26  26  26  26  26  26  26  26  26  26  25  25
  25  25  25  25  25  25  25  25  25  25  25  25  25  25  24  24  24  24
  24  24  24  24  24  24  24  24  24  24  24  23  23  23  23  23  23  23
  23  23  23  23  23  23  23  23  23  23  23  23  22  22  22  22  22  22
  22  22  22  22  22  22  22  22  22  22  22  22  22  22  22  22  21  21
  21  21  21  21  21  21  21  21  21  21  21  21  21  21  21  21  21  21
  21  20  20  20  20  20  20  20  20  20  20  20  20  20  20  20  20  20
  20  20  20  20  20  20  20  20  20  20  20  20  20  19  19  19 

In [9]:
print('Over 10:', np.sum(each_user >= 10))
print('Over 12:', np.sum(each_user >= 12))
print('Over 14:', np.sum(each_user >= 14))
print('Over 16:', np.sum(each_user >= 16))
print('Over 18:', np.sum(each_user >= 18))
print('Over 20:', np.sum(each_user >= 20))

Over 10: 1582
Over 12: 937
Over 14: 613
Over 16: 440
Over 18: 315
Over 20: 229


In [10]:
over12_idx = np.nonzero(each_user >= 12)[0]
over12_idx.shape

(937,)

## Setup 

In [11]:
usr_nb = len(usr_following) # the number of users
movie_nb = len(movie_genre)  # the number of movies

print(usr_nb, movie_nb)

1582 165


In [12]:
usr_test_amount = 150
movie_test_amount = 32

print(usr_test_amount, movie_test_amount)

150 32


In [15]:
usr_idx = [i for i in range(len(usr_following))]
print(len(usr_idx))

random.seed(42)
test_idx = sorted(random.sample(usr_idx, usr_test_amount))
print(len(test_idx), test_idx[:10])

1582
150 [13, 51, 54, 61, 65, 88, 93, 96, 114, 130]


In [18]:
#Training
train_t = []
train_f = []
# Testing
test_t = []
test_f = []
test_pos = -1

for i in range(usr_nb):
    
    t_for_train = []
    f_for_train = []
    
    if i not in test_idx: #if not in test id, just append it to true or false list
        for j in range(movie_nb):
            if usr_following[i][j] == 1:
                t_for_train.append(j)
            else:
                f_for_train.append(j)
                
        train_t.append(t_for_train)
        train_f.append(f_for_train)
        
    else: #if in test id, choose 2 true and other 
        test_pos += 1
        temp_t = []
        temp_f = []
        
        for j in range(movie_nb):
            
            if usr_following[i][j] == 1:
                temp_t.append(j)
            else:
                temp_f.append(j)
        
        # random choose half true and half false for test 
        t_for_test = random.sample(temp_t, math.ceil(0.5*len(temp_t)))
        f_for_test  = random.sample(temp_f, movie_test_amount-len(t_for_test))
        
        test_t.append(t_for_test)
        test_f.append(f_for_test)
        
        #other for training
        t_for_train = [item for item in temp_t if not item in t_for_test]
        f_for_train = [item for item in temp_f if not item in f_for_test]
        
        train_t.append(t_for_train)
        train_f.append(f_for_train)

In [19]:
# train_t[i] 代表的是user i positive feedback
print('The length of train_t:',len(train_t))
print('The length of train_f:',len(train_f))
print('The length of test_t:',len(test_t))
print('The length of test_f:',len(test_f))

The length of train_t: 1582
The length of train_f: 1582
The length of test_t: 150
The length of test_f: 150


## Stat

In [20]:
#average num of following for training user
total_train = 0
for t in train_t:
    total_train += len(t)
    
avg = total_train / usr_nb
print('Training:', avg)

#average num of following for testing user
total_test = 0
for t in test_t:
    total_test += len(t)
avg = total_test / usr_test_amount
print('Testing:', avg)

Training: 14.139064475347661
Testing: 7.1866666666666665


In [24]:
all_auxilary = [i for i in range(movie_nb)]

# Recommendation model

In [22]:
SAVE_NAME = 'MRM_ALL_Embedding200_L2'
LATENT_FOLDER = './latent_factor/MRM_ALL/Embedding200_L2/'
newPath(LATENT_FOLDER)

In [23]:
latent_dim = 128 # latent dims
ft_dim = all_npy.shape[1] # feature dims
embedding_dims = 200

print(latent_dim, ft_dim, embedding_dims)

128 4876 200


In [27]:
tf.reset_default_graph()

user = tf.placeholder(tf.int32,shape=(1,))
i = tf.placeholder(tf.int32, shape=(1,))
j = tf.placeholder(tf.int32, shape=(1,))

#多少個auxliary 
xf = tf.placeholder(tf.float32, shape=(None,ft_dim))
l_id = tf.placeholder(tf.int32, shape=(None,))
l_id_len = tf.placeholder(tf.int32,shape=(1,))
r = tf.placeholder(tf.float32,shape=(None,))

image_i = tf.placeholder(tf.float32, [1, ft_dim])
image_j = tf.placeholder(tf.float32, [1, ft_dim])

with tf.variable_scope("item_level"):
    user_latent = tf.get_variable("user_latent", [usr_nb, latent_dim],
                                  initializer=tf.random_normal_initializer(0,0.1,seed=3))
    item_latent = tf.get_variable("item_latent", [movie_nb, latent_dim],
                                  initializer=tf.random_normal_initializer(0,0.1,seed=3)) 
    aux_item = tf.get_variable("aux_item", [movie_nb, latent_dim],
                               initializer=tf.random_normal_initializer(0,0.1,seed=3))
    
    W1 = tf.get_variable("W1", [usr_nb, latent_dim],
                         initializer=tf.contrib.layers.xavier_initializer())
    Wu = tf.get_variable("Wu", [latent_dim,latent_dim], 
                         initializer=tf.contrib.layers.xavier_initializer())
    Wy = tf.get_variable("Wy", [movie_nb, latent_dim, latent_dim],
                         initializer=tf.contrib.layers.xavier_initializer())
    Wa = tf.get_variable("Wa", [latent_dim, latent_dim],
                         initializer=tf.contrib.layers.xavier_initializer())
    Wv = tf.get_variable("Wv", [latent_dim, ft_dim],
                         initializer=tf.contrib.layers.xavier_initializer())
    
    aux_new = tf.get_variable("aux_new", [1, latent_dim], initializer=tf.constant_initializer(0.0))
    ########## Error part, how to get auxisize dynamically
    ####aux_size= tf.get_variable(name='aux_size', initializer=l_id.get_shape().as_list()[-1])
    
with tf.variable_scope('feature_level'):
    embedding = tf.get_variable("embedding", [embedding_dims,ft_dim],
                                initializer=tf.contrib.layers.xavier_initializer())
    Beta = tf.get_variable("beta", [usr_nb, embedding_dims],
                           initializer=tf.random_normal_initializer(0.00001,0.000001,seed=10))
    
#lookup the latent factors by user and id
u = tf.nn.embedding_lookup(user_latent, user)
vi = tf.nn.embedding_lookup(item_latent, i)
vj = tf.nn.embedding_lookup(item_latent, j)

w1 = tf.nn.embedding_lookup(W1, user) #(1*k)
wu = Wu
#wu = tf.squeeze(tf.nn.embedding_lookup(Wu, user)) #(k*k)
wy = tf.squeeze(tf.nn.embedding_lookup(Wy, i)) #(k*k)
wa = Wa
#wa = tf.squeeze(tf.nn.embedding_lookup(Wa, user)) #(k*k)
wv = Wv
#wv = tf.squeeze(tf.nn.embedding_lookup(Wv, user)) #(k,l)

beta = tf.nn.embedding_lookup(Beta, user) #user feature latent factor

In [28]:
a_list = tf.Variable([])
q = tf.constant(0)

def att_cond(q,a_list):
    return tf.less(q,l_id_len[0])

def att_body(q,a_list):
    xfi = tf.expand_dims(xf[q],0) #(1,ft_dim)
    
    a_list = tf.concat([a_list,[(tf.matmul( w1, tf.nn.relu( tf.matmul(wu, u, transpose_b=True) +
        tf.matmul(wy, tf.expand_dims(tf.nn.embedding_lookup(item_latent,l_id[q]),0), transpose_b=True) +
        tf.matmul(wa, tf.expand_dims(tf.nn.embedding_lookup(aux_item, l_id[q]),0), transpose_b=True) +
        tf.matmul(wv, xfi, transpose_b=True)))[0][0])*r[q]]],0)
    q += 1
    return q,  a_list

_, a_list = tf.while_loop(att_cond,att_body,[q,a_list],shape_invariants=[q.get_shape(),tf.TensorShape([None])])

a_list_soft = tf.nn.softmax(a_list)


aux_np = tf.expand_dims(tf.zeros(latent_dim),0)
q = tf.constant(0)

def sum_att_cond(q,aux_np):
    return tf.less(q,l_id_len[0])

def sum_att_body(q,aux_np):
    #aux_np+=a_list_soft[q]*tf.expand_dims(tf.nn.embedding_lookup(aux_item, l_id[q]),0)
    aux_np = tf.math.add_n([aux_np,a_list_soft[q]*tf.expand_dims(tf.nn.embedding_lookup(aux_item, l_id[q]),0)]) 
    q += 1
    return q, aux_np

_, aux_np = tf.while_loop(sum_att_cond, sum_att_body, [q,aux_np])

aux_part = tf.matmul(aux_np, vi, transpose_b=True)
#tf.print('aux attention:',aux_np)
aux_np += u #user_latent factor + sum (alpha*auxilary)
aux_new = tf.assign(aux_new,aux_np) #把aux_new 的 值變成aux_np


latent_i_part = tf.matmul(aux_new, vi, transpose_b=True)
feature_i_part = tf.matmul(beta,(tf.matmul(embedding,image_i, transpose_b=True)))
latent_j_part = tf.matmul(aux_new, vj, transpose_b=True)
feature_j_part = tf.matmul(beta,(tf.matmul(embedding,image_j, transpose_b=True)))
only_aux_i_part = tf.matmul(aux_np, vi, transpose_b=True)
only_aux_j_part = tf.matmul(aux_np, vj, transpose_b=True)

#矩陣中對應函數各自相乘
# ex: tf.matmul(thetav,(tf.matmul(embedding, image_i, transpose_b=True)))
xui = tf.matmul(aux_new, vi, transpose_b=True)+ tf.matmul(beta,(tf.matmul(embedding,image_i, transpose_b=True)))
xuj = tf.matmul(aux_new, vj, transpose_b=True)+ tf.matmul(beta,(tf.matmul(embedding,image_j, transpose_b=True)))

xuij = tf.subtract(xui,xuj)

norm_par = [tf.reduce_sum(tf.multiply(u, u)),tf.reduce_sum(tf.multiply(vi, vi)),tf.reduce_sum(tf.multiply(vj, vj)),
           tf.reduce_sum(tf.multiply(w1, w1)),tf.reduce_sum(tf.multiply(wu, wu)),tf.reduce_sum(tf.multiply(wy, wy)),
           tf.reduce_sum(tf.multiply(wa, wa)),tf.reduce_sum(tf.multiply(wv,wv)),tf.reduce_sum(tf.multiply(beta,beta))]
l2_norm = tf.add_n([
            0.0001 * tf.reduce_sum(tf.multiply(u, u)),
            0.0001 * tf.reduce_sum(tf.multiply(vi, vi)),
            0.0001 * tf.reduce_sum(tf.multiply(vj, vj)),
  
            0.0001 * tf.reduce_sum(tf.multiply(w1, w1)),
            0.01 * tf.reduce_sum(tf.multiply(wu, wu)),
            0.01 * tf.reduce_sum(tf.multiply(wy, wy)),
            0.01 * tf.reduce_sum(tf.multiply(wa, wa)),
            0.01 * tf.reduce_sum(tf.multiply(wv,wv)),
            
            0.001 * tf.reduce_sum(tf.multiply(beta,beta)),
            0.01 * tf.reduce_sum(tf.multiply(embedding,embedding))
            
          ])

loss = l2_norm - tf.log(tf.sigmoid(xuij)) # objective funtion
train_op = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(loss) #parameter optimize 
auc = tf.reduce_mean(tf.to_float(xuij > 0))

Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Use tf.cast instead.


In [34]:
print('Start time:', time.ctime())

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
loss_acc_list = []
t0 = time.time()

#use_true=init_list_of_objects(136)
#use_test=init_list_of_objects(136)

#train_pair_t=[] #positive feedback
#train_pair_f=[] #negative feedback
train_yes_id=[]

for q in range(5):
    print('Iteration:',q)
    train_auc = 0
    total_loss = 0
    xuij_auc = 0
    length = 0
    
    for z in range(usr_nb):
        writeProgress('Progress:', z, usr_nb)
        """
        yes 用來存放選擇到的YouTuber feature (for auxilary)
        yesr 用來存放user對該YouTuber的喜好程度(user_category 跟 YouTuber_category的相似性)
        r_3 用來存放user 對該YouTuber種類的偏好(取max)
        """
        yes = []
        yesr = []
        
#         #隨機選3個sample true's YouTuber
#         sample = random.sample(train_t[z],len(train_t[z]))
        #選全部的Positive
        sample = random.sample(train_t[z],len(train_t[z]))
        
        train_yes_id.append(sample) #sample全部丟進去
        
        #sample=random.sample(train_t[z]+train_f[z],len(train_t[z])+len(train_f[z]))
        
        #change
        r_3 = np.zeros(len(sample)) 
        alpha_history = []
        a_list_history = []
        U_history = []
        Y_history = []
        
        #print(len(sample))
        #check if all YouTuber are in train_t or train_f
        #if len(train_t[z])+len(train_f[z]) != 88:
            #print(z,len(train_t[z])+len(train_f[z]))
         
        for b in range(len(sample)):
            yes.append(all_npy[sample[b]])
            yesr.append(movie_genre[sample[b]] * usr_genre_norm[z])
            #print('YouTuber_category ', YouTuber_category[sample[k]])
            #print('User_category ',user_category_norm[z])
        #print(len(yes))
        
        for b in range(len(yesr)):
            r_3[b]=max(yesr[b])
        #print('r_3:',r_3)
        
        yes = np.array(yes)
        #print('user shape should be ',np.array([z]).shape)
        #print('xf shape should be ',yes.shape)
        #print('r shape should be ',np.array(r_3).shape)
        #print('l_id shape should be ',np.array(sample).shape)
        
        #not_used_list = list(set(train_t[z]).difference(set(sample)))
        
        # positive 
        train_t_sample = random.sample(train_t[z],len(train_t[z]))
        #print('number of positive feedback', len(train_t[z]))
        # negative
#         train_f_sample = random.sample(train_f[z],20)
        
        for ta in train_t_sample:
            #print(ta,'--> positive feedback')
            
            pos = sample.index(ta)
            
            image_1=np.expand_dims(all_npy[ta],0)
            train_f_sample = random.sample(train_f[z],10)
            
            for b in train_f_sample:
                image_2=np.expand_dims(all_npy[b],0) #(1,2048)
                #print('Image_2 shape',image_2.shape)
            
                #use_test[z].append(b)
                _embedding,_a_list,r3,_auc, _loss,_=sess.run([embedding,a_list,a_list_soft,auc,loss,train_op], feed_dict={user: [z],
                                        i: [ta], j: [b], xf: yes , l_id:sample, l_id_len:[len(sample)],r:r_3,
                                        image_i:image_1,image_j:image_2})
                #print(XUIJ)
                #print('loss=',_loss)
                #print('auc=',_auc)
                
                #print('after softmax:',r3)
                #print('before softmax:',_a_list)
                #print('embedding:',_embedding)
                #print('---------------------------------------------------')
                a_list_history.append(_a_list)
                alpha_history.append(r3)
                train_auc += _auc
                total_loss += _loss
                length += 1
            #now1+=1
        
        np.save(LATENT_FOLDER + str(q) + '_' + str(z),_embedding)
    
    #print('mine:',xuij_auc/136)   
    #print('a_list_soft:',r3)
    print("{:<20}{}".format('total_loss', total_loss/length))
    print("{:<20}{}".format('train_auc:', train_auc/length))
    
    loss_acc_list.append([total_loss/length, train_auc/length, time.time()-t0])
    
    print('\tCurrent time:', time.ctime())
    print('==================================================')
    
print('Total cost time:',time.time()-t0)

print('End time:', time.ctime())

Start time: Sat Mar  7 12:30:39 2020
Iteration: 0
total_loss          [[0.55990512]]
train_auc:          0.8203773247496423
	Current time: Sat Mar  7 12:51:22 2020
Iteration: 1
total_loss          [[0.47979776]]
train_auc:          0.8606983190271816
	Current time: Sat Mar  7 13:12:03 2020
Iteration: 2
total_loss          [[0.43251237]]
train_auc:          0.8858369098712446
	Current time: Sat Mar  7 13:32:41 2020
Iteration: 3
total_loss          [[0.39716214]]
train_auc:          0.9014440271816881
	Current time: Sat Mar  7 13:53:28 2020
Iteration: 4
total_loss          [[0.37242811]]
train_auc:          0.9131840128755365
	Current time: Sat Mar  7 14:14:11 2020
Total cost time: 6211.124539375305
End time: Sat Mar  7 14:14:11 2020


In [35]:
for i in range(len(loss_acc_list)):
    print('Iteration:',i)
    print('loss=',loss_acc_list[i][0])
    print('acc=',loss_acc_list[i][1])
#     print('time=',loss_acc_list[i][2])
    print('==================================================')

Iteration: 0
loss= [[0.55990512]]
acc= 0.8203773247496423
Iteration: 1
loss= [[0.47979776]]
acc= 0.8606983190271816
Iteration: 2
loss= [[0.43251237]]
acc= 0.8858369098712446
Iteration: 3
loss= [[0.39716214]]
acc= 0.9014440271816881
Iteration: 4
loss= [[0.37242811]]
acc= 0.9131840128755365


# Get latent factor and Each weight

In [36]:
U, Y, A, A1, Au, Ay, Aa, Av, E, B = sess.run([user_latent, item_latent, aux_item, 
                                              W1, Wu, Wy, Wa, Wv, embedding, Beta])

In [37]:
print('User latent shape: ',U.shape)
print('photo latent shape: ', Y.shape)
print('Auxilary latent shape: ',A.shape)
print('W1 weight shape: ',A1.shape)
print('Wu weight shape:',Au.shape)
print('Wy weight shape:', Ay.shape)
print('Wa weight shape:', Aa.shape)
print('Wv weight shape:', Av.shape)
print('Embedding shape:', E.shape)
print('Beta shape:',B.shape)

User latent shape:  (1582, 128)
photo latent shape:  (165, 128)
Auxilary latent shape:  (165, 128)
W1 weight shape:  (1582, 128)
Wu weight shape: (128, 128)
Wy weight shape: (165, 128, 128)
Wa weight shape: (128, 128)
Wv weight shape: (128, 4876)
Embedding shape: (200, 4876)
Beta shape: (1582, 200)


In [38]:
np.savez('./weight/' + SAVE_NAME + '.npz', 
         U=U, Y=Y, A=A, A1=A1, Wu=Au, Wy=Ay, Wa=Aa, Wv=Av, E=E, B=B)

# Testing Part

In [39]:
'''
usr_test_amount = 150
movie_test_amount = 32
'''

#with Embedding
result = np.zeros((usr_test_amount, movie_nb))
RS = np.zeros((usr_test_amount, movie_nb))
#test_idx --> Test 的 index

test_yes_id = []

for s in range(usr_test_amount):
    print(s, test_idx[s])

    yes = []
    sample = random.sample(train_t[test_idx[s]],len(train_t[test_idx[s]])) #從training part 的positive feedback 取出YouTuber 當成Auxilary
    #sample=result_yes_id[now]
    test_yes_id.append(sample)
    alpha = np.zeros([len(sample)])
    
    for a in range(len(sample)):
        r = np.max(movie_genre[sample[a]] * usr_genre_norm[test_idx[s]]) #sample a 的category vec *user_category vec
        #print(test_idx[s])
        #print(np.dot(Au[test_idx[s]],np.expand_dims(U[test_idx[s]],0)))
        alpha[a] = np.dot(A1[test_idx[s]],(relu(np.dot(Au,np.expand_dims(U[test_idx[s]],0).T) +
                                                np.dot(Ay[sample[a]],np.expand_dims(Y[sample[a]],0).T) +
                                                np.dot(Aa,np.expand_dims(A[sample[a]],0).T) +
                                                np.dot(Av,np.expand_dims(all_npy[sample[a]],0).T)))) * r
    mul = np.zeros((1,latent_dim))
    
    print("{:<15}{}".format('alpha:', alpha))
    print("{:<15}{}".format('softmax alpha:', softmax(alpha)))
    print('==================================================')
    
    for i in range(len(sample)):
        mul += softmax(alpha)[i] * A[sample[i]] #attention alpha*Ai part 
    new_mul = mul + U[test_idx[s]]  #(U+auxilary)
    
    for k in range(movie_nb):
        result[s][k] = np.dot(new_mul,Y[k].T) #(U+auxilary)*photo latent factor
        RS[s][k] = np.dot(new_mul,Y[k].T) + np.dot(B[test_idx[s]], np.dot(E, all_npy[k].T))
        
#print(RS[s])

0 13
alpha:         [-1.86732083e-11  7.38520614e-09  1.25798476e-09 -1.00382685e-09
 -5.62949731e-10 -4.25034472e-09  1.29294239e-10]
softmax alpha: [0.14285714 0.14285714 0.14285714 0.14285714 0.14285714 0.14285714
 0.14285714]
1 51
alpha:         [ 2.46475607e-07  4.02408278e-08  4.61239915e-08  1.82962353e-08
  2.06763161e-08 -2.46709941e-08]
softmax alpha: [0.1666667  0.16666666 0.16666666 0.16666666 0.16666666 0.16666665]
2 54
alpha:         [9.67456655e-08 4.05519482e-08 1.11193161e-07 2.46216842e-08
 8.57371319e-08 8.87956095e-09]
softmax alpha: [0.16666667 0.16666666 0.16666667 0.16666666 0.16666667 0.16666666]
3 61
alpha:         [-1.44880152e-08 -3.92417516e-07 -1.99946223e-08 -9.54528901e-08
 -2.31607875e-07]
softmax alpha: [0.20000003 0.19999995 0.20000003 0.20000001 0.19999998]
4 65
alpha:         [-1.35771098e-07 -1.35036570e-07  1.04240152e-08  3.04834580e-08
 -1.09828319e-08  3.48865239e-08]
softmax alpha: [0.16666665 0.16666665 0.16666667 0.16666668 0.16666667 0.16666

In [40]:
#取出test的資料
testRS = np.zeros((usr_test_amount, movie_test_amount)) #shape 150*20
target = np.zeros((usr_test_amount, movie_test_amount))
#test_t 是true的
#test_f 是false的
        
for z in range(usr_test_amount):
    user_id = test_idx[z]
    #positive target YouTuber list
    youtube_t = test_t[z] 
    #not target YouTuber list
    youtube_f = test_f[z]
    
    #前兩個放target的RS
    for i in range(len(youtube_t)):
        testRS[z][i] = RS[z][youtube_t[i]]
        target[z][i] = 1
        
    for i in range(len(youtube_f)):
        testRS[z][i+len(youtube_t)] = RS[z][youtube_f[i]]

In [41]:
print(target.shape, testRS.shape)

(150, 32) (150, 32)


In [42]:
sumtarget = 0
for i in range(len(target)):
    #print(np.sum(target[i]))
    sumtarget += np.sum(target[i])
print('num of positive data in testing:',sumtarget)
print('total testing data:', usr_test_amount * movie_test_amount)

num of positive data in testing: 1078.0
total testing data: 4800


In [43]:
def topN(sortlist,n):
    topList = []
    for i in range(n):
        topList.append(sortlist.index(max(sortlist)))
        #print(max(sortlist))
        #print(sortlist.index(max(sortlist)))
        sortlist[sortlist.index(max(sortlist))] = -1000000000
    return topList

In [44]:
count_0_all = []
for i in range(len(testRS)):
    top_0 = topN(list(testRS[i]),int(np.sum(target[i])))
    count_0_all.append(top_0)
    #print(top_0)

acc_0 = 0
total = 0
for i in range(len(count_0_all)):
    for j in range(len(count_0_all[i])):
        #print(int(np.sum(target[i])))
        total+=int(np.sum(target[i]))
        if count_0_all[i][j] < int(np.sum(target[i])): #代表是0或1 (也就是target)
            acc_0 += 1
avg_acc = acc_0/100
#print('avg_accuarcy for count_0:',avg_acc)
#print(acc_0)

In [45]:
acc_0

507

In [46]:
total

8834

# Top n

In [47]:
def F1_score(prec,rec):
    f1 = (2*prec*rec)/(prec+rec)
    return f1

## Top 1

In [48]:
correct = 0
for i in range(len(testRS)):
    top_0 = topN(list(testRS[i]),1) #取一個
    count_0_all.append(top_0)
    #print(np.sum(target[i]))
    #print(top_0)
    if top_0[0] < int(np.sum(target[i])):
        correct += 1

In [49]:
top1_prec = correct/len(testRS)
top1_recall = correct/(sumtarget)
print('prec ',top1_prec,'recall ',top1_recall)
print('F1_score:',F1_score(top1_prec,top1_recall))

prec  0.52 recall  0.07235621521335807
F1_score: 0.12703583061889248


## Top 3

In [50]:
correct = 0
for i in range(len(testRS)):
    top_3 = topN(list(testRS[i]),3) #取一個
    count_0_all.append(top_3)
    #print(top_3)
    for j in range(len(top_3)):
        if top_3[j] < int(np.sum(target[i])):
            correct += 1

In [51]:
top3_prec = correct/(len(testRS)*3)
top3_recall = correct/(sumtarget)
print('prec ',top3_prec,'recall ',top3_recall)
print('F1_score:',F1_score(top3_prec,top3_recall))

prec  0.4444444444444444 recall  0.18552875695732837
F1_score: 0.2617801047120419


## Top 5

In [52]:
correct = 0
for i in range(len(testRS)):
    top_5 = topN(list(testRS[i]),5) #取一個
    count_0_all.append(top_5)
    #print(top_5)
    for j in range(len(top_5)):
        if top_5[j] < int(np.sum(target[i])):
            correct += 1

In [53]:
top5_prec = correct/(len(testRS)*5)
top5_recall = correct/(sumtarget)
print('prec ',top5_prec,'recall ',top5_recall)
print('F1_score:',F1_score(top5_prec,top5_recall))

prec  0.46266666666666667 recall  0.32189239332096475
F1_score: 0.37964989059080967


# pre_list

In [54]:
'''
usr_test_amount = 150
movie_test_amount = 16
'''
all_sort = []
pre_matrix = np.zeros(shape=(usr_test_amount, movie_test_amount))
for i in range(usr_test_amount):
    top_5 = topN(list(testRS[i]),5) #取一個
    #print(top_5)
    all_sort.append(topN(list(testRS[i]),len(testRS[i])))
    for j in range(len(top_5)):
        pre_matrix[i][top_5[j]] = 1

In [55]:
print(pre_matrix.shape, target.shape)

(150, 32) (150, 32)


# NDCG
* https://daiwk.github.io/posts/nlp-ndcg.html

In [56]:
#Ideal DCG，理想状况下的DCG。也就是说，相关性完全由高到低排序时算出的DCG：

def IDCG(ideal_list): #ideal_list example = [1,1,1,1,1,0,0,....]
    idcg = 0
    for i in range(len(ideal_list)):
        #print((2**true_list[i]-1),math.log2(i+2))
        idcg += (2**ideal_list[i]-1)/math.log2(i+2)
    #print('idcg',idcg)
    return idcg

def DCG(prec_list): #找出前n名的[1,1,1,0,...]
    dcg = 0
    for i in range(len(prec_list)):
        dcg += (2**prec_list[i]-1)/math.log2(i+2)
    #print('dcg',dcg)
    return dcg

In [57]:
total_ndcg = 0
num_ndcg = 5
for m in range(usr_test_amount):
    idcg = IDCG([1]*num_ndcg)
    pre_list = []
    for s in all_sort[m][:num_ndcg]:
        #print(s)
        #print(target[m][s])
        pre_list.append(target[m][s])
    dcg = DCG(pre_list)
    ndcg = dcg/idcg
    #print(ndcg)
    total_ndcg += ndcg
avg_ndcg = total_ndcg/usr_test_amount
print('NDCG:',avg_ndcg)

NDCG: 0.4692476981758553


# MAP

In [58]:
from sklearn.metrics import average_precision_score

In [59]:
total_prec = 0
for u in range(usr_test_amount):
    y_true = target[u]
    y_scores = pre_matrix[u]
    total_prec += average_precision_score(y_true, y_scores)
    
MAP = total_prec/usr_test_amount

print('MAP:', MAP)

MAP: 0.33385484988867326
