In [1]:
import os
import pandas as pd
import numpy as np
import random
import time
import tensorflow as tf

In [2]:
def relu(x):
    return np.maximum(0,x)  

def softmax(x):
    exp_x = np.exp(x)
    softmax_x = exp_x / np.sum(exp_x)
    return softmax_x 

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

In [3]:
user_following = np.load('../Data/npy/user_following.npy')
image_2048 = np.load('../Data/npy/Image_2048D.npy')
user_category = np.load('../Data/npy/user_category_1216.npy')
YouTuber_category = np.load('../Data/npy/YouTuber_category_0.7.npy')

In [4]:
print('user_following shape ',user_following.shape)
print('image_2048 shape ',image_2048.shape)
print('user_category shape ',user_category.shape)
print('YouTuber_category shape ',YouTuber_category.shape)

user_following shape  (1489, 88)
image_2048 shape  (88, 2048)
user_category shape  (1489, 17)
YouTuber_category shape  (88, 17)


In [5]:
image_2048

array([[0.41365993, 0.38064754, 2.3639207 , ..., 0.57627535, 0.3821758 ,
        0.65140206],
       [0.788161  , 0.5106883 , 1.7793416 , ..., 0.77766   , 0.43860996,
        0.3568556 ],
       [0.76625705, 0.47573683, 2.83721   , ..., 0.5284711 , 0.7731626 ,
        0.448657  ],
       ...,
       [0.80085856, 1.1623696 , 1.8066967 , ..., 0.43688986, 0.22302635,
        0.31184107],
       [0.5818967 , 0.5491421 , 2.5990047 , ..., 1.1997312 , 0.96370906,
        0.3987947 ],
       [0.23862542, 0.5079424 , 1.8336693 , ..., 0.7668115 , 0.45325607,
        0.5253927 ]], dtype=float32)

In [6]:
user_category_norm = np.zeros(user_category.shape)
for i in range(len(user_category)):
    user_category_norm[i] = user_category[i]/np.max(user_category[i])
print('user_category after normalized by max...')
print('user_category_norm shape ',user_category_norm.shape)

user_category after normalized by max...
user_category_norm shape  (1489, 17)


In [7]:
following_true = [0]*len(user_following)
for i in range(len(user_following)):
    each_user = []
    for j in range(len(user_following[i])):
        if user_following[i][j] == 1:
            each_user.append(j)
    following_true[i] = each_user

In [26]:
following_true

[[13, 15, 16, 24, 29, 37, 44],
 [3, 25, 67, 71, 74, 75, 82],
 [8, 11, 24, 28, 41, 63, 67, 79, 86],
 [8, 24, 28, 29, 63, 70, 79],
 [8, 24, 37, 63, 67, 70, 79],
 [8, 17, 24, 25, 31, 43, 63, 67, 70, 73, 74, 79],
 [3, 10, 11, 22, 40, 44, 47, 74, 76, 82],
 [8, 17, 21, 27, 28, 32, 33, 45, 49, 67, 75, 82, 84],
 [3, 25, 63, 74, 79],
 [3, 10, 11, 14, 25, 40, 45, 54, 60, 65, 67, 74, 75, 79, 82],
 [3, 11, 22, 24, 40, 54, 57, 63, 67, 74, 76, 79],
 [7, 11, 14, 24, 25, 40, 43, 54, 63, 67, 75, 79],
 [7, 8, 24, 41, 63, 79],
 [0,
  11,
  12,
  25,
  26,
  27,
  32,
  33,
  42,
  44,
  45,
  60,
  67,
  69,
  71,
  72,
  74,
  76,
  79,
  82,
  84],
 [8, 24, 29, 43, 57, 63, 67, 76, 79],
 [3, 8, 25, 28, 40, 75, 79],
 [8, 54, 63, 67, 70, 79],
 [3, 9, 10, 25, 40, 44, 45, 54, 60, 67, 71, 74, 76, 82, 84],
 [8, 10, 28, 40, 43, 45, 54, 67, 76, 79],
 [25, 32, 33, 67, 71, 82, 83, 84],
 [8, 24, 43, 63, 67, 70, 79],
 [8, 24, 28, 29, 41, 44, 57, 79],
 [8, 14, 24, 29, 41, 43, 57, 63, 67, 78, 79],
 [0, 12, 26, 27, 32

In [25]:
#最少跟最多的following 
minlen = 10000
maxlen = 0
total = 0
for i in range(len(following_true)):
    total+=len(following_true[i])
    if len(following_true[i]) < minlen:
        minlen = len(following_true[i])
    if len(following_true[i]) > maxlen:
        maxlen = len(following_true[i])
avg = total/len(following_true)
print('Min number of followings ',minlen)
print('Max number of followings ',maxlen)
print('Average number of followings ',avg)

Min number of followings  5
Max number of followings  34
Average number of followings  10.308932169241102


# Training data and Testing data

In [9]:
user_idx = [i for i in range(len(user_following))]
#test_idx is the number of user for testing
test_idx = random.sample(user_idx,200)

In [10]:
# Training  and Testing
train_t = [0]*(len(user_following))
train_f = [0]*(len(user_following))
# Testing 
test_t = [0]*200
test_f = [0]*200
test_pos = -1

for i in range(len(user_following)):
    t_for_train = []
    f_for_train = []
    if i not in test_idx: #if not in test id, just append it to true or false list
        for j in range(88):
            if user_following[i][j] == 1:
                t_for_train.append(j)
            else:
                f_for_train.append(j)
        train_t[i] = t_for_train
        train_f[i] = f_for_train
        
    else: #if in test id, choose 2 true and other 
        test_pos += 1
        temp_t = []
        temp_f = []
        for j in range(88):
            if user_following[i][j] == 1:
                temp_t.append(j)
            else:
                temp_f.append(j)
        # random choose 2 true and 8 false for test 
        t_for_test = random.sample(temp_t,2)
        f_for_test  = random.sample(temp_f,8)
        test_t[test_pos] = t_for_test
        test_f[test_pos] = f_for_test
        
        #other for training
        t_for_train = [item for item in temp_t if not item in t_for_test]
        f_for_train = [item for item in temp_f if not item in f_for_test]
        train_t[i] = t_for_train
        train_f[i] = f_for_train

In [11]:
# train_t[i] 代表的是user i positive feedback
print('The length of train_t:',len(train_t))
print('The length of train_f:',len(train_t))
print('The length of test_t:',len(test_t))
print('The length of test_f:',len(test_f))

The length of train_t: 1489
The length of train_f: 1489
The length of test_t: 200
The length of test_f: 200


In [12]:
user_aux_size = [len(train_t[i]) for i in range(len(train_t))]
len(user_aux_size)

1489

# Recommendation  Module

In [13]:
"""
n: the number of users
m: the number of YouTubers
k: latent dims
l: feature dims
"""
n = 1489 
m = 88  
k = 32
l = 2048 

user = tf.placeholder(tf.int32,[None])
i = tf.placeholder(tf.int32, [None])
j = tf.placeholder(tf.int32, [None])

#多少個auxliary 
xf = tf.placeholder(tf.float32, [m,l])
l_id = tf.placeholder(tf.int32, [m])
r = tf.placeholder(tf.float32,[m])

image_i = tf.placeholder(tf.float32, [1,l])
image_j = tf.placeholder(tf.float32, [1,l])

with tf.variable_scope("item_level"):
    user_latent = tf.get_variable("user_latent", [n, k],
                                      initializer=tf.random_normal_initializer(0,0.1,seed=3))
    item_latent = tf.get_variable("item_latent", [m, k],
                                      initializer=tf.random_normal_initializer(0,0.1,seed=3)) 
    aux_item = tf.get_variable("aux_item", [m, k],
                                      initializer=tf.random_normal_initializer(0,0.1,seed=3))
    W1 = tf.get_variable("W1", [n, k],
                                      initializer=tf.contrib.layers.xavier_initializer())
    Wu = tf.get_variable("Wu", [n,k,k], 
                                      initializer=tf.contrib.layers.xavier_initializer()) #所有的user 都共用一個權重
    Wy = tf.get_variable("Wy", [n,k,k],
                                     initializer=tf.contrib.layers.xavier_initializer()) #不同的YouTuber有不同的權重
    Wa = tf.get_variable("Wa", [n,k,k],
                                     initializer=tf.contrib.layers.xavier_initializer())
    Wv = tf.get_variable("Wv", [n,k,l],
                                    initializer=tf.contrib.layers.xavier_initializer())
    
    

    aux_new = tf.get_variable("aux_new", [1,k], initializer=tf.constant_initializer(0.0))
    ########## Error part, how to get auxisize dynamically
    ####aux_size= tf.get_variable(name='aux_size', initializer=l_id.get_shape().as_list()[-1])
    
with tf.variable_scope('feature_level'):
    Beta = tf.get_variable("beta", [n,l],
                             # initializer=tf.contrib.layers.xavier_initializer())
                                     initializer=tf.random_normal_initializer(0.00001,0.000001,seed=10))

#lookup the latent factors by user and id
u = tf.nn.embedding_lookup(user_latent, user) #(1*k) 第幾個user latent factor
vi = tf.nn.embedding_lookup(item_latent, i) 
vj = tf.nn.embedding_lookup(item_latent, j)

#取消 Weight 共用
w1 = tf.nn.embedding_lookup(W1, user) #(1*k)
wu = tf.squeeze(tf.nn.embedding_lookup(Wu, user)) #(k*k)
wy = tf.squeeze(tf.nn.embedding_lookup(Wy, user)) #(k*k)
wa = tf.squeeze(tf.nn.embedding_lookup(Wa, user)) #(k*k)
wv = tf.squeeze(tf.nn.embedding_lookup(Wv, user)) #(k,l)

beta = tf.nn.embedding_lookup(Beta, user) #user feature latent factor


The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



In [14]:
a_list=[]
for q in range(m): #取l_id個 YouTuber 的 類別
    xfi = tf.expand_dims(xf[q],0) #(1,l)
    a_list.append((tf.matmul( w1, tf.nn.relu( tf.matmul(wu, u, transpose_b=True) +
        tf.matmul(wy, tf.expand_dims(tf.nn.embedding_lookup(item_latent,l_id[q]),0), transpose_b=True) +
        tf.matmul(wa, tf.expand_dims(tf.nn.embedding_lookup(aux_item, l_id[q]),0), transpose_b=True) +
        tf.matmul(wv, xfi, transpose_b=True)))[0][0])*r[q])
        
                                          
a_list_soft=tf.nn.softmax(a_list)
#print(sess.run(a_list_soft))
aux_np = np.zeros(k)

#改成while
for q in range(m): #取q個auxliary item
    aux_np+=a_list_soft[q]*tf.expand_dims(tf.nn.embedding_lookup(aux_item, l_id[q]),0)

aux_np+=u #user_latent factor + sum (alpha*auxilary)
aux_new=tf.assign(aux_new,aux_np) #把aux_new 的 值變成aux_np

#矩陣中對應函數各自相乘
xui = tf.matmul(aux_new, vi, transpose_b=True)+ tf.matmul(beta,image_i, transpose_b=True)
xuj = tf.matmul(aux_new, vj, transpose_b=True)+ tf.matmul(beta,image_j, transpose_b=True)

xuij = xui- xuj

l2_norm = tf.add_n([
            0.001 * tf.reduce_sum(tf.multiply(u, u)),
            0.001 * tf.reduce_sum(tf.multiply(vi, vi)),
            0.001 * tf.reduce_sum(tf.multiply(vj, vj)),
  
            0.001 * tf.reduce_sum(tf.multiply(w1, w1)),
            0.001 * tf.reduce_sum(tf.multiply(wu, wu)),
            0.001 * tf.reduce_sum(tf.multiply(wy, wy)),
            0.001 * tf.reduce_sum(tf.multiply(wa, wa)),
            0.001 * tf.reduce_sum(tf.multiply(wv,wv)),
            
            0.1 * tf.reduce_sum(tf.multiply(beta,beta)),
            
          ])

loss = l2_norm -tf.log(tf.sigmoid(xuij)) # objective funtion
train_op = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(loss) #parameter optimize 
auc = tf.reduce_mean(tf.to_float(xuij > 0))

Instructions for updating:
Use `tf.cast` instead.


In [15]:
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

t0=time.time()

#use_true=init_list_of_objects(136)
#use_test=init_list_of_objects(136)

train_pair_t=[] #positive feedback
train_pair_f=[] #negative feedback
train_yes_id=[] 
for q in range(10):
    print('Iteraction:',q)
    train_auc=0
    total_loss=0
    xuij_auc=0
    length = 0
    for z in range(1489):
        """
        yes 用來存放選擇到的YouTuber feature (for auxilary)
        yesr 用來存放user對該YouTuber的喜好程度(user_category 跟 YouTuber_category的相似性)
        r_3 用來存放user 對該YouTuber種類的偏好(取max)
        """
        yes=[]
        yesr=[]
        r_88=np.zeros(88) 
        
        #這裡不知道怎麼讓3變成變動型的長度
        sample=random.sample(train_t[z],len(train_t[z])) #隨機選3個sample true's YouTuber
        train_yes_id.append(sample) #sample全部丟進去
        
        
        for k in range(len(sample)):
            yes.append(image_2048[sample[k]])
            yesr.append(YouTuber_category[sample[k]]*user_category_norm[z])
            #print('YouTuber_category ', YouTuber_category[sample[k]])
            #print('User_category ',user_category_norm[z])
        for k in range(88):
            r_88[k]=max(yesr[k])
        yes=np.array(yes)
        
        not_used_list = list(set(train_t[z]).difference(set(sample)))
        
        train_t_sample = random.sample(train_t[z],2)
        #print('number of positive feedback', len(train_t[z]))
        for ta in train_t_sample:
            #ta=random.choice(train_t[z]) #ta is true positve photo
            train_pair_t.append(ta)
            image_1=np.expand_dims(image_2048[ta],0)
            
            train_f_sample = random.sample(train_f[z],10)
            for b in train_f_sample:
                #print('likes:',ta,';Not likes:',b)
                #b=random.choice(train_f[z])  #b is no feedback photo
                train_pair_f.append(b)
                image_2=np.expand_dims(image_2048[b],0)
                #print('Image_2',image_2.shape)
            
                #use_test[z].append(b)
                r3,_auc, _loss,_=sess.run([a_list_soft,auc,loss,train_op], feed_dict={user: [z],
                                        i: [ta], j: [b], xf: yes , l_id:sample,r:r_3,
                                        image_i:image_1,image_j:image_2})
                #print(XUIJ)
                #print('loss=',_loss)
                #print('auc=',_auc)
                #print('sub r3:',r3)
                train_auc+=_auc
                total_loss+=_loss
                length += 1
            #now1+=1
    
    #print('mine:',xuij_auc/136)    
    #print('a_list_soft:',r3)
    print("total_loss:-----------------", total_loss/length)
    print("train_auc:-------------------", train_auc/length)
    print('time:',time.time()-t0,' sec')
print('Total cost ',time.time()-t0,' sec')       

Iteraction: 0
total_loss:----------------- [[0.43915877]]
train_auc:------------------- 0.8899932840832774
time: 580.4198482036591  sec
Iteraction: 1
total_loss:----------------- [[0.30291602]]
train_auc:------------------- 0.8812961719274681
time: 1157.7696454524994  sec
Iteraction: 2
total_loss:----------------- [[0.25425795]]
train_auc:------------------- 0.9056413700470114
time: 1735.1459066867828  sec
Iteraction: 3
total_loss:----------------- [[0.24454887]]
train_auc:------------------- 0.9142713230355943
time: 2312.715378522873  sec
Iteraction: 4
total_loss:----------------- [[0.22833727]]
train_auc:------------------- 0.9208865010073876
time: 2890.1788256168365  sec
Iteraction: 5
total_loss:----------------- [[0.22520535]]
train_auc:------------------- 0.924076561450638
time: 3467.6743490695953  sec
Iteraction: 6
total_loss:----------------- [[0.21746048]]
train_auc:------------------- 0.9293485560779047
time: 4044.8970053195953  sec
Iteraction: 7
total_loss:----------------- [

# Get latent factor and Each weight

In [16]:
U, Y, A, A1, Au, Ay, Aa, Av,B =sess.run([user_latent, item_latent, aux_item, W1, Wu, Wy, Wa, Wv,Beta])

In [17]:
print('User latent shape: ',U.shape)
print('photo latent shape: ', Y.shape)
print('Auxilary latent shape: ',A.shape)
print('W1 weight shape: ',A1.shape)
print('Wu weight shape:',Au.shape)
print('Wy weight shape:', Ay.shape)
print('Wa weight shape:',Aa.shape)
print('Wv weight shape:',Av.shape)
print('Beta shape:',B.shape)

User latent shape:  (1489, 32)
photo latent shape:  (88, 32)
Auxilary latent shape:  (88, 32)
W1 weight shape:  (1489, 32)
Wu weight shape: (1489, 32, 32)
Wy weight shape: (1489, 32, 32)
Wa weight shape: (1489, 32, 32)
Wv weight shape: (1489, 32, 2048)
Beta shape: (1489, 2048)


# Testing Part

In [23]:
result=np.zeros((200,88))
RS=np.zeros((200,88))
#test_idx --> Test 的 index

test_yes_id=[]
for s in range(200):
    print(s,test_idx[s])

    yes=[]
    sample=random.sample(train_t[test_idx[s]],len(train_t[test_idx[s]])) #從training part 的positive feedback 取出YouTuber 當成Auxilary
    #sample=result_yes_id[now]
    test_yes_id.append(sample)
    alpha=np.zeros([len(sample)])
    
    for a in range(len(sample)):
        r =np.max(YouTuber_category[sample[a]]*user_category_norm[test_idx[s]]) #sample a 的category vec *user_category vec
    
        alpha[a]=np.dot(A1[test_idx[s]],(relu(np.dot(Au[test_idx[s]],np.expand_dims(U[test_idx[s]],0).T)+np.dot(Ay[test_idx[s]],np.expand_dims(Y[sample[a]],0).T)+np.dot(Aa[test_idx[s]],
                            np.expand_dims(A[sample[a]],0).T)+ np.dot(Av[test_idx[s]],np.expand_dims(image_2048[sample[a]],0).T))))*r
    mul=np.zeros((1,32))
    #print('alpha------------',alpha)
    print('softmax alpha--------------',softmax(alpha))
    for i in range(len(sample)):
        mul+=softmax(alpha)[i]*A[sample[i]] #attention alpha*Ai part 
    new_mul=mul+U[test_idx[s]]  #(U+auxilary)
    for k in range(88):
        result[s][k]=np.dot(new_mul,Y[k].T) #(U+auxilary)*photo latent factor
        RS[s][k] = np.dot(new_mul,Y[k].T)+np.dot(B[test_idx[s]], image_2048[k].T)
print(RS[s])

0 241
softmax alpha-------------- [0.0769242  0.07692545 0.07692421 0.07692562 0.07692072 0.07692191
 0.07692434 0.07692221 0.07692446 0.07692011 0.07692143 0.07692449
 0.07692085]
1 353
softmax alpha-------------- [0.0666711  0.0666665  0.06666519 0.06666587 0.06666577 0.06666396
 0.06666434 0.06666646 0.06666685 0.0666652  0.06666622 0.06667102
 0.06667102 0.06666572 0.06666478]
2 1466
softmax alpha-------------- [0.33333765 0.33333199 0.33333036]
3 939
softmax alpha-------------- [0.07692575 0.07692619 0.07692004 0.07692571 0.0769193  0.07691991
 0.07692437 0.07692516 0.0769255  0.07692597 0.07692404 0.07691832
 0.07691974]
4 597
softmax alpha-------------- [0.0909142  0.09091446 0.09091434 0.09091406 0.09090126 0.09090523
 0.09091386 0.09090729 0.09090302 0.09090548 0.0909068 ]
5 143
softmax alpha-------------- [0.19999692 0.19999933 0.20000381 0.20000121 0.19999872]
6 1260
softmax alpha-------------- [0.08333945 0.08333713 0.08333816 0.08333722 0.08333004 0.08332722
 0.08332614 0.

115 791
softmax alpha-------------- [0.07692218 0.07692205 0.07692245 0.07692335 0.07692347 0.07692339
 0.07692351 0.07692196 0.07692225 0.07692376 0.07692336 0.07692265
 0.07692562]
116 1279
softmax alpha-------------- [0.25000306 0.25000324 0.24999941 0.24999429]
117 1202
softmax alpha-------------- [0.142856   0.14285627 0.14285187 0.14286113 0.14285901 0.14285817
 0.14285755]
118 279
softmax alpha-------------- [0.09090918 0.09090918 0.09090915 0.09091003 0.09091001 0.09090985
 0.090909   0.09090867 0.0909086  0.09090918 0.09090716]
119 1237
softmax alpha-------------- [0.08333261 0.08333255 0.08333539 0.08333388 0.08333482 0.08333417
 0.08333413 0.08333228 0.08333266 0.08333297 0.08333198 0.08333256]
120 1437
softmax alpha-------------- [0.25000773 0.25000715 0.25000812 0.249977  ]
121 93
softmax alpha-------------- [0.16665838 0.16667393 0.16665731 0.16667056 0.16667344 0.16666638]
122 405
softmax alpha-------------- [0.076923   0.07692363 0.07692238 0.0769241  0.07692195 0.07692

In [19]:
#取出test的資料
testRS = np.zeros((200,10)) #shape 200*10

#test_t 是true的
#test_f 是false的
        
for z in range(200):
    user_id = test_idx[z]
    #positive target YouTuber list
    youtube_t = test_t[z] 
    #not target YouTuber list
    youtube_f = test_f[z]
    
    #前兩個放target的RS
    for i in range(len(youtube_t)):
        testRS[z][i] = RS[z][youtube_t[i]]
    for i in range(len(youtube_f)):
        testRS[z][i+len(youtube_t)] = RS[z][youtube_f[i]]


In [20]:
def topN(sortlist,n):
    topList = []
    for i in range(n):
        topList.append(sortlist.index(max(sortlist)))
        #print(max(sortlist))
        #print(sortlist.index(max(sortlist)))
        sortlist[sortlist.index(max(sortlist))] = -1000000000
    return topList

In [27]:
count_0_all = []
for i in range(len(testRS)):
    top_0 = topN(list(testRS[i]),2)
    count_0_all.append(top_0)
    #print(top_0)

acc_0 = 0
total = len(count_0_all)*len(count_0_all[0])
#print(total) #(200*2)
for i in range(len(count_0_all)):
    for j in range(len(count_0_all[i])):
        if count_0_all[i][j] < 2: #代表是0或1 (也就是target)
            acc_0 += 1
#print(acc_0)
avg_acc = acc_0/total
print('avg_accuarcy for count_0:',avg_acc)

avg_accuarcy for count_0: 0.425


# Testing for dynamic length for loop 

In [22]:
length = tf.placeholder(tf.int32)
i = tf.constant(0)

while_condition = lambda i: tf.less(i, length)
def body(i):
    # do something here which you want to do in your loop
    # increment i
    return [tf.add(i, 1)]

# do the loop:
output = tf.while_loop(while_condition, body, [i])


init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
sess.run(output,feed_dict={length:3})

3