In [1]:
import os
import pandas as pd
import numpy as np
import random
import time
import tensorflow as tf

In [2]:
def relu(x):
    return np.maximum(0,x)  

def softmax(x):
    exp_x = np.exp(x)
    softmax_x = exp_x / np.sum(exp_x)
    return softmax_x 

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

In [3]:
user_following = np.load('../Data/npy/user_following.npy')
image_2048 = np.load('../Data/npy/Image_2048D.npy')
user_category = np.load('../Data/npy/user_category_1216.npy')
YouTuber_category = np.load('../Data/npy/YouTuber_category_0.7.npy')

In [4]:
print('user_following shape ',user_following.shape)
print('image_2048 shape ',image_2048.shape)
print('user_category shape ',user_category.shape)
print('YouTuber_category shape ',YouTuber_category.shape)

user_following shape  (1489, 88)
image_2048 shape  (88, 2048)
user_category shape  (1489, 17)
YouTuber_category shape  (88, 17)


In [5]:
user_category_norm = np.zeros(user_category.shape)
for i in range(len(user_category)):
    user_category_norm[i] = user_category[i]/np.max(user_category[i])
print('user_category after normalized by max...')
print('user_category_norm shape ',user_category_norm.shape)

user_category after normalized by max...
user_category_norm shape  (1489, 17)


In [6]:
following_true = [0]*len(user_following)
for i in range(len(user_following)):
    each_user = []
    for j in range(len(user_following[i])):
        if user_following[i][j] == 1:
            each_user.append(j)
    following_true[i] = each_user

In [7]:
#最少跟最多的following 
minlen = 10000
maxlen = 0
for i in range(len(following_true)):
    if len(following_true[i]) < minlen:
        minlen = len(following_true[i])
    if len(following_true[i]) > maxlen:
        maxlen = len(following_true[i])
print('Min number of followings ',minlen)
print('Max number of followings ',maxlen)

Min number of followings  5
Max number of followings  34


# Training data and Testing data

In [8]:
user_idx = [i for i in range(len(user_following))]
#test_idx is the number of user for testing
test_idx = random.sample(user_idx,200)

In [9]:
# Training  and Testing
train_t = [0]*(len(user_following))
train_f = [0]*(len(user_following))
# Testing 
test_t = [0]*200
test_f = [0]*200
test_pos = -1

for i in range(len(user_following)):
    t_for_train = []
    f_for_train = []
    if i not in test_idx: #if not in test id, just append it to true or false list
        for j in range(88):
            if user_following[i][j] == 1:
                t_for_train.append(j)
            else:
                f_for_train.append(j)
        train_t[i] = t_for_train
        train_f[i] = f_for_train
        
    else: #if in test id, choose 2 true and other 
        test_pos += 1
        temp_t = []
        temp_f = []
        for j in range(88):
            if user_following[i][j] == 1:
                temp_t.append(j)
            else:
                temp_f.append(j)
        # random choose 2 true and 8 false for test 
        t_for_test = random.sample(temp_t,2)
        f_for_test  = random.sample(temp_f,8)
        test_t[test_pos] = t_for_test
        test_f[test_pos] = f_for_test
        
        #other for training
        t_for_train = [item for item in temp_t if not item in t_for_test]
        f_for_train = [item for item in temp_f if not item in f_for_test]
        train_t[i] = t_for_train
        train_f[i] = f_for_train

In [10]:
# train_t[i] 代表的是user i positive feedback
print('The length of train_t:',len(train_t))
print('The length of train_f:',len(train_t))
print('The length of test_t:',len(test_t))
print('The length of test_f:',len(test_f))

The length of train_t: 1489
The length of train_f: 1489
The length of test_t: 200
The length of test_f: 200


In [11]:
user_aux_size = [len(train_t[i]) for i in range(len(train_t))]
len(user_aux_size)

1489

# Recommendation  Module

In [12]:
"""
n: the number of users
m: the number of YouTubers
k: latent dims
l: feature dims
"""
n = 1489 
m = 88  
k = 32
l = 2048 

user = tf.placeholder(tf.int32,shape=(1,))
i = tf.placeholder(tf.int32, shape=(1,))
j = tf.placeholder(tf.int32, shape=(1,))

#多少個auxliary 
xf = tf.placeholder(tf.float32, shape=(None,l))
l_id = tf.placeholder(tf.int32, shape=(None,))
l_id_len = tf.placeholder(tf.int32,shape=(1,))
r = tf.placeholder(tf.float32,shape=(None,))


image_i = tf.placeholder(tf.float32, shape=(1,l))
image_j = tf.placeholder(tf.float32, shape=(1,l))

"""
r3,_auc, _loss,_=sess.run([a_list_soft,auc,loss,train_op], feed_dict={user: [z],
                                        i: [ta], j: [b], xf: yes , l_id:sample,l_id_len:len(sample),r:r_3,
                                        image_i:image_1,image_j:image_2})
"""

with tf.variable_scope("item_level"):
    user_latent = tf.get_variable("user_latent", [n, k],
                                      initializer=tf.random_normal_initializer(0,0.1,seed=3))
    item_latent = tf.get_variable("item_latent", [m, k],
                                      initializer=tf.random_normal_initializer(0,0.1,seed=3)) 
    aux_item = tf.get_variable("aux_item", [m, k],
                                      initializer=tf.random_normal_initializer(0,0.1,seed=3))
    W1 = tf.get_variable("W1", [n, k],
                                      initializer=tf.contrib.layers.xavier_initializer())
    Wu = tf.get_variable("Wu", [n,k,k],
                                      initializer=tf.contrib.layers.xavier_initializer())
    Wy = tf.get_variable("Wy", [n,k,k],
                                     initializer=tf.contrib.layers.xavier_initializer())
    Wa = tf.get_variable("Wa", [n,k,k],
                                     initializer=tf.contrib.layers.xavier_initializer())
    Wv = tf.get_variable("Wv", [n,k,l],
                                    initializer=tf.contrib.layers.xavier_initializer())
    
    

    aux_new = tf.get_variable("aux_new", [1,k], initializer=tf.constant_initializer(0.0))
    ########## Error part, how to get auxisize dynamically
    ####aux_size= tf.get_variable(name='aux_size', initializer=l_id.get_shape().as_list()[-1])
    
with tf.variable_scope('feature_level'):
    Beta = tf.get_variable("beta", [n,l],
                             # initializer=tf.contrib.layers.xavier_initializer())
                                     initializer=tf.random_normal_initializer(0.00001,0.000001,seed=10))

#lookup the latent factors by user and id
u = tf.nn.embedding_lookup(user_latent, user) #(1*k) 第幾個user latent factor
vi = tf.nn.embedding_lookup(item_latent, i) 
vj = tf.nn.embedding_lookup(item_latent, j)

w1 = tf.nn.embedding_lookup(W1, user) #(1*k)
wu = tf.squeeze(tf.nn.embedding_lookup(Wu, user)) #(k*k)
wy = tf.squeeze(tf.nn.embedding_lookup(Wy, user)) #(k*k)
wa = tf.squeeze(tf.nn.embedding_lookup(Wa, user)) #(k*k)
wv = tf.squeeze(tf.nn.embedding_lookup(Wv, user)) #(k,l)

beta = tf.nn.embedding_lookup(Beta, user) #user feature latent factor


The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



In [13]:
a_list=tf.Variable([])
q = tf.constant(0)
def att_cond(q,a_list):
    return tf.less(q,l_id_len[0])
def att_body(q,a_list):
    xfi = tf.expand_dims(xf[q],0) #(1,l)
    
    a_list = tf.concat([a_list,[(tf.matmul( w1, tf.nn.relu( tf.matmul(wu, u, transpose_b=True) +
        tf.matmul(wy, tf.expand_dims(tf.nn.embedding_lookup(item_latent,l_id[q]),0), transpose_b=True) +
        tf.matmul(wa, tf.expand_dims(tf.nn.embedding_lookup(aux_item, l_id[q]),0), transpose_b=True) +
        tf.matmul(wv, xfi, transpose_b=True)))[0][0])*r[q]]],0)
    q += 1
    return q,  a_list

_, a_list = tf.while_loop(att_cond,att_body,[q,a_list],shape_invariants=[q.get_shape(),tf.TensorShape([None])])


"""
for q in range(3): #取l_id個 YouTuber 的 類別
    xfi = tf.expand_dims(xf[q],0) #(1,l)
    a_list.append((tf.matmul( w1, tf.nn.relu( tf.matmul(wu, u, transpose_b=True) +
        tf.matmul(wy, tf.expand_dims(tf.nn.embedding_lookup(item_latent,l_id[q]),0), transpose_b=True) +
        tf.matmul(wa, tf.expand_dims(tf.nn.embedding_lookup(aux_item, l_id[q]),0), transpose_b=True) +
        tf.matmul(wv, xfi, transpose_b=True)))[0][0])*r[q])
"""
a_list_soft=tf.nn.softmax(a_list)


aux_np = tf.expand_dims(tf.zeros(32),0) #dimension (1,32)
q = tf.constant(0)
def sum_att_cond(q,aux_np):
    return tf.less(q,l_id_len[0])

def sum_att_body(q,aux_np):
    #aux_np+=a_list_soft[q]*tf.expand_dims(tf.nn.embedding_lookup(aux_item, l_id[q]),0)
    aux_np = tf.math.add_n([aux_np,a_list_soft[q]*tf.expand_dims(tf.nn.embedding_lookup(aux_item, l_id[q]),0)])  # [[7, 16], [10, 25]]
    q += 1
    return q, aux_np

_,aux_np = tf.while_loop(sum_att_cond,sum_att_body,[q,aux_np])

"""
for q in range(3): #取q個auxliary item
    aux_np+=a_list_soft[q]*tf.expand_dims(tf.nn.embedding_lookup(aux_item, l_id[q]),0)
"""


aux_np+=u #user_latent factor + sum (alpha*auxilary)
aux_new=tf.assign(aux_new,aux_np) #把aux_new 的 值變成aux_np

#矩陣中對應函數各自相乘
xui = tf.matmul(aux_new, vi, transpose_b=True)+ tf.matmul(beta,image_i, transpose_b=True)
xuj = tf.matmul(aux_new, vj, transpose_b=True)+ tf.matmul(beta,image_j, transpose_b=True)

xuij = xui- xuj

l2_norm = tf.add_n([
            0.001 * tf.reduce_sum(tf.multiply(u, u)),
            0.001 * tf.reduce_sum(tf.multiply(vi, vi)),
            0.001 * tf.reduce_sum(tf.multiply(vj, vj)),
  
            0.001 * tf.reduce_sum(tf.multiply(w1, w1)),
            0.001 * tf.reduce_sum(tf.multiply(wu, wu)),
            0.001 * tf.reduce_sum(tf.multiply(wy, wy)),
            0.001 * tf.reduce_sum(tf.multiply(wa, wa)),
            0.001 * tf.reduce_sum(tf.multiply(wv,wv)),
            
            0.1 * tf.reduce_sum(tf.multiply(beta,beta)),
            
          ])

loss = l2_norm -tf.log(tf.sigmoid(xuij)) # objective funtion
train_op = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(loss) #parameter optimize 
auc = tf.reduce_mean(tf.to_float(xuij > 0))

Instructions for updating:
Use `tf.cast` instead.


In [15]:
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

t0=time.time()

#use_true=init_list_of_objects(136)
#use_test=init_list_of_objects(136)

train_pair_t=[] #positive feedback
train_pair_f=[] #negative feedback
train_yes_id=[] 
for q in range(10):
    print('Iteraction:',q)
    train_auc=0
    total_loss=0
    xuij_auc=0
    length = 0
    for z in range(1489):
        """
        yes 用來存放選擇到的YouTuber feature (for auxilary)
        yesr 用來存放user對該YouTuber的喜好程度(user_category 跟 YouTuber_category的相似性)
        r_3 用來存放user 對該YouTuber種類的偏好(取max)
        """
        yes=[]
        yesr=[]
        r_3=np.zeros(len(train_t[z])) 
        
        #這裡不知道怎麼讓3變成變動型的長度
        sample=random.sample(train_t[z],len(train_t[z])) #隨機選3個sample true's YouTuber
        train_yes_id.append(sample) #sample全部丟進去
        
        for k in range(len(sample)):
            yes.append(image_2048[sample[k]])
            yesr.append(YouTuber_category[sample[k]]*user_category_norm[z])
            #print('YouTuber_category ', YouTuber_category[sample[k]])
            #print('User_category ',user_category_norm[z])
        for k in range(len(sample)):
            r_3[k]=max(yesr[k])
        
        yes=np.array(yes)
        #print('user shape should be ',np.array([z]).shape)
        #print('xf shape should be ',yes.shape)
        #print('r shape should be ',np.array(r_3).shape)
        #print('l_id shape should be ',np.array(sample).shape)
        
        #not_used_list = list(set(train_t[z]).difference(set(sample)))
        
        train_t_sample = random.sample(train_t[z],2)
        #print('number of positive feedback', len(train_t[z]))
        for ta in train_t_sample:
            #ta=random.choice(train_t[z]) #ta is true positve photo
            train_pair_t.append(ta)
            image_1=np.expand_dims(image_2048[ta],0) #(1,2048)
            #print('Image_1 shape ',image_1.shape)
            train_f_sample = random.sample(train_f[z],10)
            for b in train_f_sample:
                #print('likes:',ta,';Not likes:',b)
                #b=random.choice(train_f[z])  #b is no feedback photo
                train_pair_f.append(b)
                image_2=np.expand_dims(image_2048[b],0) #(1,2048)
                #print('Image_2 shape',image_2.shape)
            
                #use_test[z].append(b)
                r3,_auc, _loss,_=sess.run([a_list_soft,auc,loss,train_op], feed_dict={user: [z],
                                        i: [ta], j: [b], xf: yes , l_id:sample, l_id_len:[len(sample)],r:r_3,
                                        image_i:image_1,image_j:image_2})
                #print(XUIJ)
                #print('loss=',_loss)
                #print('auc=',_auc)
                #print('sub r3:',r3)
                train_auc+=_auc
                total_loss+=_loss
                length += 1
            #now1+=1
    
    #print('mine:',xuij_auc/136)    
    #print('a_list_soft:',r3)
    print("total_loss:-----------------", total_loss/length)
    print("train_auc:-------------------", train_auc/length)
    print('time:',time.time()-t0,' sec')
print('Total cost ',time.time()-t0,' sec')       

Iteraction: 0
total_loss:----------------- [[0.43658698]]
train_auc:------------------- 0.898556077904634
time: 595.3776261806488  sec
Iteraction: 1
total_loss:----------------- [[0.29628184]]
train_auc:------------------- 0.8846541302887844
time: 1192.8945577144623  sec
Iteraction: 2
total_loss:----------------- [[0.26666075]]
train_auc:------------------- 0.9013096037609134
time: 1790.4275691509247  sec
Iteraction: 3
total_loss:----------------- [[0.23308463]]
train_auc:------------------- 0.9183344526527871
time: 2388.032740831375  sec
Iteraction: 4
total_loss:----------------- [[0.22402173]]
train_auc:------------------- 0.9225319006044325
time: 2985.543319940567  sec
Iteraction: 5
total_loss:----------------- [[0.22249722]]
train_auc:------------------- 0.9270651443922096
time: 3583.111857175827  sec
Iteraction: 6
total_loss:----------------- [[0.20726994]]
train_auc:------------------- 0.9351578240429819
time: 4180.742301225662  sec
Iteraction: 7
total_loss:----------------- [[0.

# Get latent factor and Each weight

In [16]:
U, Y, A, A1, Au, Ay, Aa, Av,B =sess.run([user_latent, item_latent, aux_item, W1, Wu, Wy, Wa, Wv,Beta])

In [17]:
print('User latent shape: ',U.shape)
print('photo latent shape: ', Y.shape)
print('Auxilary latent shape: ',A.shape)
print('W1 weight shape: ',A1.shape)
print('Wu weight shape:',Au.shape)
print('Wy weight shape:', Ay.shape)
print('Wa weight shape:',Aa.shape)
print('Wv weight shape:',Av.shape)
print('Beta shape:',B.shape)

User latent shape:  (1489, 32)
photo latent shape:  (88, 32)
Auxilary latent shape:  (88, 32)
W1 weight shape:  (1489, 32)
Wu weight shape: (1489, 32, 32)
Wy weight shape: (1489, 32, 32)
Wa weight shape: (1489, 32, 32)
Wv weight shape: (1489, 32, 2048)
Beta shape: (1489, 2048)


# Testing Part

In [18]:
result=np.zeros((200,88))
RS=np.zeros((200,88))
#test_idx --> Test 的 index

test_yes_id=[]
for s in range(200):
    print(s,test_idx[s])

    yes=[]
    sample=random.sample(train_t[test_idx[s]],len(train_t[test_idx[s]])) #從training part 的positive feedback 取出YouTuber 當成Auxilary
    #sample=result_yes_id[now]
    test_yes_id.append(sample)
    alpha=np.zeros([len(sample)])
    
    for a in range(len(sample)):
        r =np.max(YouTuber_category[sample[a]]*user_category_norm[test_idx[s]]) #sample a 的category vec *user_category vec
    
        alpha[a]=np.dot(A1[test_idx[s]],(relu(np.dot(Au[test_idx[s]],np.expand_dims(U[test_idx[s]],0).T)+np.dot(Ay[test_idx[s]],np.expand_dims(Y[sample[a]],0).T)+np.dot(Aa[test_idx[s]],
                            np.expand_dims(A[sample[a]],0).T)+ np.dot(Av[test_idx[s]],np.expand_dims(image_2048[sample[a]],0).T))))*r
    mul=np.zeros((1,32))
    #print('alpha------------',alpha)
    print('softmax alpha--------------',softmax(alpha))
    for i in range(len(sample)):
        mul+=softmax(alpha)[i]*A[sample[i]] #attention alpha*Ai part 
    new_mul=mul+U[test_idx[s]]  #(U+auxilary)
    for k in range(88):
        result[s][k]=np.dot(new_mul,Y[k].T) #(U+auxilary)*photo latent factor
        RS[s][k] = np.dot(new_mul,Y[k].T)+np.dot(B[test_idx[s]], image_2048[k].T)
print(RS[s])

0 619
softmax alpha-------------- [0.11111174 0.11110595 0.11111175 0.11111766 0.11110977 0.11111171
 0.11110908 0.11111006 0.11111229]
1 175
softmax alpha-------------- [0.25000715 0.24999735 0.24999637 0.24999914]
2 1076
softmax alpha-------------- [0.09999564 0.10000095 0.09999904 0.10000294 0.10000369 0.09999825
 0.10000273 0.09999887 0.09999921 0.09999867]
3 877
softmax alpha-------------- [0.12500227 0.12500127 0.12500473 0.12499921 0.12498982 0.12499717
 0.12500285 0.12500268]
4 694
softmax alpha-------------- [0.11110773 0.11110786 0.11110967 0.11111604 0.11111938 0.11110753
 0.11111666 0.11110771 0.11110741]
5 239
softmax alpha-------------- [0.12500096 0.12499863 0.12500346 0.12498898 0.12499013 0.12500187
 0.12500588 0.12501008]
6 432
softmax alpha-------------- [0.12499704 0.12499984 0.1250024  0.12499848 0.12499954 0.12499947
 0.12499949 0.12500374]
7 1072
softmax alpha-------------- [0.07692188 0.07692309 0.07692399 0.07692354 0.0769236  0.07692132
 0.07692231 0.07692379 

 0.09090894 0.09090928 0.09090787 0.09090983 0.09090757]
99 1468
softmax alpha-------------- [0.16666319 0.16666594 0.16666926 0.16666297 0.16667045 0.16666819]
100 1110
softmax alpha-------------- [0.19999581 0.19999916 0.20000756 0.20000246 0.19999502]
101 689
softmax alpha-------------- [0.09091136 0.09090699 0.09090686 0.09091259 0.09091094 0.09091005
 0.09091004 0.09091062 0.09090683 0.09090684 0.09090688]
102 769
softmax alpha-------------- [0.16666355 0.1666799  0.16666349 0.16666404 0.16666444 0.16666458]
103 601
softmax alpha-------------- [0.11111148 0.1111127  0.11111152 0.11111112 0.11111214 0.11111042
 0.11110821 0.1111114  0.11111099]
104 799
softmax alpha-------------- [0.14285073 0.14285998 0.14285942 0.14286164 0.1428538  0.14285069
 0.14286375]
105 440
softmax alpha-------------- [0.08333501 0.08333137 0.08333256 0.08333217 0.08333383 0.08333357
 0.08333313 0.08333272 0.08333428 0.08333313 0.08333535 0.08333288]
106 628
softmax alpha-------------- [0.09999899 0.099997

199 494
softmax alpha-------------- [0.12500535 0.1250055  0.12500527 0.12499555 0.12499539 0.12499552
 0.12500504 0.12499238]
[2.87535014 1.75057218 2.69432993 4.85072411 3.12099056 3.13097162
 1.63191052 4.01333912 5.56458614 0.765456   3.67157739 3.97191017
 3.24378358 2.4253219  6.23240707 4.58837329 1.3892063  4.31815865
 3.30904709 4.94784735 1.6276781  3.72509614 2.14981532 0.70450129
 6.58090356 4.70395902 4.21040021 4.29922449 5.50120153 5.95975051
 5.23532166 1.70656795 3.21524818 2.21622504 0.08978345 5.81256104
 3.80021296 2.53248812 2.58190951 2.44506777 4.60414938 6.62539436
 3.03918864 7.5274398  1.15056083 4.87520306 2.92885649 2.59561959
 2.88269401 5.081179   3.81385504 3.75424073 2.44872233 2.706339
 4.5526099  2.64593648 4.09560541 6.6507663  2.63925389 2.09603226
 3.83591531 3.70843931 3.17565661 4.7821282  1.55462828 0.89119548
 4.31827927 8.13430222 4.20360525 2.7739807  6.03290588 6.44507164
 1.58130232 2.43416964 4.64143474 4.62135675 4.43603132 1.61329926
 6.0

In [19]:
#取出test的資料
testRS = np.zeros((200,10)) #shape 200*10

#test_t 是true的
#test_f 是false的
        
for z in range(200):
    user_id = test_idx[z]
    #positive target YouTuber list
    youtube_t = test_t[z] 
    #not target YouTuber list
    youtube_f = test_f[z]
    
    #前兩個放target的RS
    for i in range(len(youtube_t)):
        testRS[z][i] = RS[z][youtube_t[i]]
    for i in range(len(youtube_f)):
        testRS[z][i+len(youtube_t)] = RS[z][youtube_f[i]]


In [20]:
def topN(sortlist,n):
    topList = []
    for i in range(n):
        topList.append(sortlist.index(max(sortlist)))
        #print(max(sortlist))
        #print(sortlist.index(max(sortlist)))
        sortlist[sortlist.index(max(sortlist))] = -1000000000
    return topList

In [21]:
count_0_all = []
for i in range(len(testRS)):
    top_0 = topN(list(testRS[i]),2)
    count_0_all.append(top_0)
    #print(top_0)

acc_0 = 0
total = len(count_0_all)*len(count_0_all[0])
#print(total) #(200*2)
for i in range(len(count_0_all)):
    for j in range(len(count_0_all[i])):
        if count_0_all[i][j] < 2: #代表是0或1 (也就是target)
            acc_0 += 1
#print(acc_0)
avg_acc = acc_0/total
print('avg_accuarcy for count_0:',avg_acc)

avg_accuarcy for count_0: 0.32
