In [1]:
import os
import pandas as pd
import numpy as np
import random
import time
import tensorflow as tf
import math
from IPython.display import clear_output

In [2]:
def relu(x):
    return np.maximum(0,x)  

def softmax(x):
    exp_x = np.exp(x)
    softmax_x = exp_x / np.sum(exp_x)
    return softmax_x 

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

### Load numpy array

In [3]:
text_npy = np.load('./npy/text.npy')
poster_npy = np.load('./npy/poster.npy')
IGimg_npy = np.load('./npy/IGimg.npy')
movie_genre = np.load('./npy/movie_genre.npy')
usr_following = np.load('./npy/user_followings.npy')
usr_genre = np.load('./npy/user_genre.npy')

print('Text:', text_npy.shape)
print('Poster:', poster_npy.shape)
print('IGimg:', IGimg_npy.shape)
print('Movie genre:', movie_genre.shape)
print('User following:', usr_following.shape)
print('User genre:', usr_genre.shape)

Text: (166, 300)
Poster: (166, 256)
IGimg: (166, 256)
Movie genre: (166, 20)
User following: (2020, 166)
User genre: (2020, 20)


### Normalize usr_genre

In [4]:
usr_genre_norm = np.zeros(usr_genre.shape)
for i in range(len(usr_genre)):
    usr_genre_norm[i] = usr_genre[i]/np.max(usr_genre[i])
print(usr_genre_norm.shape)

(2020, 20)


In [5]:
print('Before:', usr_genre)
print('After:', usr_genre_norm)

Before: [[2 1 0 ... 1 0 0]
 [0 0 0 ... 1 0 1]
 [3 7 4 ... 0 0 0]
 ...
 [5 3 0 ... 1 1 0]
 [2 2 0 ... 0 1 0]
 [2 1 0 ... 1 1 0]]
After: [[0.22222222 0.11111111 0.         ... 0.11111111 0.         0.        ]
 [0.         0.         0.         ... 0.125      0.         0.125     ]
 [0.33333333 0.77777778 0.44444444 ... 0.         0.         0.        ]
 ...
 [0.26315789 0.15789474 0.         ... 0.05263158 0.05263158 0.        ]
 [0.28571429 0.28571429 0.         ... 0.         0.14285714 0.        ]
 [0.22222222 0.11111111 0.         ... 0.11111111 0.11111111 0.        ]]


# Training & testing split

### Setup 

In [6]:
usr_test_amount = 200 # int(usr_npy.shape[0] * 0.1)
movie_test_amount = 30 # int(movie_npy.shape[0] * 0.2)

print(usr_test_amount, movie_test_amount)

200 30


In [7]:
usr_idx = [i for i in range(len(usr_following))]
print(len(usr_idx))

test_idx = random.sample(usr_idx, usr_test_amount)
print(len(test_idx))

2020
200


In [8]:
#Training
train_t = [0]*(len(usr_following))
train_f = [0]*(len(usr_following))
# Testing 
test_t = [0]*usr_test_amount
test_f = [0]*usr_test_amount
test_pos = -1

for i in range(len(usr_following)):
    
    t_for_train = []
    f_for_train = []
    if i not in test_idx: #if not in test id, just append it to true or false list
        for j in range(166):
            if usr_following[i][j] == 1:
                t_for_train.append(j)
            else:
                f_for_train.append(j)
        train_t[i] = t_for_train
        train_f[i] = f_for_train
        
    else: #if in test id, choose 2 true and other 
        test_pos += 1
        temp_t = []
        temp_f = []
        
        for j in range(166):
            
            if usr_following[i][j] == 1:
                temp_t.append(j)
            else:
                temp_f.append(j)
        
        # random choose 2 true and 8 false for test 
        t_for_test = random.sample(temp_t, 2)
        f_for_test  = random.sample(temp_f, 8)
        test_t[test_pos] = t_for_test
        test_f[test_pos] = f_for_test
        
        #other for training
        t_for_train = [item for item in temp_t if not item in t_for_test]
        f_for_train = [item for item in temp_f if not item in f_for_test]
        train_t[i] = t_for_train
        train_f[i] = f_for_train

In [9]:
# train_t[i] 代表的是user i positive feedback
print('The length of train_t:',len(train_t))
print('The length of train_f:',len(train_t))
print('The length of test_t:',len(test_t))
print('The length of test_f:',len(test_f))

The length of train_t: 2020
The length of train_f: 2020
The length of test_t: 200
The length of test_f: 200


# Recommendation model

In [11]:
usr_nb = len(usr_following) # the number of users
movie_nb = len(movie_genre)  # the number of movies
laten_dim = 32 # latent dims
ft_dim = 256 # feature dims

In [12]:
user = tf.placeholder(tf.int32,[None])
i = tf.placeholder(tf.int32, [None])
j = tf.placeholder(tf.int32, [None])

#多少個auxliary 
xf = tf.placeholder(tf.float32, [3, ft_dim])
l_id = tf.placeholder(tf.int32, [3])
r = tf.placeholder(tf.float32,[3])

image_i = tf.placeholder(tf.float32, [1, ft_dim])
image_j = tf.placeholder(tf.float32, [1, ft_dim])

with tf.variable_scope("item_level"):
    user_latent = tf.get_variable("user_latent", [usr_nb, laten_dim],
                                  initializer=tf.random_normal_initializer(0,0.1,seed=3))
    item_latent = tf.get_variable("item_latent", [movie_nb, laten_dim],
                                  initializer=tf.random_normal_initializer(0,0.1,seed=3)) 
    aux_item = tf.get_variable("aux_item", [movie_nb, laten_dim],
                               initializer=tf.random_normal_initializer(0,0.1,seed=3))
    W1 = tf.get_variable("W1", [usr_nb, laten_dim],
                         initializer=tf.contrib.layers.xavier_initializer())
    Wu = tf.get_variable("Wu", [usr_nb,laten_dim,laten_dim], 
                         initializer=tf.contrib.layers.xavier_initializer()) #所有的user 都共用一個權重
    Wa = tf.get_variable("Wa", [usr_nb, laten_dim, laten_dim],
                         initializer=tf.contrib.layers.xavier_initializer())
    Wm = tf.get_variable("Wy", [usr_nb, laten_dim, laten_dim],
                         initializer=tf.contrib.layers.xavier_initializer()) #不同的電影有不同的權重
    Wv = tf.get_variable("Wv", [usr_nb, laten_dim, ft_dim],
                         initializer=tf.contrib.layers.xavier_initializer())
    
    
    aux_new = tf.get_variable("aux_new", [1, laten_dim], initializer=tf.constant_initializer(0.0))
    ########## Error part, how to get auxisize dynamically
    ####aux_size= tf.get_variable(name='aux_size', initializer=l_id.get_shape().as_list()[-1])
    
with tf.variable_scope('feature_level'):
    Beta = tf.get_variable("beta", [usr_nb, ft_dim],
                           initializer=tf.random_normal_initializer(0.00001,0.000001,seed=10))

#lookup the latent factors by user and id
u = tf.nn.embedding_lookup(user_latent, user) #(1*k) 第幾個user latent factor
vi = tf.nn.embedding_lookup(item_latent, i)
vj = tf.nn.embedding_lookup(item_latent, j)

#取消 Weight 共用
w1 = tf.nn.embedding_lookup(W1, user) #(1*k)
wu = tf.squeeze(tf.nn.embedding_lookup(Wu, user)) #(k*k)
wa = tf.squeeze(tf.nn.embedding_lookup(Wa, user)) #(k*k)
wm = tf.squeeze(tf.nn.embedding_lookup(Wm, user)) #(k*k)
wv = tf.squeeze(tf.nn.embedding_lookup(Wv, user)) #(k,l)

beta = tf.nn.embedding_lookup(Beta, user) #user feature latent factor

Instructions for updating:
Colocations handled automatically by placer.

For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.



In [13]:
a_list=[]

for q in range(3): #取l_id個 YouTuber 的 類別
    xfi = tf.expand_dims(xf[q],0) #(1,l)
    a_list.append((tf.matmul( w1, tf.nn.relu( tf.matmul(wu, u, transpose_b=True) +
        tf.matmul(wm, tf.expand_dims(tf.nn.embedding_lookup(item_latent,l_id[q]),0), transpose_b=True) +
        tf.matmul(wa, tf.expand_dims(tf.nn.embedding_lookup(aux_item, l_id[q]),0), transpose_b=True) +
        tf.matmul(wv, xfi, transpose_b=True)))[0][0])*r[q])
        
                                          
a_list_soft=tf.nn.softmax(a_list)
#print(sess.run(a_list_soft))
aux_np = np.zeros(laten_dim)

#改成while
for q in range(3): #取q個auxliary item
    aux_np+=a_list_soft[q]*tf.expand_dims(tf.nn.embedding_lookup(aux_item, l_id[q]),0)

aux_np+=u #user_latent factor + sum (alpha*auxilary)
aux_new=tf.assign(aux_new,aux_np) #把aux_new 的 值變成aux_np

#矩陣中對應函數各自相乘
xui = tf.matmul(aux_new, vi, transpose_b=True)+ tf.matmul(beta,image_i, transpose_b=True)
xuj = tf.matmul(aux_new, vj, transpose_b=True)+ tf.matmul(beta,image_j, transpose_b=True)

xuij = xui- xuj

l2_norm = tf.add_n([
            0.001 * tf.reduce_sum(tf.multiply(u, u)),
            0.001 * tf.reduce_sum(tf.multiply(vi, vi)),
            0.001 * tf.reduce_sum(tf.multiply(vj, vj)),
  
            0.001 * tf.reduce_sum(tf.multiply(w1, w1)),
            0.001 * tf.reduce_sum(tf.multiply(wu, wu)),
            0.001 * tf.reduce_sum(tf.multiply(wm, wm)),
            0.001 * tf.reduce_sum(tf.multiply(wa, wa)),
            0.001 * tf.reduce_sum(tf.multiply(wv,wv)),
            
            0.1 * tf.reduce_sum(tf.multiply(beta,beta)),
            
          ])

loss = l2_norm -tf.log(tf.sigmoid(xuij)) # objective funtion
train_op = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(loss) #parameter optimize 
auc = tf.reduce_mean(tf.to_float(xuij > 0))

Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Use tf.cast instead.


In [15]:
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
loss_acc_list = []
t0 = time.time()

train_pair_t = [] #positive feedback
train_pair_f = [] #negative feedback
train_yes_id = [] 
for q in range(10):
    print('Iteraction:',q)
    train_auc = 0
    total_loss = 0
    xuij_auc = 0
    length = 0
    
    for z in range(usr_nb):
        """
        yes 用來存放選擇到的YouTuber feature (for auxilary)
        yesr 用來存放user對該YouTuber的喜好程度(user_category 跟 YouTuber_category的相似性)
        r_3 用來存放user 對該YouTuber種類的偏好(取max)
        """
        yes=[]
        yesr=[]
        r_3=np.zeros(3) 
        
        #這裡不知道怎麼讓3變成變動型的長度
        sample=random.sample(train_t[z],3) #隨機選3個sample true's YouTuber
        train_yes_id.append(sample) #sample全部丟進去
        
        for k in range(len(sample)):
            yes.append(IGimg_npy[sample[k]])
            yesr.append(movie_genre[sample[k]]*usr_genre_norm[z])
#             print('movie_genre:', movie_genre[sample[k]])
#             print('usr_genre_norm:',usr_genre_norm[z])
            
        for k in range(3):
            r_3[k]=max(yesr[k])
        yes=np.array(yes)
        
        not_used_list = list(set(train_t[z]).difference(set(sample)))
        
        train_t_sample = random.sample(train_t[z],2)
        #print('number of positive feedback', len(train_t[z]))
        for ta in train_t_sample:
            #ta=random.choice(train_t[z]) #ta is true positve photo
            train_pair_t.append(ta)
            image_1=np.expand_dims(IGimg_npy[ta],0)
            
            train_f_sample = random.sample(train_f[z],10)
            for b in train_f_sample:
                #print('likes:',ta,';Not likes:',b)
                #b=random.choice(train_f[z])  #b is no feedback photo
                train_pair_f.append(b)
                image_2=np.expand_dims(IGimg_npy[b],0)
                #print('Image_2',image_2.shape)
            
                #use_test[z].append(b)
                r3,_auc, _loss,_=sess.run([a_list_soft,auc,loss,train_op], 
                                          feed_dict={user: [z], i: [ta], j: [b],
                                                     xf: yes , l_id:sample,r:r_3,
                                                     image_i:image_1,image_j:image_2})
                #print(XUIJ)
                #print('loss=',_loss)
                #print('auc=',_auc)
                #print('sub r3:',r3)
                train_auc+=_auc
                total_loss+=_loss
                length += 1
            #now1+=1
    
    #print('mine:',xuij_auc/136)    
    #print('a_list_soft:',r3)
    print("total_loss:-----------------", total_loss/length)
    print("train_auc:-------------------", train_auc/length)
    print('time:',time.time()-t0,' sec')
print('Total cost ',time.time()-t0,' sec')

Iteraction: 0
total_loss:----------------- [[0.56588656]]
train_auc:------------------- 0.8188613861386138
time: 270.62279629707336  sec
Iteraction: 1
total_loss:----------------- [[0.48105934]]
train_auc:------------------- 0.7844306930693069
time: 536.0553126335144  sec
Iteraction: 2
total_loss:----------------- [[0.4441793]]
train_auc:------------------- 0.8003217821782178
time: 805.3530004024506  sec
Iteraction: 3
total_loss:----------------- [[0.4165572]]
train_auc:------------------- 0.8174009900990099
time: 1077.22527718544  sec
Iteraction: 4
total_loss:----------------- [[0.40781003]]
train_auc:------------------- 0.8221782178217821
time: 1344.3898437023163  sec
Iteraction: 5
total_loss:----------------- [[0.39214453]]
train_auc:------------------- 0.8309653465346535
time: 1609.7548031806946  sec
Iteraction: 6
total_loss:----------------- [[0.38956276]]
train_auc:------------------- 0.8328217821782178
time: 1875.3597333431244  sec
Iteraction: 7
total_loss:----------------- [[0.

# Get latent factor and Each weight

In [16]:
U, M, A, A1, Au, Am, Aa, Av,B = sess.run([user_latent, item_latent, aux_item, W1, Wu, Wm, Wa, Wv,Beta])

In [17]:
print('User latent shape: ',U.shape)
print('photo latent shape: ', Y.shape)
print('Auxilary latent shape: ',A.shape)
print('W1 weight shape: ',A1.shape)
print('Wu weight shape:',Au.shape)
print('Wy weight shape:', Ay.shape)
print('Wa weight shape:',Aa.shape)
print('Wv weight shape:',Av.shape)
print('Beta shape:',B.shape)

User latent shape:  (2020, 32)
photo latent shape:  (166, 32)
Auxilary latent shape:  (166, 32)
W1 weight shape:  (2020, 32)
Wu weight shape: (2020, 32, 32)
Wy weight shape: (2020, 32, 32)
Wa weight shape: (2020, 32, 32)
Wv weight shape: (2020, 32, 256)
Beta shape: (2020, 256)


# Testing Part

In [21]:
result=np.zeros((200, 166))
RS=np.zeros((200, 166))
#test_idx --> Test 的 index

test_yes_id=[]
for s in range(200):
    print(s,test_idx[s])

    yes=[]
    sample=random.sample(train_t[test_idx[s]],len(train_t[test_idx[s]])) #從training part 的positive feedback 取出YouTuber 當成Auxilary
    #sample=result_yes_id[now]
    test_yes_id.append(sample)
    alpha=np.zeros([len(sample)])
    
    for a in range(len(sample)):
        r =np.max(movie_genre[sample[a]] * usr_genre_norm[test_idx[s]]) #sample a 的category vec *user_category vec
    
        alpha[a]=np.dot(A1[test_idx[s]],(relu(np.dot(Au[test_idx[s]],np.expand_dims(U[test_idx[s]],0).T)+np.dot(Ay[test_idx[s]],np.expand_dims(Y[sample[a]],0).T)+np.dot(Aa[test_idx[s]],
                            np.expand_dims(A[sample[a]],0).T)+ np.dot(Av[test_idx[s]],np.expand_dims(IGimg_npy[sample[a]],0).T))))*r
    mul=np.zeros((1,32))
    #print('alpha------------',alpha)
    print('softmax alpha--------------',softmax(alpha))
    for i in range(len(sample)):
        mul+=softmax(alpha)[i]*A[sample[i]] #attention alpha*Ai part 
    new_mul=mul+U[test_idx[s]]  #(U+auxilary)
    for k in range(166):
        result[s][k]=np.dot(new_mul,Y[k].T) #(U+auxilary)*photo latent factor
        RS[s][k] = np.dot(new_mul,Y[k].T)+np.dot(B[test_idx[s]], IGimg_npy[k].T)
print(RS[s])

0 1613
softmax alpha-------------- [0.09091353 0.09090975 0.09091393 0.09090153 0.09089638 0.09090909
 0.09092979 0.09091765 0.0908948  0.09089801 0.09091555]
1 976
softmax alpha-------------- [0.09999805 0.10000009 0.09999888 0.09999762 0.10000073 0.0999969
 0.10000392 0.10000327 0.09999952 0.10000101]
2 1701
softmax alpha-------------- [0.09999832 0.10000116 0.09999801 0.10000443 0.10000037 0.10000392
 0.09999626 0.10000179 0.09999676 0.09999898]
3 1579
softmax alpha-------------- [0.11111152 0.11111348 0.1111069  0.11112669 0.11110689 0.11110431
 0.11111232 0.11109956 0.11111832]
4 437
softmax alpha-------------- [0.07692758 0.07692198 0.0769213  0.07692369 0.07692378 0.07692322
 0.07692234 0.0769224  0.0769235  0.07691917 0.07692347 0.07692073
 0.07692683]
5 1687
softmax alpha-------------- [0.08332684 0.08332971 0.08333831 0.08333145 0.08333192 0.08333428
 0.08333216 0.08333142 0.08333777 0.08333464 0.08333532 0.08333618]
6 1760
softmax alpha-------------- [0.11111169 0.11111781 0

In [22]:
#取出test的資料
testRS = np.zeros((200,10)) #shape 200*10

#test_t 是true的
#test_f 是false的
        
for z in range(200):
    user_id = test_idx[z]
    #positive target YouTuber list
    youtube_t = test_t[z] 
    #not target YouTuber list
    youtube_f = test_f[z]
    
    #前兩個放target的RS
    for i in range(len(youtube_t)):
        testRS[z][i] = RS[z][youtube_t[i]]
    for i in range(len(youtube_f)):
        testRS[z][i+len(youtube_t)] = RS[z][youtube_f[i]]

In [23]:
def topN(sortlist,n):
    topList = []
    for i in range(n):
        topList.append(sortlist.index(max(sortlist)))
        #print(max(sortlist))
        #print(sortlist.index(max(sortlist)))
        sortlist[sortlist.index(max(sortlist))] = -1000000000
    return topList

In [24]:
count_0_all = []
for i in range(len(testRS)):
    top_0 = topN(list(testRS[i]),2)
    count_0_all.append(top_0)
    #print(top_0)

acc_0 = 0
total = len(count_0_all)*len(count_0_all[0])
#print(total) #(200*2)
for i in range(len(count_0_all)):
    for j in range(len(count_0_all[i])):
        if count_0_all[i][j] < 2: #代表是0或1 (也就是target)
            acc_0 += 1
#print(acc_0)
avg_acc = acc_0/total
print('avg_accuarcy for count_0:',avg_acc)

avg_accuarcy for count_0: 0.2775


# Testing for dynamic length for loop

In [25]:
length = tf.placeholder(tf.int32)
i = tf.constant(0)

while_condition = lambda i: tf.less(i, length)
def body(i):
    # do something here which you want to do in your loop
    # increment i
    return [tf.add(i, 1)]

# do the loop:
output = tf.while_loop(while_condition, body, [i])


init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
sess.run(output,feed_dict={length:3})

3