In [10]:
import os
import json
import numpy as np
import pandas as pd
import random
from IPython.display import display

from helper import read_json

In [2]:
def relu(x):
    """Rectified linear unit: the element-wise maximum of 0 and ``x``.

    ``x`` may be a scalar or anything NumPy can broadcast against 0; the
    result is whatever ``np.maximum`` returns for that input.
    """
    rectified = np.maximum(0, x)
    return rectified

# Original features

In [3]:
# Pre-computed feature matrices, loaded relative to the notebook's directory.
all_npy = np.load('./npy/all_2372.npy')
movie_genre = np.load('./npy/movie_genre.npy')
usr_following = np.load('./npy/user_followings.npy')
usr_genre = np.load('./npy/user_genre_like.npy')

print('All features:', all_npy.shape)
print('Movie genre:', movie_genre.shape)
print('User following:', usr_following.shape)
print('User genre:', usr_genre.shape)

usr_nb = len(usr_following) # the number of users
movie_nb = len(movie_genre)  # the number of movies
print(usr_nb, movie_nb)

# Sizes of the held-out user / movie sets used further down the notebook.
usr_test_amount = 150
movie_test_amount = 82
print(usr_test_amount, movie_test_amount)

latent_dim = 64 # latent dims
ft_dim = all_npy.shape[1] # feature dims
print(latent_dim, ft_dim)

# Scale every user's genre row by that row's largest entry so each row peaks
# at 1. A row whose maximum is 0 would otherwise become 0/0 = NaN and poison
# every score computed from it; such rows are left as all zeros instead.
row_max = usr_genre.max(axis=1, keepdims=True)
usr_genre_norm = np.divide(
    usr_genre,
    row_max,
    out=np.zeros(usr_genre.shape),
    where=row_max != 0,
)
print(usr_genre_norm.shape)
print('Before:', usr_genre)
print('After:', usr_genre_norm)

All features: (165, 2372)
Movie genre: (165, 20)
User following: (1582, 165)
User genre: (1582, 20)
1582 165
150 82
64 2372
(1582, 20)
Before: [[ 2  2  0 ...  1  0  0]
 [34 49 14 ...  1  0  0]
 [10 17 17 ...  4  0  0]
 ...
 [79 43  0 ...  8 12  0]
 [ 6  6  0 ...  0  1  0]
 [67 53  1 ...  3  5  0]]
After: [[0.05263158 0.05263158 0.         ... 0.02631579 0.         0.        ]
 [0.41463415 0.59756098 0.17073171 ... 0.01219512 0.         0.        ]
 [0.24390244 0.41463415 0.41463415 ... 0.09756098 0.         0.        ]
 ...
 [0.30620155 0.16666667 0.         ... 0.03100775 0.04651163 0.        ]
 [0.24       0.24       0.         ... 0.         0.04       0.        ]
 [0.74444444 0.58888889 0.01111111 ... 0.03333333 0.05555556 0.        ]]


In [4]:
# Candidate pool: one index per row of usr_following.
usr_idx = list(range(len(usr_following)))
print(len(usr_idx))

# Fixed seed so the same users are drawn on every run; the draw is then
# put in ascending order.
random.seed(42)
test_idx = random.sample(usr_idx, usr_test_amount)
test_idx.sort()
print(len(test_idx), test_idx[:10])
# 150 [13, 51, 54, 61, 65, 88, 93, 96, 114, 130]

1582
150 [13, 51, 54, 61, 65, 88, 93, 96, 114, 130]


In [5]:
# JSON text is UTF-8; pass the encoding explicitly so decoding does not depend
# on the platform's locale default.

# Movie names as a list; other cells index it by position.
with open('../orderedListMovies.json', 'r', encoding='utf-8') as openfile:
    movie_ls = json.load(openfile)
print(len(movie_ls), movie_ls[:10])

# Mapping looked up by movie name to get that movie's genres.
with open('../genresDic.json', 'r', encoding='utf-8') as openfile:
    genresDic = json.load(openfile)
print(len(genresDic))

165 ['21bridgesmovie', '47metersdown', 'abeautifuldaymovie', 'abominablemovie', 'adastramovie', 'adogsjourneymovie', 'aftermathmovie', 'aftermovie', 'ahiddenlifefilm', 'alitamovie']
166


In [6]:
# Read the four train/test split files from SPLIT_PATH, in a fixed order.
SPLIT_PATH = './split/LHO_82_no33/'
train_t, train_f, test_t, test_f = (
    read_json(SPLIT_PATH + split_file)
    for split_file in (
        'train_t.json',
        'train_f.json',
        'test_t.json',
        'test_f_82.json',
    )
)

# Embedding features

In [45]:
# Directory listing of the saved weights, in sorted order.
SAVE_DIR = './weight/grid/'
files = os.listdir(SAVE_DIR)
files.sort()
print('files:', files)

# Keep only the entries whose name begins with the 'MRM_E240' prefix.
SAVE_NAMES = [fname for fname in files if fname.startswith('MRM_E240')]
print(len(SAVE_NAMES), SAVE_NAMES)

files: ['.ipynb_checkpoints', 'MRM_E100.npz', 'MRM_E150.npz', 'MRM_E200.npz', 'MRM_E210.npz', 'MRM_E220.npz', 'MRM_E220_2.npz', 'MRM_E230.npz', 'MRM_E230_2.npz', 'MRM_E230_like.npz', 'MRM_E240.npz', 'MRM_E240_1.npz', 'MRM_E240_11.npz', 'MRM_E240_13.npz', 'MRM_E240_16.npz', 'MRM_E240_17.npz', 'MRM_E240_19.npz', 'MRM_E240_2.npz', 'MRM_E240_20.npz', 'MRM_E240_21.npz', 'MRM_E240_22.npz', 'MRM_E240_23.npz', 'MRM_E240_25.npz', 'MRM_E240_26.npz', 'MRM_E240_27.npz', 'MRM_E240_28.npz', 'MRM_E240_29.npz', 'MRM_E240_3.npz', 'MRM_E240_30.npz', 'MRM_E240_31.npz', 'MRM_E240_32.npz', 'MRM_E240_33.npz', 'MRM_E240_4.npz', 'MRM_E240_5.npz', 'MRM_E240_6.npz', 'MRM_E240_9.npz', 'MRM_E240_like.npz', 'MRM_E245.npz', 'MRM_E250.npz', 'MRM_E250_2.npz', 'MRM_E250_like.npz', 'MRM_E255.npz', 'MRM_E260_1.npz', 'MRM_E260_2.npz', 'MRM_E260_3.npz', 'MRM_E270.npz', 'MRM_E280_2.npz', 'MRM_E290_2.npz', 'MRM_E300.npz']
27 ['MRM_E240.npz', 'MRM_E240_1.npz', 'MRM_E240_11.npz', 'MRM_E240_13.npz', 'MRM_E240_16.npz', 'MRM_E24

In [33]:
def testing(U, Y, A, E, Au, Ay, Aa, Av, B):
    #with Embedding
    result = np.zeros((usr_nb, movie_nb))

    #test_idx --> Test 的 index length = 150
    sum_alpha = 0

    for u in range(usr_nb):
        alpha = np.zeros([movie_nb])

        for m in range(movie_nb):
            r = np.max(movie_genre[m] * usr_genre_norm[u]) #sample a 的category vec *user_category vec

            alpha_a = (np.dot(Au[u][m],np.expand_dims(U[u],0).T) + 
                       np.dot(Ay[u][m],np.expand_dims(Y[m],0).T) + 
                       np.dot(Aa[u][m],np.expand_dims(A[m],0).T) +
                       np.dot(Av[u][m],np.dot(E,np.expand_dims(all_npy[m],0).T)))


            # relu part
            alpha[m]=np.sum((relu(alpha_a)))*r

        mul = np.zeros((1,latent_dim))
        added_alpha = np.add(alpha,0.0000000001)
        norm_alpha = added_alpha/np.sum(added_alpha)

        for i in range(movie_nb):
            mul += norm_alpha[i] * A[i] # attention alpha*Ai part

        for k in range(movie_nb):
            result[u][k] = np.dot(mul,Y[k].T) #(U+auxilary)* latent factor
            
#     print(result.shape)
    return result

In [70]:
def loadNpz(path):
    """Load one saved weight archive and report users whose two top-10 lists differ.

    Two rankings are built for every user: one from ``U . Y^T`` ("UM") and one
    from the scores returned by ``testing`` ("AM"). A user is printed when

    * they follow at most 20 movies,
    * at least 5 followed movies appear in the UM top 10,
    * at least 5 followed movies appear in the AM top 10, and
    * the two top-10 lists share at most 2 movies.

    For each such user the followed movies and both top-10 lists are printed
    together with their genres.

    Reads the module-level names ``usr_following``, ``movie_ls``,
    ``genresDic`` and ``testing``.

    Parameters
    ----------
    path : path of an ``.npz`` archive containing the arrays
        ``U, Y, A, E, Wu, Wy, Wa, Wv, B``.

    Returns
    -------
    None. All results are printed.
    """
    # np.load on an .npz keeps the archive open until it is closed; the context
    # manager releases the handle once the arrays have been read. This matters
    # because the function is called once per weight file.
    with np.load(path) as params:
        U = params['U']
        Y = params['Y']
        A = params['A']
        E = params['E']
        Au = params['Wu']
        Ay = params['Wy']
        Aa = params['Wa']
        Av = params['Wv']
        B = params['B']

    topn = 10          # length of each ranking
    max_follow = 20    # skip users following more movies than this
    min_hits = 5       # followed movies required in each top list
    max_shared = 2     # largest allowed overlap between the two top lists

    UM = np.dot(U, Y.T)
    print('UM:', UM.shape)

    result = testing(U, Y, A, E, Au, Ay, Aa, Av, B)
    print('result:', result.shape)

    for u in range(len(usr_following)):
        # Indices of the movies this user follows, as plain Python ints so the
        # printed lists and sets look the same on every NumPy version.
        follow = np.nonzero(usr_following[u] == 1)[0].tolist()
        if len(follow) > max_follow:
            continue

        # Top-N movie indices, best first, under each scoring.
        desc_UM = np.argsort(UM[u])[::-1][:topn].tolist()
        desc_A = np.argsort(result[u])[::-1][:topn].tolist()

        um_true = set(follow) & set(desc_UM)
        am_true = set(follow) & set(desc_A)
        shared = set(desc_UM) & set(desc_A)
        if len(um_true) < min_hits or len(am_true) < min_hits or len(shared) > max_shared:
            continue

        print('User:', u)
        print('UM true:', um_true)
        print('AM true:', am_true)

        print('UM&AM:', len(shared), shared)

        print('follow', follow)
        for idx in follow:
            title = movie_ls[idx]
            print(title, genresDic[title])

        for header, picks in (('=====UM=====', desc_UM), ('=====AM=====', desc_A)):
            print(header)
            for idx in picks:
                title = movie_ls[idx]
                print(title, genresDic[title])

In [71]:
# Print the report for every selected weight archive, one after another.
for name in SAVE_NAMES:
    print(name)
    weight_path = SAVE_DIR + name
    loadNpz(weight_path)

MRM_E240.npz
UM: (1582, 165)
(1582, 165)
result: (1582, 165)
MRM_E240_1.npz
UM: (1582, 165)
(1582, 165)
result: (1582, 165)
MRM_E240_11.npz
UM: (1582, 165)
(1582, 165)
result: (1582, 165)
MRM_E240_13.npz
UM: (1582, 165)
(1582, 165)
result: (1582, 165)
MRM_E240_16.npz
UM: (1582, 165)
(1582, 165)
result: (1582, 165)
MRM_E240_17.npz
UM: (1582, 165)
(1582, 165)
result: (1582, 165)
MRM_E240_19.npz
UM: (1582, 165)
(1582, 165)
result: (1582, 165)
MRM_E240_2.npz
UM: (1582, 165)
(1582, 165)
result: (1582, 165)
MRM_E240_20.npz
UM: (1582, 165)
(1582, 165)
result: (1582, 165)
MRM_E240_21.npz
UM: (1582, 165)
(1582, 165)
result: (1582, 165)
MRM_E240_22.npz
UM: (1582, 165)
(1582, 165)
result: (1582, 165)
User: 489
UM true: {161, 129, 134, 112, 154}
AM true: {161, 112, 144, 86, 58}
UM&AM: 2 {112, 161}
follow [12, 28, 34, 49, 58, 66, 68, 76, 78, 86, 106, 112, 123, 129, 134, 141, 144, 154, 159, 161]
annabellemovie ['Horror', 'Mystery', 'Thriller']
brightburnmovie ['Horror', 'Sci-Fi']
childsplaymovie ['H