In [3]:
from time import time
from scipy.sparse import csc_matrix
import tensorflow as tf
import numpy as np
import h5py

In [4]:
tf.__version__

'2.11.0'

# 1. Data Import

In [5]:
def load_data_100k(path='./', delimiter='\t'):
    train = np.loadtxt(path+'movielens_100k_u1.base', skiprows=0, delimiter=delimiter).astype('int32')
    test = np.loadtxt(path+'movielens_100k_u1.test', skiprows=0, delimiter=delimiter).astype('int32')
    total = np.concatenate((train, test), axis=0)

    n_u = np.unique(total[:,0]).size  # num of users
    n_m = np.unique(total[:,1]).size  # num of movies
    n_train = train.shape[0]  # num of training ratings
    n_test = test.shape[0]  # num of test ratings

    train_r = np.zeros((n_m, n_u), dtype='float32')
    test_r = np.zeros((n_m, n_u), dtype='float32')

    for i in range(n_train):
        train_r[train[i,1]-1, train[i,0]-1] = train[i,2]

    for i in range(n_test):
        test_r[test[i,1]-1, test[i,0]-1] = test[i,2]

    train_m = np.greater(train_r, 1e-12).astype('float32')  # masks indicating non-zero entries
    test_m = np.greater(test_r, 1e-12).astype('float32')

    print('data matrix loaded')
    print('num of users: {}'.format(n_u))
    print('num of movies: {}'.format(n_m))
    print('num of training ratings: {}'.format(n_train))
    print('num of test ratings: {}'.format(n_test))

    return n_m, n_u, train_r, train_m, test_r, test_m

In [6]:
data_path = 'datas/'
path = data_path + '/MovieLens_100K/'
##
# 1. n_m : number of movies
# 2. n_u : number of users
# 3. train_r : rating datas (train_datas)
# 4 . train_m : movies datas (train_datas)
# 5,6 . test_r, test_m
##

n_m, n_u, train_r, train_m, test_r, test_m = load_data_100k(path=path, delimiter='\t')

data matrix loaded
num of users: 943
num of movies: 1682
num of training ratings: 80000
num of test ratings: 20000


# 2. Setting Value

In [7]:
# Common hyperparameter settings
n_hid = 500
n_dim = 5
n_layers = 2
gk_size = 3

lambda_2 = 20.  # l2 regularisation
lambda_s = 0.006
iter_p = 5  # optimisation
iter_f = 5
epoch_p = 30  # training epoch
epoch_f = 60
dot_scale = 1  # scaled dot product

# 3. Pre-training with Local Kernel, Item-based AutoEncoder

## 3-1. Process

In [8]:
def local_kernel(u, v):
    dist = tf.norm(u - v, ord=2, axis=2)
    hat = tf.maximum(0., 1. - dist**2)

    return hat

In [9]:
# 1. Pre Training with Local Kernel, Item-Based Auto Encoder
n_in = n_u
n_dim = 5
n_hid = 500
lambda_2 = 20.  # l2 regularisation
lambda_s = 0.006

y = tf.Variable(tf.ones(shape=[n_m, n_u], dtype=float))

default_initializer = tf.keras.initializers.GlorotUniform()

W = tf.Variable(default_initializer(shape=[n_in, n_hid]), name='W')
u = tf.Variable(tf.random.truncated_normal([n_in, 1, n_dim]), name="u")
v = tf.Variable(tf.random.truncated_normal([1, n_hid, n_dim]), name="v")
b = tf.Variable(default_initializer(shape=[n_hid]))

w_hat = local_kernel(u, v)

sparse_reg = tf.keras.regularizers.L2(lambda_s)
sparse_reg_term = sparse_reg(w_hat)

l2_reg = tf.keras.regularizers.L2(lambda_2)
l2_reg_term = l2_reg(W)

loss_value = sparse_reg_term + l2_reg_term
print(sparse_reg_term, l2_reg_term, loss_value)

W_eff = W * w_hat
print(W_eff)

y = tf.matmul(y, W_eff) + b
y = tf.nn.sigmoid(y)

print(y)

tf.Tensor(4.494037, shape=(), dtype=float32) tf.Tensor(13085.154, shape=(), dtype=float32) tf.Tensor(13089.648, shape=(), dtype=float32)
tf.Tensor(
[[ 0.  0. -0. ...  0.  0.  0.]
 [ 0.  0.  0. ...  0. -0.  0.]
 [ 0. -0.  0. ... -0.  0. -0.]
 ...
 [-0. -0. -0. ...  0. -0. -0.]
 [-0.  0.  0. ...  0. -0. -0.]
 [ 0. -0. -0. ... -0.  0. -0.]], shape=(943, 500), dtype=float32)
tf.Tensor(
[[0.51634717 0.50545365 0.5000723  ... 0.5379147  0.5053494  0.5102376 ]
 [0.51634717 0.50545365 0.5000723  ... 0.5379147  0.5053494  0.5102376 ]
 [0.51634717 0.50545365 0.5000723  ... 0.5379147  0.5053494  0.5102376 ]
 ...
 [0.51634717 0.50545365 0.5000723  ... 0.5379147  0.5053494  0.5102376 ]
 [0.51634717 0.50545365 0.5000723  ... 0.5379147  0.5053494  0.5102376 ]
 [0.51634717 0.50545365 0.5000723  ... 0.5379147  0.5053494  0.5102376 ]], shape=(1682, 500), dtype=float32)


2023-01-09 10:43:06.234545: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


## 3-2. Modularization

In [10]:
# Common hyperparameter settings
n_hid = 500
n_dim = 5
n_layers = 2
gk_size = 3

lambda_2 = 20.  # l2 regularisation
lambda_s = 0.006
iter_p = 5  # optimisation
iter_f = 5
epoch_p = 30  # training epoch
epoch_f = 60
dot_scale = 1  # scaled dot product

def local_kernel(u, v):
    dist = tf.norm(u - v, ord=2, axis=2)
    hat = tf.maximum(0., 1. - dist**2)

    return hat

class kernel_layer(tf.keras.layers.Layer):
    def __init__(self, n_hid=n_hid, n_dim=n_dim, activation=tf.nn.sigmoid, lambda_s=lambda_s, lambda_2 = lambda_2):
        super(kernel_layer, self).__init__()
        
        self.n_dim = n_dim
        self.n_hid = n_hid
        self.lambda_2 = lambda_2
        self.lambda_s = lambda_s
        
        self.activation = activation
        
    def call(self, inputs):
        default_initializer = tf.keras.initializers.GlorotUniform()

        W = tf.Variable(default_initializer(shape=[inputs.shape[1], self.n_hid]), name='W')
        n_in = inputs.shape[1]
        u = tf.Variable(tf.random.truncated_normal([n_in, 1, self.n_dim]), name="u")
        v = tf.Variable(tf.random.truncated_normal([1, self.n_hid, self.n_dim]), name="v")
        b = tf.Variable(default_initializer(shape=[self.n_hid]))
        
        w_hat = local_kernel(u, v)

        sparse_reg = tf.keras.regularizers.L2(self.lambda_s)
        sparse_reg_term = sparse_reg(w_hat)

        l2_reg = tf.keras.regularizers.L2(self.lambda_2)
        l2_reg_term = l2_reg(W)

        loss_value = sparse_reg_term + l2_reg_term

        W_eff = W * w_hat

        y = tf.matmul(inputs, W_eff) + b
        y = self.activation(y)

        return y, loss_value

In [35]:
import tensorflow as tf

y = tf.Variable(tf.ones(shape=[n_m, n_u], dtype=float))
reg_losses = None

k_p = kernel_layer()

for i in range(n_layers):
    y, reg_loss = k_p(y)
    reg_losses = reg_loss if reg_losses is None else reg_losses + reg_loss
    
k_p2 = kernel_layer(n_u, activation=tf.identity)
pred_p, reg_loss = k_p2(y)
reg_losses += reg_loss

# L2 Loss
diff = train_m * (train_r - pred_p)
sqE = tf.nn.l2_loss(diff)
loss_p = sqE + reg_losses

optimizer_p = tf.optimizers.Adam(learning_rate=0.01)

# 3. Fine Tunning with Global Kernel

In [45]:
def global_kernel(inputs, gk_size, dot_scale):
    avg_pooling = tf.reduce_mean(inputs, axis=1)  # Item (axis=1) based average pooling
    avg_pooling = tf.reshape(avg_pooling, [1, -1])
    n_kernel = avg_pooling.shape[1]

    conv_kernel = tf.Variable(tf.random.truncated_normal([n_kernel, gk_size**2], stddev=0.1),name="conv_kernel")
    gk = tf.matmul(avg_pooling, conv_kernel) * dot_scale  # Scaled dot product
    gk = tf.reshape(gk, [gk_size, gk_size, 1, 1])

    return gk

In [55]:
def global_conv(inputs, W):
    inputs = tf.reshape(inputs, [1, inputs.shape[0], inputs.shape[1], 1])
    conv2d = tf.nn.relu(tf.nn.conv2d(inputs, W, strides=[1,1,1,1], padding='SAME'))

    return tf.reshape(conv2d, [conv2d.shape[1], conv2d.shape[2]])

In [71]:
y = tf.Variable(tf.ones(shape=[n_m, n_u], dtype=float))
reg_losses = None

k_f = kernel_layer()

for i in range(n_layers):
    y, reg_loss = k_f(y)
    reg_losses = reg_loss if reg_losses is None else reg_losses + reg_loss
    
k_f2 = kernel_layer(n_u, activation=tf.identity)
y_dash, _ = k_f2(y)

gk = global_kernel(y_dash, gk_size, dot_scale)
y_hat = global_conv(train_r, gk)

k_f3 = kernel_layer()
for i in range(n_layers):
    y_hat, reg_losses = k_f3(y_hat)
    reg_losses = reg_loss if reg_losses is None else reg_losses + reg_loss

k_f4 = kernel_layer(n_u, activation=tf.identity)
pred_f, reg_loss = k_f4(y_hat)
reg_losses += reg_loss

# L2 loss
diff = train_m * (train_r - pred_f)
sqE = tf.nn.l2_loss(diff)
loss_f = sqE + reg_losses

# 4. Evaluation Code

In [72]:
def dcg_k(score_label, k):
    dcg, i = 0., 0
    for s in score_label:
        if i < k:
            dcg += (2**s[1]-1) / np.log2(2+i)
            i += 1
    return dcg

In [73]:
def ndcg_k(y_hat, y, k):
    score_label = np.stack([y_hat, y], axis=1).tolist()
    score_label = sorted(score_label, key=lambda d:d[0], reverse=True)
    score_label_ = sorted(score_label, key=lambda d:d[1], reverse=True)
    norm, i = 0., 0
    for s in score_label_:
        if i < k:
            norm += (2**s[1]-1) / np.log2(2+i)
            i += 1
    dcg = dcg_k(score_label, k)
    return dcg / norm

In [74]:
def call_ndcg(y_hat, y):
    ndcg_sum, num = 0, 0
    y_hat, y = y_hat.T, y.T
    n_users = y.shape[0]

    for i in range(n_users):
        y_hat_i = y_hat[i][np.where(y[i])]
        y_i = y[i][np.where(y[i])]

        if y_i.shape[0] < 2:
            continue

        ndcg_sum += ndcg_k(y_hat_i, y_i, y_i.shape[0])  # user-wise calculation
        num += 1

    return ndcg_sum / num

# 5. Model

In [62]:
from tensorflow import keras

class GLocalK(keras.Model):
    def __init__(self, **kwargs):
        pass

In [91]:
for epoch in range(epoch_p):
    with tf.GradientTape() as tape:
        output, loss = k_p2(train_r)
        print("local kernel loss : {}".format(loss))
    gradients = tape.gradient(loss, k_p2.trainable_variables, unconnected_gradients=tf.UnconnectedGradients.ZERO)
    optimizer_p.apply_gradients(zip(gradients, k_p2.trainable_variables))
    
    pre, _ = k_p2(train_r)

    error = (test_m * (np.clip(pre, 1., 5.) - test_r) ** 2).sum() / test_m.sum()  # test error
    test_rmse = np.sqrt(error)

    error_train = (train_m * (np.clip(pre, 1., 5.) - train_r) ** 2).sum() / train_m.sum()  # train error
    train_rmse = np.sqrt(error_train)

    print('.-^-._' * 12)
    print('PRE-TRAINING')
    print('Epoch:', i+1, 'test rmse:', test_rmse, 'train rmse:', train_rmse)
    print('.-^-._' * 12)

local kernel loss : 18902.314453125
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
PRE-TRAINING
Epoch: 2 test rmse: 2.7859828 train rmse: 2.7647288
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
local kernel loss : 18888.63671875
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
PRE-TRAINING
Epoch: 2 test rmse: 2.7859828 train rmse: 2.7647288
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
local kernel loss : 18885.46484375
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
PRE-TRAINING
Epoch: 2 test rmse: 2.7859828 train rmse: 2.7647288
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
local kernel loss : 18895.3359375
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
PRE-TRAINING
Epoch: 2 test rmse: 2.7859828 train rmse: 2.7647288
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
local kernel los

(<tf.Tensor: shape=(1682, 943), dtype=float32, numpy=
 array([[-0.07430239, -0.04062451,  0.23960903, ...,  0.01779529,
         -0.0182954 , -0.02805335],
        [ 0.02291551, -0.02921245,  0.05562797, ...,  0.01779529,
         -0.03620867, -0.02805335],
        [ 0.02991716, -0.03307576,  0.02324167, ...,  0.01779529,
         -0.03620867, -0.02805335],
        ...,
        [ 0.02291551, -0.03307576,  0.01562973, ...,  0.01779529,
         -0.03620867, -0.02805335],
        [ 0.02291551, -0.03307576,  0.01562973, ...,  0.01779529,
         -0.03620867, -0.02805335],
        [ 0.02291551, -0.03307576,  0.01562973, ...,  0.01779529,
         -0.03620867, -0.02805335]], dtype=float32)>,
 <tf.Tensor: shape=(), dtype=float32, numpy=18867.727>)