In [1]:
import os
import numpy as np
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
import tensorflow as tf
import random

In [2]:
seed = 100
tf.random.set_seed(seed)
np.random.seed(seed)
random.seed(seed)

In [3]:
print(tf.__version__)

2.9.0


In [4]:
dataset_folder='citeulike'
dataset_name='citeulike'
preflix_folder='24_08_30'

In [5]:
import platform
if platform.system().lower() == 'windows':
    import pickle5 as pickle
else:
    import pickle
pretrain=False

users_num,items_num=5551,16980

batch_size=2048
epoch_num=1000
hidden_size=64
keep_rate=0.9
layers_num=4
loss_ratio=0.0001
group_num=10

neg_num=1
test_neg_num=1000

In [6]:
print(items_num)

16980


In [7]:
if platform.system().lower() == 'windows':
    print("windows")
    with open('E:/datasets/'+dataset_folder+'/'+dataset_name+'/'+preflix_folder+'/'+dataset_name+'_gcn_train_data_index_from_0.pkl', 'rb') as f:
        train_data=pickle.load(f)
    with open('E:/datasets/'+dataset_folder+'/'+dataset_name+'/'+preflix_folder+'/'+dataset_name+'_gcn_test_data_index_from_0.pkl', 'rb') as f:
        test_data=pickle.load(f)
    val_data=test_data
elif platform.system().lower() == 'linux':
    print("linux")
    with open('/root/autodl-fs/workspace/datasets/'+dataset_folder+'/'+dataset_name+'/'+preflix_folder+'/'+dataset_name+'_gcn_train_data_index_from_0.pkl', 'rb') as f:
        train_data=pickle.load(f)
    with open('/root/autodl-fs/workspace/datasets/'+dataset_folder+'/'+dataset_name+'/'+preflix_folder+'/'+dataset_name+'_gcn_test_data_index_from_0.pkl', 'rb') as f:
        test_data=pickle.load(f)
    val_data=test_data

linux


In [8]:
print(np.shape(train_data['pos_item']),np.shape(test_data['pos_item']),np.shape(val_data['pos_item']))

(166025,) (44512,) (44512,)


In [9]:
users_num_max=max(max(train_data['users']),max(test_data['users']),max(val_data['users']))
users_num_min=min(min(train_data['users']),min(test_data['users']),min(val_data['users']))
items_num_max=max(max(train_data['pos_item']),max(test_data['pos_item']),max(val_data['pos_item']))
items_num_min=min(min(train_data['pos_item']),min(test_data['pos_item']),min(val_data['pos_item']))
print('users range:[',users_num_min,',',users_num_max,']')
print('items range:[',items_num_min,',',items_num_max,']')

users range:[ 0 , 5550 ]
items range:[ 0 , 16979 ]


In [10]:
import scipy.sparse as sp
if platform.system().lower() == 'windows':
    print("windows")
    pre_adj_mat = sp.load_npz('E:/datasets/'+dataset_folder+'/'+dataset_name+'/'+preflix_folder+'/'+dataset_name+'_s_pre_adj_mat_index_from_0.npz')#D^-1/2*A*D^-1/2
elif platform.system().lower() == 'linux':
    print("linux")
    pre_adj_mat = sp.load_npz('/root/autodl-fs/workspace/datasets/'+dataset_folder+'/'+dataset_name+'/'+preflix_folder+'/'+dataset_name+'_s_pre_adj_mat_index_from_0.npz')#D^-1/2*A*D^-1/2
print('already load adj matrix', pre_adj_mat.shape)

linux
already load adj matrix (22531, 22531)


In [11]:
norm_adj=pre_adj_mat#D^-1/2*A*D^-1/2
adj_dense=pre_adj_mat.todense()
rowsum = np.array(np.sum((adj_dense!=0),axis=1)).reshape(-1).astype(np.float32)
d=np.diag(rowsum).astype(np.float32)
a=(adj_dense!=0).astype(np.float32)
a=sp.csr_matrix(a)

In [12]:
import scipy.sparse.linalg
eigenvalues, eigenvectors = sp.linalg.eigsh(norm_adj, k=4, which='LA')
sorted_indices = np.argsort(eigenvalues)[::-1]
sorted_eigenvalues = eigenvalues[sorted_indices]
sorted_eigenvectors = eigenvectors[:, sorted_indices]
second_smallest_eigenvalue=1-sorted_eigenvalues[2]
second_smallest_eigenvector=sorted_eigenvectors[:, 2].reshape((-1,1))

In [13]:
print(second_smallest_eigenvalue,second_smallest_eigenvector.shape)

0.05791020393371582 (22531, 1)


In [14]:
class QueryLayer(tf.keras.layers.Layer):
    def __init__(self, embedding_dim):
        super(QueryLayer, self).__init__()
        self.embedding_dim = embedding_dim
        self.dense1 = tf.keras.layers.Dense(2*self.embedding_dim)
        self.tanh = tf.keras.layers.Activation('tanh')
        self.dense2 = tf.keras.layers.Dense(group_num)
        self.softmax = tf.keras.layers.Activation('softmax')

    def call(self, inputs):
        x = self.dense1(inputs)
        x = self.tanh(x)
        x = self.dense2(x)
        x = self.softmax(x)
        return x

In [15]:
class lightgcn_layer(tf.keras.layers.Layer):
    def multiply_by_coefficients(self,sp_matrix, coefficients):
        def map_fn(values):
            row_indices = sp_matrix.indices[:, 0]
            return values * tf.gather(coefficients, row_indices)

        return tf.sparse.map_values(map_fn, sp_matrix)
    
    def add_by_coefficients(self,sp_matrix, coefficients):
        def map_fn(values):
            row_indices = sp_matrix.indices[:, 0]
            return values + tf.gather(coefficients, row_indices)

        return tf.sparse.map_values(map_fn, sp_matrix)
    
    def reshapes(self,embed1):
        return tf.reshape(embed1, [-1,hidden_size])
    
    def sparse_matrix_subtraction(self,matrix1, matrix2):
        result_sparse = tf.sparse.add(matrix1, tf.sparse.SparseTensor(matrix2.indices, -matrix2.values, matrix2.dense_shape))

        return result_sparse

    
    
    def __init__(self,norm_adj,a,d,mu_2,hidden_size=64, dropout=0., embed_reg=1e-6):
        super(lightgcn_layer, self).__init__()
        self.norm_adj=norm_adj
        self.a=a
        self.d=tf.where(d == 0., d + 1., d)
        self.mu_2=mu_2
        
        self.dropout = tf.keras.layers.Dropout(dropout)
        self.beta=tf.Variable(-0.5,trainable=True)
        self.gamma=tf.Variable(0.01*tf.reshape(tf.range(group_num,dtype=tf.float32),[-1,1]),trainable=True)
        self.query_u=QueryLayer(hidden_size)
        self.query_i=QueryLayer(hidden_size)


    def call(self,item_embedding_pool,user_embedding_pool,training=False):
        #available when accept
        return tf.reshape(self.g1_ua_embeddings,[-1,hidden_size]),tf.reshape(self.g1_ia_embeddings,[-1,hidden_size]),\
    tf.nn.tanh(delta_lambda_2),boost_a,tf.nn.tanh(delta_a_loss)

In [16]:
class lightgcn(tf.keras.models.Model):

    def _convert_sp_mat_to_sp_tensor(self, X):
        coo = X.tocoo().astype(np.float32)
        indices = np.mat([coo.row, coo.col]).transpose()
        return tf.sparse.reorder(tf.SparseTensor(indices, coo.data, coo.shape))

    def cal_loss(self,u,p,n,test_mask,full=False):
        pos_score = tf.reduce_sum(tf.multiply(u, p), axis=-1) # (None, 1)
        neg_score = tf.reduce_sum(tf.multiply(u, n), axis=-1)
        
        
        if full:
            neg_score=tf.where(neg_score == 0,-1e10,neg_score)
        
        logits = tf.nn.softmax(tf.concat([pos_score, neg_score], axis=-1))
        loss=tf.reduce_mean(tf.math.softplus(neg_score-pos_score))
        
        return loss,logits
    
    

    def cosine_similarity(self,a, b):
        normalize_a = tf.math.l2_normalize(a,1)        
        normalize_b = tf.math.l2_normalize(b,1)
        cos_similarity=tf.linalg.matmul(normalize_a,normalize_b,transpose_b=True)
        return cos_similarity
    
    
    
    
    def cal_cl_loss(self,view1,view2,temp):
        cs=self.cosine_similarity(view1,view2)
        pos_cs=tf.linalg.diag_part(cs)
        loss=tf.math.reduce_mean(- tf.math.log(tf.math.exp(pos_cs/temp)/tf.math.reduce_sum(tf.math.exp(cs/temp),axis=1)))
        return loss
    

    def __init__(self,norm_adj,a,d,mu_2):
        super(lightgcn, self).__init__()
        blocks=1
        embed_reg=0.
        layer_norm_eps=1e-6
        num_heads=1
        use_l2norm=False
        initializer_p = tf.keras.initializers.RandomNormal(mean=0., stddev=0.05,seed=2024)
        if pretrain:
            item_embeddings=np.load(dataset_name+'_item_embedding.npy')
            self.item_embedding_pool = tf.keras.layers.Embedding(items_num,hidden_size,input_length=items_num,
                                                       weights=[item_embeddings],trainable=True)
            user_embeddings=np.load(dataset_name+'_user_embedding.npy')
            self.user_embedding_pool = tf.keras.layers.Embedding(users_num,hidden_size,input_length=users_num,
                                                       weights=[user_embeddings],trainable=True)
        else:
            self.item_embedding_pool = tf.keras.layers.Embedding(items_num,hidden_size,input_length=items_num,
                                                       embeddings_initializer=initializer_p,
                                                        embeddings_regularizer=tf.keras.regularizers.l2(embed_reg),trainable=True)
            self.user_embedding_pool = tf.keras.layers.Embedding(users_num,hidden_size,input_length=users_num,
                                                       embeddings_initializer=initializer_p,
                                        embeddings_regularizer=tf.keras.regularizers.l2(embed_reg),trainable=True)


        norm_adj=self._convert_sp_mat_to_sp_tensor(norm_adj)
        a=self._convert_sp_mat_to_sp_tensor(a)
        d=tf.constant(rowsum)
        mu_2=mu_2
        self.lg_layer=lightgcn_layer(norm_adj,a,d,mu_2,hidden_size,0.25,1e-6)
        self.use_l2norm=use_l2norm
        self.temp=0.5
        
    def call(self,inputs,training=False,full=False):
        self.g1_ua_embeddings, self.g1_ia_embeddings,self.delta_lambda_2,self.boost_a,self.delta_a_loss=self.lg_layer(self.item_embedding_pool,self.user_embedding_pool,training)
   
        g1_u = tf.gather(
            params=self.g1_ua_embeddings, indices=tf.reshape(inputs['users'],[1,-1])
        )
        g1_p = tf.gather(
            params=self.g1_ia_embeddings, indices=tf.reshape(inputs['pos_item'],[1,-1])
        )
        
        
        indices = tf.tile(tf.reshape(tf.range(items_num), [1, items_num]),[tf.shape(inputs['pos_item'])[0],1])
        pos_idx_expanded = tf.tile(tf.reshape(inputs['pos_item'],[-1,1]), (1, items_num))
        test_mask = tf.math.not_equal(indices, pos_idx_expanded)#[b,1430]
        
        
        if full==True:
            g1_n=tf.expand_dims(self.g1_ia_embeddings,1)
        else:
            neg_indx=tf.random.uniform([tf.shape(inputs['pos_item'])[0],neg_num],dtype=tf.int32,maxval=items_num,minval=0,seed=2023)
            pos_idx = tf.repeat(tf.reshape(inputs['users'],[-1,1]), neg_num, axis=1)
            mask = tf.equal(neg_indx, pos_idx)
            neg_indx = tf.where(mask, tf.random.uniform([tf.shape(inputs['pos_item'])[0],neg_num],dtype=tf.int32,maxval=items_num,minval=0,seed=2024), neg_indx)   
            g1_n = tf.gather(
                params=self.g1_ia_embeddings, indices=tf.reshape(neg_indx,[tf.shape(neg_indx)[1],-1])
            )
        g1_u=tf.transpose(g1_u,[1,0,2])#[b,1,h]
        g1_p=tf.transpose(g1_p,[1,0,2])
        g1_n=tf.transpose(g1_n,[1,0,2])

        

        l1,logits1=self.cal_loss(g1_u,g1_p,g1_n,test_mask,full)
        
        bpr_loss=l1
        
        user_norm = tf.reduce_sum(tf.square(g1_u))
        pos_norm = tf.reduce_sum(tf.square(g1_p))
        neg_norm = tf.reduce_sum(tf.square(g1_n))
        reg_loss = 0.5 * (user_norm + pos_norm + neg_norm) / tf.cast(tf.shape(g1_n)[0], tf.float32)
        logits=logits1
        
        return logits,(bpr_loss,loss_ratio*reg_loss,self.delta_lambda_2,self.delta_a_loss),(self.g1_ua_embeddings, self.g1_ia_embeddings,self.boost_a)
    
    def test(self):
        g1_ua_embeddings, g1_ia_embeddings,_,_,_=self.lg_layer(self.item_embedding_pool,self.user_embedding_pool,training=False)
        score=tf.matmul(g1_ua_embeddings, g1_ia_embeddings,transpose_b=True)
        score=tf.where(score == 0,-1e10,score)
        return score
    def test_pos_score(self,inputs):
        g1_ua_embeddings, g1_ia_embeddings,_,_,_=self.lg_layer(self.item_embedding_pool,self.user_embedding_pool,training=False)
        g1_u = tf.gather(
            params=g1_ua_embeddings, indices=inputs['users'])
        g1_p = tf.gather(
            params=g1_ia_embeddings, indices=inputs['pos_item'])
        return tf.reshape(tf.reduce_sum(tf.multiply(g1_u, g1_p), axis=-1),[-1,1])
    def summary(self):
        inputs = {
            'users': tf.keras.layers.Input(shape=(), dtype=tf.int32),
            'pos_item': tf.keras.layers.Input(shape=(), dtype=tf.int32)
        }
        tf.keras.models.Model(inputs=inputs, outputs=self.call(inputs)).summary()

In [17]:
import tqdm
import heapq
def hr_mrr_ndcf(rank,k):
    hr = 0.0
    mrr = 0.0
    ndcg = 0.0
    precision=0.0
    for r in rank:
        if r < k:
            if r<1:
                precision+=1
            hr += 1
            mrr += 1 / (r + 1)
            ndcg += 1 / np.log2(r + 2)
    return hr/len(rank),mrr/len(rank),ndcg/len(rank),precision/len(rank)

def eval_rank(pred_y, metric_names, k=10):
    rank = pred_y.argsort().argsort()[:, 0]
    res_dict = {}
    res=hr_mrr_ndcf(rank, k)
    for idx,name in enumerate(metric_names):
        res_dict[name] = res[idx]
    return res_dict

def eval_pos_neg(model, test_data, metric_names, k=10, batch_size=None):
    whole_rating=model.test()
    whole_rating=whole_rating#.numpy()
    other_score_top_k=np.array([heapq.nlargest(k, row) for row in whole_rating.numpy()])
    print('start testing!')
    test_dataset = tf.data.Dataset.from_tensor_slices(test_data).batch(batch_size)
    final_dict={}
    for i in metric_names:
        final_dict.update({i:[]})
    for step, batch_test in tqdm.tqdm(enumerate(test_dataset)):
        cord=tf.concat([tf.reshape(batch_test['users'],[-1,1]),tf.reshape(batch_test['pos_item'],[-1,1])],axis=1)
        pos_score=tf.gather_nd(whole_rating,cord)
        pos_score=tf.reshape(pos_score,[-1,1])
        other_score=tf.nn.embedding_lookup(whole_rating,batch_test['users'])
        used_other_score_top_k=other_score_top_k[batch_test['users'].numpy()]
        pred_y=np.concatenate((pos_score,used_other_score_top_k),axis=1)
        pred_y=-pred_y
        part_dict=eval_rank(pred_y, metric_names, k)
        for i in metric_names:
            final_dict [i].append(part_dict[i])
    for i in metric_names:
        final_dict [i]=np.mean(final_dict [i])
    return final_dict

In [18]:
callbacks = [tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)]

In [26]:
from time import time
from tensorflow.keras.backend import clear_session
import tensorflow as tf

number = 1
max_layers = 10
skip_epoch=10
ave_dict = {'hr': [], 'mrr': [], 'ndcg': [], 'precision': []}


train_dataset = tf.data.Dataset.from_tensor_slices(train_data).batch(batch_size)
val_dataset = tf.data.Dataset.from_tensor_slices(val_data).batch(batch_size)

update_interval = int(len(train_dataset) / 1)
epoch_interval=10


#grid_search=[1,0.5,0.2,0.1,0.05,0.02,0.01,0.005,0.002,0.001]
#grid_search=[0.01,0.005,0.002,0.001]
grid_search=[0.001]
for w in grid_search:
    @tf.function
    def train_step_k(batch_train, model, optimizer_k):
        with tf.GradientTape() as tape1:
            logits, (bpr_loss,reg_loss,delta_lambda_2), (g1_ua_embeddings, g1_ia_embeddings,boost_a) = model(batch_train, training=True, full=False)
            loss_value = bpr_loss+reg_loss
        gradients1 = tape1.gradient(loss_value, model.layers[2].trainable_weights[0:2])
        optimizer_k.apply_gradients(zip(gradients1, model.layers[2].trainable_weights[0:2]))
        return loss_value, (bpr_loss,reg_loss), g1_ua_embeddings, g1_ia_embeddings,boost_a

    @tf.function
    def train_step_query(batch_train, model, optimizer_query):
        with tf.GradientTape() as tape3:
            logits, (bpr_loss,reg_loss,delta_lambda_2), (g1_ua_embeddings, g1_ia_embeddings,boost_a) = model(batch_train, training=True, full=False)
            loss_value = bpr_loss+reg_loss
        gradients3 = tape3.gradient(loss_value, model.layers[2].trainable_weights)
        optimizer_query.apply_gradients(zip(gradients3, model.layers[2].trainable_weights))
        return loss_value, (bpr_loss,reg_loss), g1_ua_embeddings, g1_ia_embeddings,boost_a

    @tf.function
    def train_step_k_query(batch_train, model, optimizer_k_query,w):
        with tf.GradientTape() as tape3:
            logits, (bpr_loss,reg_loss,delta_lambda_2), (g1_ua_embeddings, g1_ia_embeddings,boost_a) = model(batch_train, training=True, full=False)
            loss_value = bpr_loss+reg_loss - w * delta_lambda_2[0][0]
        gradients3 = tape3.gradient(loss_value, model.layers[2].trainable_weights)
        optimizer_k_query.apply_gradients(zip(gradients3, model.layers[2].trainable_weights))
        return loss_value, (bpr_loss,reg_loss,delta_lambda_2[0][0]), g1_ua_embeddings, g1_ia_embeddings,boost_a

    @tf.function
    def train_step_emb(batch_train, model, optimizer_emb):
        with tf.GradientTape() as tape2:
            logits, (bpr_loss,reg_loss,delta_lambda_2), (g1_ua_embeddings, g1_ia_embeddings,boost_a) = model(batch_train, training=True, full=False)
            loss_value = bpr_loss+reg_loss
        gradients2 = tape2.gradient(loss_value, model.layers[0].trainable_weights + model.layers[1].trainable_weights+model.layers[2].trainable_weights[2:])
        optimizer_emb.apply_gradients(zip(gradients2, model.layers[0].trainable_weights + model.layers[1].trainable_weights+model.layers[2].trainable_weights[2:]))

        return loss_value, (bpr_loss,reg_loss,delta_lambda_2[0][0]), g1_ua_embeddings, g1_ia_embeddings,boost_a

    @tf.function
    def train_step(batch_train, model, optimizer_total,w1,w2=0.001):
        with tf.GradientTape() as tape:
            logits, (bpr_loss,reg_loss,delta_lambda_2,delta_a_loss), (g1_ua_embeddings, g1_ia_embeddings,boost_a) = model(batch_train, training=True, full=False)
            loss_value = bpr_loss+reg_loss + w1 *delta_lambda_2[0][0] + w2 * delta_a_loss
        gradients = tape.gradient(loss_value, model.trainable_weights)
        optimizer_total.apply_gradients(zip(gradients, model.trainable_weights))

        return loss_value, (bpr_loss,reg_loss,w1 *delta_lambda_2[0][0],w2 * delta_a_loss), g1_ua_embeddings, g1_ia_embeddings,boost_a

    @tf.function
    def val_step(model,batch_val):
        val_logits, val_loss,_ = model(batch_val, training=False, full=False)
        return val_loss[0]
    model = lightgcn(norm_adj,a,d,second_smallest_eigenvector)
    #model.summary()
    for callback in callbacks:
        callback.set_model(model)
        callback.on_train_begin(logs={})

    optimizer_k = tf.keras.optimizers.Nadam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, schedule_decay=0.004)
    optimizer_query = tf.keras.optimizers.Nadam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, schedule_decay=0.004)
    optimizer_k_query = tf.keras.optimizers.Nadam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999, epsilon=None, schedule_decay=0.004)
    optimizer_emb = tf.keras.optimizers.Nadam(learning_rate=0.0005, beta_1=0.9, beta_2=0.999, epsilon=None, schedule_decay=0.004)
    optimizer_total = tf.keras.optimizers.Nadam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, schedule_decay=0.004)
    tb = time()
    for epoch in range(epoch_num):

        total_train_loss = 0.
        total_bpr_loss = 0.
        total_reg_loss = 0.
        total_delta_lambda_2 = 0.
        total_delta_a_loss = 0.
        total_cl_loss = 0.
        num_train_batches = 0.
        total_val_loss = 0.
        num_val_batches = 0.
        print(f"Epoch {epoch + 1}/{epoch_num}")
        t1 = time()
        if epoch>=skip_epoch:
            for callback in callbacks:
                callback.on_epoch_begin(epoch, logs={})

        for step, batch_train in enumerate(train_dataset):
            if pretrain:
                loss_value,(bpr_loss,reg_loss,delta_lambda_2),\
                    g1_ua_embeddings,g1_ia_embeddings,boost_a= train_step_k_query(batch_train,model,optimizer_k_query,w)
            else:
                loss_value,(bpr_loss,reg_loss,delta_lambda_2,delta_a_loss),\
                        g1_ua_embeddings,g1_ia_embeddings,boost_a= train_step(batch_train,model,optimizer_total,w)

            total_train_loss += loss_value
            total_bpr_loss += bpr_loss
            total_reg_loss += reg_loss
            total_delta_lambda_2 += delta_lambda_2
            total_delta_a_loss += delta_a_loss
            num_train_batches += 1

        avg_train_loss = total_train_loss / num_train_batches
        avg_bpr_loss = total_bpr_loss / num_train_batches
        avg_reg_loss = total_reg_loss / num_train_batches
        avg_delta_lambda_2 = total_delta_lambda_2 / num_train_batches
        avg_delta_a_loss = total_delta_a_loss / num_train_batches
        print(f"Time cost: {time()-t1:.2f}, Training loss: {avg_train_loss.numpy():.4f}",\
              f"bpr loss: {avg_bpr_loss.numpy():.4f},reg loss: {avg_reg_loss.numpy():.4f}",\
              f"delta_lambda_2: {avg_delta_lambda_2.numpy():.4f} , delta_a_loss: {avg_delta_a_loss.numpy():.4f}")
        t2=time()
        for batch_val in val_dataset:
            val_loss = val_step(model,batch_val)
            #val_loss = val_step(model,batch_val,ori_A_norm,ori_L_norm)
            total_val_loss += val_loss
            num_val_batches += 1

        avg_val_loss = total_val_loss / num_val_batches
        print(f"Time cost: {time()-t2:.2f}, Validation loss: {avg_val_loss.numpy():.4f}")

        if epoch>=skip_epoch:
            for callback in callbacks:
                callback.on_epoch_end(epoch, logs={'val_loss': avg_val_loss})
        if any([callback.stopped_epoch for callback in callbacks]):
            break

    for callback in callbacks:
        callback.on_train_end()
    tt = time()
    eval_dict = eval_pos_neg(model, test_data,['hr', 'mrr', 'ndcg', 'precision'], 10, batch_size)
    for m in ['hr', 'mrr', 'ndcg', 'precision']:
        ave_dict[m].append(eval_dict[m])
    te = time()
    print('pretrain = ',pretrain,' delta_lambda_2 [%.5f] Fit [%.1f s] Train [%.1f s] Test [%.1f s]: Precision = %.4f, HR = %.4f, MRR = %.4f, NDCG = %.4f'
          % (w,te - tb,tt - tb,te - tt,eval_dict['precision'], eval_dict['hr'], eval_dict['mrr'], eval_dict['ndcg']))

Epoch 1/1000
Time cost: 9.28, Training loss: 0.5072 bpr loss: 0.5059,reg loss: 0.0003 delta_lambda_2: 0.0009 , delta_a_loss: 0.0001
Time cost: 1.69, Validation loss: 0.3331
Epoch 2/1000
Time cost: 6.77, Training loss: 0.2275 bpr loss: 0.2253,reg loss: 0.0011 delta_lambda_2: 0.0010 , delta_a_loss: 0.0002
Time cost: 1.30, Validation loss: 0.2566
Epoch 3/1000
Time cost: 6.85, Training loss: 0.1743 bpr loss: 0.1718,reg loss: 0.0014 delta_lambda_2: 0.0010 , delta_a_loss: 0.0002
Time cost: 1.33, Validation loss: 0.2274
Epoch 4/1000
Time cost: 6.85, Training loss: 0.1496 bpr loss: 0.1469,reg loss: 0.0015 delta_lambda_2: 0.0010 , delta_a_loss: 0.0003
Time cost: 1.23, Validation loss: 0.1993
Epoch 5/1000
Time cost: 6.87, Training loss: 0.1300 bpr loss: 0.1272,reg loss: 0.0016 delta_lambda_2: 0.0010 , delta_a_loss: 0.0003
Time cost: 1.31, Validation loss: 0.1884
Epoch 6/1000
Time cost: 6.88, Training loss: 0.1202 bpr loss: 0.1174,reg loss: 0.0016 delta_lambda_2: 0.0010 , delta_a_loss: 0.0003
Tim

22it [00:00, 238.05it/s]

pretrain =  False  delta_lambda_2 [0.00100] Fit [220.2 s] Train [212.4 s] Test [7.8 s]: Precision = 0.0071, HR = 0.0587, MRR = 0.0186, NDCG = 0.0278





In [20]:
import tensorflow as tf
import scipy.sparse as sp
boost_a_sp = sp.csr_matrix((boost_a.values.numpy(), (boost_a.indices.numpy()[:, 0], boost_a.indices.numpy()[:, 1])), shape=boost_a.dense_shape)
def compute_normalized_adj(adj_matrix):
    degrees = np.array(adj_matrix.sum(axis=1)).flatten()
    degrees = np.where(degrees <= 1e-2, 1., degrees)
    degree_matrix = sp.diags(degrees)
    inv_sqrt_degrees = np.power(degrees, -0.5)
    inv_sqrt_degrees[np.isinf(inv_sqrt_degrees)] = 0
    inv_sqrt_degree_matrix = sp.diags(inv_sqrt_degrees)
    normalized_laplacian = inv_sqrt_degree_matrix @ adj_matrix @ inv_sqrt_degree_matrix

    return normalized_laplacian
normalized_adj = compute_normalized_adj(boost_a_sp)
eigenvalues_aft, eigenvectors_aft = sp.linalg.eigsh(normalized_adj, k=4, which='LA')
sorted_indices_aft = np.argsort(eigenvalues_aft)[::-1]
sorted_eigenvalues_aft = eigenvalues_aft[sorted_indices_aft]
sorted_eigenvectors_aft = eigenvectors_aft[:, sorted_indices_aft]
second_smallest_eigenvalue_aft=1-sorted_eigenvalues_aft[2]
second_smallest_eigenvector_aft=sorted_eigenvectors_aft[:, 2].reshape((-1,1))
print(second_smallest_eigenvalue,second_smallest_eigenvalue_aft)

0.05791020393371582 0.06674391031265259
