<a href="https://colab.research.google.com/github/flywithu/cornac/blob/master/examples/glocal_k.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install cornac==1.17



In [None]:
# !cp -Rf /usr/local/lib/python3.10/dist-packages/cornac /content/drive/MyDrive/mycornac

In [None]:
# Detect Google Colab by probing for the `google.colab` package.
try:
  import google.colab  # noqa: F401  (imported only to test availability)
  IN_COLAB = True
except ImportError:
  # Package not available -> not running on Colab. Other exceptions
  # (e.g. KeyboardInterrupt) are deliberately left to propagate.
  IN_COLAB = False

In [None]:
# Root folder used below both as an extra module search path and as the parent
# of the data directory. Defaults to the current directory.
FILE_PREFIX="."
if IN_COLAB:
  # On Colab, mount Google Drive and use the folder stored there instead.
  from google.colab import drive
  drive.mount('/content/drive')
  FILE_PREFIX="/content/drive/MyDrive/mycornac"

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import sys
# sys.path.insert(0,'/content/drive/MyDrive/daicon/msr')
# Put FILE_PREFIX at the front of the module search path so that packages
# stored there are found first; the membership test keeps re-runs of this
# cell from adding duplicate entries.
if FILE_PREFIX not in sys.path:
  sys.path.insert(0,FILE_PREFIX)

In [None]:
import os
import numpy as np
import random
import torch
import cornac
SEED = 42


def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy's global generator and PyTorch (CPU and
    CUDA), exports ``PYTHONHASHSEED`` and puts cuDNN into a deterministic,
    non-benchmarking, disabled state.

    Args:
        seed: Integer seed applied to all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Same generators, in the same order, each seeded with the same value.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seed_fn in seeders:
        seed_fn(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False
    cudnn.enabled = False




=====


# Data Load Function

In [None]:
def _dense_to_cornac_dataset(rating_matrix):
    """Build a cornac Dataset that contains every cell of a dense matrix.

    Cells equal to 0 are kept as explicit ratings of 0, so the resulting
    dataset has ``rows * cols`` entries.

    Args:
        rating_matrix: 2-D array indexed as [user, item].

    Returns:
        ``cornac.data.Dataset`` built from (row, col, value) triples.
    """
    from scipy.sparse import csr_matrix

    dense = rating_matrix.copy()
    # csr_matrix() drops zeros, so mark the empty cells with a sentinel that is
    # larger than any real rating; this makes every cell a stored entry.
    sentinel = dense.max() + 1
    dense[dense == 0] = sentinel

    sparse = csr_matrix(dense)
    rows, cols = sparse.nonzero()
    ratings = sparse.data
    ratings[ratings == sentinel] = 0  # restore the true value of empty cells
    return cornac.data.Dataset.from_uir(list(zip(rows, cols, ratings)))


def load_data_100k(path='./', delimiter='\t'):
    """Load the MovieLens-100K u1 split as dense matrices and cornac datasets.

    Reads ``movielens_100k_u1.base`` and ``movielens_100k_u1.test`` from
    ``path``. The first three columns of each row are used as user id, movie
    id and rating; ids are treated as 1-based.

    NOTE(review): matrix sizes come from the number of distinct ids, so the
    ids are assumed to be contiguous (1..n) -- confirm for other datasets.

    Args:
        path: Directory prefix of the two files; it is concatenated directly
            with the file name, so it must end with a path separator.
        delimiter: Column delimiter passed to ``np.loadtxt``.

    Returns:
        Tuple ``(n_m, n_u, train_r, train_m, test_r, test_m, train_cornac,
        test_cornac, train_cornac_wo_neg, test_cornac_wo_neg)`` where
        ``*_r`` are float32 rating matrices of shape (n_m, n_u) with 0 for
        missing ratings, ``*_m`` are the matching 0/1 masks, ``*_cornac`` are
        cornac datasets containing every (user, item) cell (missing ratings
        stored as 0) and ``*_cornac_wo_neg`` are cornac datasets built from
        the raw file rows only.
    """
    train = np.loadtxt(path+'movielens_100k_u1.base', skiprows=0, delimiter=delimiter).astype('int32')
    test = np.loadtxt(path+'movielens_100k_u1.test', skiprows=0, delimiter=delimiter).astype('int32')
    total = np.concatenate((train, test), axis=0)

    n_u = np.unique(total[:,0]).size  # num of users
    n_m = np.unique(total[:,1]).size  # num of movies
    n_train = train.shape[0]  # num of training ratings
    n_test = test.shape[0]  # num of test ratings

    # Dense item x user rating matrices; ids are shifted to 0-based indices.
    train_r = np.zeros((n_m, n_u), dtype='float32')
    test_r = np.zeros((n_m, n_u), dtype='float32')
    train_r[train[:, 1] - 1, train[:, 0] - 1] = train[:, 2]
    test_r[test[:, 1] - 1, test[:, 0] - 1] = test[:, 2]

    train_m = np.greater(train_r, 1e-12).astype('float32')  # masks indicating non-zero entries
    test_m = np.greater(test_r, 1e-12).astype('float32')

    print('data matrix loaded')
    print('num of users: {}'.format(n_u))
    print('num of movies: {}'.format(n_m))
    print('num of training ratings: {}'.format(n_train))
    print('num of test ratings: {}'.format(n_test))

    print('num of users from training: {}'.format(np.unique(train[:,0]).size))
    print('num of items from training: {}'.format(np.unique(train[:,1]).size))

    # Zero-filled datasets (user x item orientation, hence the transpose).
    train_cornac = _dense_to_cornac_dataset(train_r.T)
    test_cornac = _dense_to_cornac_dataset(test_r.T)

    # Sanity check: the cornac matrices must reproduce the dense matrices.
    train_equal = np.array_equal(train_cornac.csc_matrix.toarray().T,  train_r)
    test_equal = np.array_equal(test_cornac.csc_matrix.toarray().T,  test_r)

    # Datasets holding only the ratings present in the files.
    train_cornac_wo_neg= cornac.data.Dataset.from_uir(train)
    test_cornac_wo_neg= cornac.data.Dataset.from_uir(test)

    print(f'Cornac loaded: [{train_equal}][{test_equal}]')

    print('num of users from trainings: {}'.format(train_cornac.num_users))
    print('num of items from training: {}'.format(train_cornac.num_items))
    print('num of training ratings: {}'.format(train_cornac.num_ratings))
    print('num of training max ratings: {}'.format(train_cornac.max_rating ))
    print('num of training min ratings: {}'.format(train_cornac.min_rating ))
    print('num of training num_ratings ratings: {}'.format(train_cornac.num_ratings ))
    print(f'shape: {train_cornac.csc_matrix.toarray().shape}')
    print("#############################################################")

    print('num of users from test: {}'.format(test_cornac.num_users))
    print('num of items from test: {}'.format(test_cornac.num_items))
    print('num of test ratings: {}'.format(test_cornac.num_ratings))
    print('num of testing max ratings: {}'.format(test_cornac.max_rating))
    # Label fixed: this line reports the *test* set minimum.
    print('num of testing min ratings: {}'.format(test_cornac.min_rating ))

    print("WOWOWO #######################################################")
    print('num of users from train: {}'.format(train_cornac_wo_neg.num_users))
    print('num of items from train: {}'.format(train_cornac_wo_neg.num_items))
    print('num of train ratings: {}'.format(train_cornac_wo_neg.num_ratings))
    print('num of train max ratings: {}'.format(train_cornac_wo_neg.max_rating))
    print('num of train min ratings: {}'.format(train_cornac_wo_neg.min_rating ))

    return n_m, n_u, train_r, train_m, test_r, test_m ,train_cornac,test_cornac,train_cornac_wo_neg,test_cornac_wo_neg


# Load Data

In [None]:
data_path = FILE_PREFIX

In [None]:
# Data Load
# Every later cell depends on the variables assigned here, so a failure is
# reported and then re-raised instead of letting the notebook continue and
# fail later with an unrelated NameError.
try:
    path = data_path + '/data/'
    n_m, n_u, train_r, train_m, test_r, test_m ,train_uir,test_uir,train_uir_wo_neg,test_uir_wo_neg = load_data_100k(path=path, delimiter='\t')
except Exception as e:
    print('Error: Unable to load data')
    print(e)
    raise


data matrix loaded
num of users: 943
num of movies: 1682
num of training ratings: 80000
num of test ratings: 20000
num of users from training: 943
num of items from training: 1650
Cornac loaded: [True][True]
num of users from trainings: 943
num of items from training: 1682
num of training ratings: 1586126
num of training max ratings: 5.0
num of training min ratings: 0.0
num of training num_ratings ratings: 1586126
shape: (943, 1682)
#############################################################
num of users from test: 943
num of items from test: 1682
num of test ratings: 1586126
num of testing max ratings: 5.0
num of training min ratings: 0.0
WOWOWO #######################################################
num of users from train: 943
num of items from train: 1650
num of train ratings: 80000
num of train max ratings: 5.0
num of train min ratings: 1.0


## Original CODE

In [None]:

from time import time
from scipy.sparse import csc_matrix
import numpy as np
import h5py
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
from torch.nn.parameter import Parameter

In [None]:

# Common hyperparameter settings
n_hid = 500 # size of the hidden layers
n_dim = 5 # size of the u/v embeddings used by the local kernel
n_layers = 2 # number of hidden layers
gk_size = 3 # width (= height) of the global convolution kernel

# Hyperparameters to tune for a specific dataset
max_epoch_p = 500 # maximum number of pre-training epochs
max_epoch_f = 1000  # maximum number of fine-tuning epochs
patience_p = 5 # consecutive epochs meeting the early-stopping condition before pre-training stops
patience_f = 10 # same, for fine-tuning
tol_p = 1e-4 # pre-training: a train-RMSE decrease below this counts toward early stopping
tol_f = 1e-5 # same, for fine-tuning
lambda_2 = 20. # weight of the L2 penalty on the layer weight matrices
lambda_s = 0.006 # weight of the penalty on the local-kernel matrix (encourages sparsity)
dot_scale = 1 # scale applied to the dot product that produces the global kernel


In [None]:
def local_kernel(u, v):
    """Return ``max(0, 1 - ||u - v||^2)`` computed over the last axis.

    ``u`` and ``v`` are broadcast against each other; the L2 norm is taken
    over dimension 2, so the result has one dimension fewer than ``u - v``.
    """
    distance = torch.norm(u - v, p=2, dim=2)
    return (1. - distance.pow(2)).clamp(min=0.)

class KernelLayer(nn.Module):
    """Dense layer whose weight matrix is masked by a learned local kernel.

    The effective weight is ``W * local_kernel(u, v)``. ``forward`` returns
    both the activated output and the layer's regularisation term.

    Args:
        n_in: Input feature size.
        n_hid: Output feature size.
        n_dim: Size of the ``u`` / ``v`` embeddings fed to the local kernel.
        lambda_s: Weight of the penalty on the local-kernel matrix.
        lambda_2: Weight of the penalty on ``W``.
        activation: Module applied to the affine output.
    """

    def __init__(self, n_in, n_hid, n_dim, lambda_s, lambda_2, activation=nn.Sigmoid()):
        super().__init__()
        # Creation order (W, u, v, b) and the later init order are kept as-is
        # because both consume the global RNG stream.
        shapes = {
            "W": (n_in, n_hid),
            "u": (n_in, 1, n_dim),
            "v": (1, n_hid, n_dim),
            "b": (n_hid,),
        }
        for name, shape in shapes.items():
            setattr(self, name, nn.Parameter(torch.randn(*shape)))

        self.lambda_s, self.lambda_2 = lambda_s, lambda_2

        relu_gain = torch.nn.init.calculate_gain("relu")
        for weight in (self.W, self.u, self.v):
            nn.init.xavier_uniform_(weight, gain=relu_gain)
        nn.init.zeros_(self.b)
        self.activation = activation

    def forward(self, x):
        """Return ``(activation(x @ (W * kernel) + b), regularisation)``."""
        mse = torch.nn.functional.mse_loss
        kernel = local_kernel(self.u, self.v)

        # Both penalties are mean-squared values scaled by their lambda.
        sparse_penalty = self.lambda_s * mse(kernel, torch.zeros_like(kernel))
        weight_penalty = self.lambda_2 * mse(self.W, torch.zeros_like(self.W))

        masked_weight = self.W * kernel  # local-kernelised weight matrix
        out = self.activation(torch.matmul(x, masked_weight) + self.b)
        return out, sparse_penalty + weight_penalty

class KernelNet(nn.Module):
    """Stack of ``KernelLayer`` modules mapping ``n_u`` features back to ``n_u``.

    ``n_layers`` sigmoid hidden layers of size ``n_hid`` are followed by a
    linear (identity-activation) output layer. Dropout is applied after every
    layer except the last one.

    Args:
        n_u: Input and output feature size.
        n_hid: Hidden layer size.
        n_dim: Embedding size of the local kernel in each layer.
        n_layers: Number of hidden layers.
        lambda_s: Weight of the local-kernel penalty in each layer.
        lambda_2: Weight of the weight-matrix penalty in each layer.
        dropout: Dropout probability between layers (default 0.5, the value
            that used to be hard-coded).
    """

    def __init__(self, n_u, n_hid, n_dim, n_layers, lambda_s, lambda_2, dropout=0.5):
        super().__init__()
        layers = []
        for i in range(n_layers):
            # Only the first hidden layer sees the raw n_u-sized input.
            n_in = n_u if i == 0 else n_hid
            layers.append(KernelLayer(n_in, n_hid, n_dim, lambda_s, lambda_2))
        layers.append(KernelLayer(n_hid, n_u, n_dim, lambda_s, lambda_2, activation=nn.Identity()))
        self.layers = nn.ModuleList(layers)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return ``(output, summed regularisation of all layers)``."""
        total_reg = None
        last = len(self.layers) - 1
        for i, layer in enumerate(self.layers):
            x, reg = layer(x)
            if i < last:
                x = self.dropout(x)
            # Out-of-place sum: `+=` would modify the tensor returned by the
            # first layer in place.
            total_reg = reg if total_reg is None else total_reg + reg
        return x, total_reg

In [None]:
class CompleteNet(nn.Module):
    """Global-kernel model built on top of a (pre-trained) local-kernel net.

    The forward pass runs the local-kernel network, pools its output into a
    ``gk_size x gk_size`` convolution kernel, convolves the rating matrix
    with that kernel and feeds the result through a second ``KernelNet``.

    Args:
        kernel_net: Network stored as ``local_kernel_net``.
        n_u, n_hid, n_dim, n_layers, lambda_s, lambda_2: Passed to the
            internally created ``KernelNet`` (``global_kernel_net``).
        n_m: Number of rows of the rating matrix (rows of ``conv_kernel``).
        gk_size: Width (= height) of the global convolution kernel.
        dot_scale: Scale applied to the pooled dot product.
    """

    def __init__(self, kernel_net, n_u, n_m, n_hid, n_dim, n_layers, lambda_s, lambda_2, gk_size, dot_scale):
        super().__init__()
        self.gk_size = gk_size
        self.dot_scale = dot_scale
        self.local_kernel_net = kernel_net
        self.global_kernel_net = KernelNet(n_u, n_hid, n_dim, n_layers, lambda_s, lambda_2)
        # The randn draw is overwritten by the Xavier init below; it is kept
        # because it consumes the RNG stream and so affects seeded runs.
        self.conv_kernel = torch.nn.Parameter(torch.randn(n_m, gk_size**2) * 0.1)
        nn.init.xavier_uniform_(self.conv_kernel, gain=torch.nn.init.calculate_gain("relu"))

    def forward(self, train_r):
        """Return ``(prediction, regularisation of the global kernel net)``.

        The regularisation term of the local-kernel network is discarded.
        """
        x, _ = self.local_kernel_net(train_r)
        gk = self.global_kernel(x, self.gk_size, self.dot_scale)
        x = self.global_conv(train_r, gk)
        x, global_reg_loss = self.global_kernel_net(x)
        return x, global_reg_loss

    def global_kernel(self, input, gk_size, dot_scale):
        """Pool ``input`` over dim 1 and project it to a (1, 1, k, k) kernel."""
        avg_pooling = torch.mean(input, dim=1)  # Item (axis=1) based average pooling
        avg_pooling = avg_pooling.view(1, -1)

        gk = torch.matmul(avg_pooling, self.conv_kernel) * dot_scale  # Scaled dot product
        gk = gk.view(1, 1, gk_size, gk_size)

        return gk

    def global_conv(self, input, W):
        """Convolve a 2-D matrix with kernel ``W`` and apply LeakyReLU.

        Padding is derived from the kernel size so that the output keeps the
        shape of ``input`` for any odd kernel size (for a 3x3 kernel this is
        the previously hard-coded padding of 1).
        """
        input = input.unsqueeze(0).unsqueeze(0)
        padding = W.shape[-1] // 2
        conv2d = F.leaky_relu(F.conv2d(input, W, stride=1, padding=padding))
        return conv2d.squeeze(0).squeeze(0)

class Loss(nn.Module):
    """Masked squared-error loss plus a precomputed regularisation term."""

    def forward(self, pred_p, reg_loss, train_m, train_r):
        """Return ``mean((train_m * (train_r - pred_p))**2) + reg_loss``.

        The mean runs over every matrix entry; masked-out entries contribute
        zero to the sum.
        """
        masked_error = train_m * (train_r - pred_p)
        target = torch.zeros_like(masked_error)
        return torch.nn.functional.mse_loss(masked_error, target) + reg_loss

In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
seed_everything(SEED)

In [None]:
# Local-kernel network, converted to float64 and moved to the selected device.
model = KernelNet(n_u, n_hid, n_dim, n_layers, lambda_s, lambda_2).double().to(device)

In [None]:
# Full model. `model` is passed in as the kernel net, so
# `complete_model.local_kernel_net` and `model` are the same module object.
complete_model = CompleteNet(model, n_u, n_m, n_hid, n_dim, n_layers, lambda_s, lambda_2, gk_size, dot_scale).double().to(device)

In [None]:
def dcg_k(score_label, k):
    """Discounted cumulative gain of the first ``k`` entries.

    Args:
        score_label: Sequence of ``[score, label]`` pairs, already in ranking
            order; only the label (index 1) is used.
        k: Number of leading entries to accumulate.

    Returns:
        Sum of ``(2**label - 1) / log2(2 + rank)`` over ranks ``0..k-1``.
    """
    dcg = 0.
    for rank, pair in enumerate(score_label):
        if rank >= k:
            break
        gain = 2**pair[1] - 1
        dcg += gain / np.log2(2 + rank)
    return dcg


In [None]:
def ndcg_k(y_hat, y, k):
    """Normalised DCG@k of predicted scores against true labels.

    Args:
        y_hat: 1-D array of predicted scores.
        y: 1-D array of true labels, aligned with ``y_hat``.
        k: Cut-off rank.

    Returns:
        DCG of the score-ordered list divided by the DCG of the label-ordered
        (ideal) list, or 0.0 when the ideal DCG is zero.
    """
    score_label = np.stack([y_hat, y], axis=1).tolist()
    by_score = sorted(score_label, key=lambda d: d[0], reverse=True)
    by_label = sorted(by_score, key=lambda d: d[1], reverse=True)

    # The ideal DCG is the same computation on the label-sorted list, so reuse
    # dcg_k instead of repeating its loop.
    norm = dcg_k(by_label, k)
    if norm == 0:
        return 0.0  # nothing relevant within the cut-off; avoid 0/0
    return dcg_k(by_score, k) / norm

In [None]:

def call_ndcg(y_hat, y):
    """Average per-user NDCG over users with at least two non-zero labels.

    Both inputs are transposed first, so each row of the transposed arrays is
    treated as one user. For every user only the positions where ``y`` is
    non-zero are ranked, and the cut-off equals the number of such positions.

    Args:
        y_hat: 2-D array of predictions.
        y: 2-D array of true ratings, same shape; 0 marks a missing rating.

    Returns:
        Mean NDCG over the evaluated users, or 0.0 if no user qualifies.
    """
    ndcg_sum, num = 0, 0
    y_hat, y = y_hat.T, y.T
    n_users = y.shape[0]

    for i in range(n_users):
        rated = np.where(y[i])  # computed once, used for labels and scores
        y_i = y[i][rated]

        if y_i.shape[0] < 2:
            continue

        ndcg_sum += ndcg_k(y_hat[i][rated], y_i, y_i.shape[0])  # user-wise calculation
        num += 1

    if num == 0:
        return 0.0  # no user had two or more ratings; avoid dividing by zero
    return ndcg_sum / num

In [None]:

# Best test metrics and the (1-based) epoch at which each was reached.
best_rmse_ep, best_mae_ep, best_ndcg_ep = 0, 0, 0
best_rmse, best_mae, best_ndcg = float("inf"), float("inf"), 0

time_cumulative = 0
tic = time()

# Pre-Training
optimizer = torch.optim.AdamW(complete_model.local_kernel_net.parameters(), lr=0.001)

# The rating matrix and its mask never change, so build the tensors and the
# loss module once instead of once per epoch.
train_r_t = torch.Tensor(train_r).double().to(device)
train_m_t = torch.Tensor(train_m).double().to(device)
loss_fn = Loss().to(device)

def closure():
  """Run one full-batch forward/backward pass of the local-kernel network."""
  optimizer.zero_grad()
  complete_model.local_kernel_net.train()
  pred, reg = complete_model.local_kernel_net(train_r_t)
  loss = loss_fn(pred, reg, train_m_t, train_r_t)
  loss.backward()
  return loss

def report_pretraining(epoch):
  """Print the current pre-training metrics for the given 1-based epoch."""
  print('.-^-._' * 12)
  print('PRE-TRAINING')
  print('Epoch:', epoch, 'test rmse:', test_rmse, 'train rmse:', round(train_rmse,4))
  print('Time:', t, 'seconds')
  print('Time cumulative:', time_cumulative, 'seconds')
  print('.-^-._' * 12)

last_rmse = np.inf
counter = 0

print(f"shape1: {train_r.shape}")

for i in range(max_epoch_p):
  # Restart the timer every epoch: `t` is the duration of this optimisation
  # step and `time_cumulative` the sum of those durations. Without the reset
  # `t` was the time since the start and the sum grew quadratically.
  tic = time()
  optimizer.step(closure)
  complete_model.local_kernel_net.eval()
  t = time() - tic
  time_cumulative += t

  # Evaluation needs no gradients.
  with torch.no_grad():
    pre, _ = complete_model.local_kernel_net(train_r_t)

  pre = pre.float().cpu().numpy()
  pre_clipped = np.clip(pre, 1., 5.)

  error = (test_m * (pre_clipped - test_r) ** 2).sum() / test_m.sum()  # test error
  test_rmse = np.sqrt(error)

  error_train = (train_m * (pre_clipped - train_r) ** 2).sum() / train_m.sum()  # train error
  train_rmse = np.sqrt(error_train)

  # Early stopping: count consecutive epochs whose train-RMSE improvement is
  # below the tolerance.
  if last_rmse-train_rmse < tol_p:
    counter += 1
  else:
    counter = 0

  last_rmse = train_rmse

  if patience_p == counter:
    report_pretraining(i+1)
    break

  if i % 50 == 0:
    report_pretraining(i+1)
else:
  # Reached max_epoch_p without early stopping: print the final state once.
  report_pretraining(i+1)

### CPU 5-10
# shape1: (1682, 943)
# .-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
# PRE-TRAINING
# Epoch: 0 test rmse: 2.7650917 train rmse: 2.739
# Time: 0.5638654232025146 seconds
# Time cumulative: 0.5638654232025146 seconds
# .-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
# .-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
# PRE-TRAINING
# Epoch: 4 test rmse: 2.4029963 train rmse: 2.3221
# Time: 3.3076937198638916 seconds
# Time cumulative: 9.638086080551147 seconds
# .-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._

############GPU 50-100
# .-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
# PRE-TRAINING
# Epoch: 0 test rmse: 2.764546 train rmse: 2.7389
# Time: 4.680256128311157 seconds
# Time cumulative: 4.680256128311157 seconds
# .-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
# .-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
# PRE-TRAINING
# Epoch: 28 test rmse: 1.140111 train rmse: 1.112194
# Time: 7.5728209018707275 seconds
# Time cumulative: 172.887677192688 seconds
# .-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
# .-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
# PRE-TRAINING
# Epoch: 27 test rmse: 1.140111 train rmse: 1.1122
# Time: 7.5728209018707275 seconds
# Time cumulative: 172.887677192688 seconds
# .-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._


shape1: (1682, 943)
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
PRE-TRAINING
Epoch: 0 test rmse: 2.764546 train rmse: 2.7389
Time: 1.8104169368743896 seconds
Time cumulative: 1.8104169368743896 seconds
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
PRE-TRAINING
Epoch: 28 test rmse: 1.140111 train rmse: 1.112194
Time: 4.801568508148193 seconds
Time cumulative: 94.54065608978271 seconds
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
PRE-TRAINING
Epoch: 27 test rmse: 1.140111 train rmse: 1.1122
Time: 4.801568508148193 seconds
Time cumulative: 94.54065608978271 seconds
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._


In [None]:
# Fine-Tuning
optimizer = torch.optim.AdamW(complete_model.parameters(), lr=0.001)

# The rating matrix and its mask never change, so build the tensors and the
# loss module once instead of once per epoch.
train_r_t = torch.Tensor(train_r).double().to(device)
train_m_t = torch.Tensor(train_m).double().to(device)
loss_fn = Loss().to(device)

def closure():
  """Run one full-batch forward/backward pass of the complete model."""
  optimizer.zero_grad()
  complete_model.train()
  pred, reg = complete_model(train_r_t)
  loss = loss_fn(pred, reg, train_m_t, train_r_t)
  loss.backward()
  return loss

def report_finetuning(epoch):
  """Print the current fine-tuning metrics for the given 1-based epoch."""
  print('.-^-._' * 12)
  print('FINE-TUNING')
  print('Epoch:', epoch, 'test rmse:', test_rmse, 'test mae:', test_mae, 'test ndcg:', test_ndcg)
  print('Epoch:', epoch, 'train rmse:', round(train_rmse,4), 'train mae:', train_mae, 'train ndcg:', train_ndcg)
  print('Time:', t, 'seconds')
  print('Time cumulative:', time_cumulative, 'seconds')
  print('.-^-._' * 12)

last_rmse = np.inf
counter = 0

for i in range(max_epoch_f):
  # Restart the timer every epoch: `t` is the duration of this optimisation
  # step and `time_cumulative` keeps adding those durations to the total
  # carried over from the previous cell.
  tic = time()
  optimizer.step(closure)
  complete_model.eval()
  t = time() - tic
  time_cumulative += t

  # Evaluation needs no gradients.
  with torch.no_grad():
    pre, _ = complete_model(train_r_t)

  pre = pre.float().cpu().numpy()
  pre_clipped = np.clip(pre, 1., 5.)  # clipped once, reused by every metric

  error = (test_m * (pre_clipped - test_r) ** 2).sum() / test_m.sum()  # test error
  test_rmse = np.sqrt(error)

  error_train = (train_m * (pre_clipped - train_r) ** 2).sum() / train_m.sum()  # train error
  train_rmse = np.sqrt(error_train)

  test_mae = (test_m * np.abs(pre_clipped - test_r)).sum() / test_m.sum()
  train_mae = (train_m * np.abs(pre_clipped - train_r)).sum() / train_m.sum()

  test_ndcg = call_ndcg(pre_clipped, test_r)
  train_ndcg = call_ndcg(pre_clipped, train_r)

  # Track the best test metrics and the epoch they occurred at.
  if test_rmse < best_rmse:
      best_rmse = test_rmse
      best_rmse_ep = i+1

  if test_mae < best_mae:
      best_mae = test_mae
      best_mae_ep = i+1

  if best_ndcg < test_ndcg:
      best_ndcg = test_ndcg
      best_ndcg_ep = i+1

  # Early stopping: count consecutive epochs whose train-RMSE improvement is
  # below the tolerance.
  if last_rmse-train_rmse < tol_f:
    counter += 1
  else:
    counter = 0

  last_rmse = train_rmse

  if patience_f == counter:
    report_finetuning(i+1)
    break

  if i % 50 == 0:
    report_finetuning(i+1)
else:
  # Reached max_epoch_f without early stopping: print the final state once.
  report_finetuning(i+1)


# CPU 5-10

# .-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
# FINE-TUNING
# Epoch: 0 test rmse: 2.7633 test mae: 2.5107932 test ndcg: 0.8331753579801311
# Epoch: 0 train rmse: 2.7389 train mae: 2.5003824 train ndcg: 0.834420735340282
# Time: 5.08611273765564 seconds
# Time cumulative: 14.724198818206787 seconds
# .-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
# .-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
# FINE-TUNING
# Epoch: 9 test rmse: 1.369984 test mae: 1.1203239 test ndcg: 0.8250590978071585
# Epoch: 9 train rmse: 1.3019 train mae: 1.058034 train ndcg: 0.8252134725755771
# Time: 28.390460729599 seconds
# Time cumulative: 177.8281741142273 seconds
# .-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._

#GPU 50 100
# .-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
# FINE-TUNING
# Epoch: 0 test rmse: 2.7616875 test mae: 2.508944 test ndcg: 0.833032527168417
# Epoch: 0 train rmse: 2.7388 train mae: 2.5002937 train ndcg: 0.8345416868859795
# Time: 7.729712009429932 seconds
# Time cumulative: 180.61738920211792 seconds
# .-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
# .-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
# FINE-TUNING
# Epoch: 50 test rmse: 1.065533 test mae: 0.8672719 test ndcg: 0.8497837155057424
# Epoch: 50 train rmse: 1.0261 train mae: 0.8370878 train ndcg: 0.8526058221490135
# Time: 53.09919834136963 seconds
# Time cumulative: 1818.776295185089 seconds
# .-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
# .-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
# FINE-TUNING
# Epoch: 99 test rmse: 0.96924686 test mae: 0.7714432 test ndcg: 0.880215288549668
# Epoch: 99 train rmse: 0.9277 train mae: 0.73899 train ndcg: 0.8832026233743437
# Time: 89.53782510757446 seconds
# Time cumulative: 5351.143161773682 seconds
# .-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._



.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
FINE-TUNING
Epoch: 0 test rmse: 2.7616875 test mae: 2.508944 test ndcg: 0.833032527168417
Epoch: 0 train rmse: 2.7388 train mae: 2.5002937 train ndcg: 0.8345416868859795
Time: 4.94537353515625 seconds
Time cumulative: 99.48602962493896 seconds
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
FINE-TUNING
Epoch: 50 test rmse: 1.065533 test mae: 0.8672719 test ndcg: 0.8497837155057424
Epoch: 50 train rmse: 1.0261 train mae: 0.8370878 train ndcg: 0.8526058221490135
Time: 42.450783491134644 seconds
Time cumulative: 1314.8469276428223 seconds
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._
FINE-TUNING
Epoch: 100 test rmse: 0.9691369 test mae: 0.7718908 test ndcg: 0.8808006539878511
Epoch: 100 train rmse: 0.9273 train mae: 0.7391

In [None]:

# Final result
print('Epoch:', best_rmse_ep, ' best rmse:', best_rmse)
print('Epoch:', best_mae_ep, ' best mae:', best_mae)
print('Epoch:', best_ndcg_ep, ' best ndcg:', best_ndcg)

Epoch: 884  best rmse: 0.9184946
Epoch: 830  best mae: 0.7242841
Epoch: 448  best ndcg: 0.8966600318027631


### CORNAC


In [None]:
test_m

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [None]:
import cornac
print(f"Cornac version: {cornac.__version__}")
from cornac.eval_methods import BaseMethod, RatioSplit, StratifiedSplit, CrossValidation
from cornac.metrics import Precision, Recall, NDCG, AUC, MAP,RMSE, NCRR

Cornac version: 1.17


In [None]:
print( sys.path)

['/content/drive/MyDrive/mycornac', '/content', '/env/python', '/usr/lib/python310.zip', '/usr/lib/python3.10', '/usr/lib/python3.10/lib-dynload', '', '/usr/local/lib/python3.10/dist-packages', '/usr/lib/python3/dist-packages', '/usr/local/lib/python3.10/dist-packages/IPython/extensions', '/root/.ipython']


In [None]:
seed_everything(SEED)

In [None]:
# !ls /content/drive/MyDrive/mycornac/myfunction

In [None]:
import importlib

import cornac.models
import cornac.models.glocalk.glocalk
import cornac.models.glocalk.recom_glocalk

# Reload the leaf modules first and the packages that re-export from them
# afterwards, so each package __init__ re-runs against fresh submodules.
importlib.reload(cornac.models.glocalk.glocalk)
importlib.reload(cornac.models.glocalk.recom_glocalk)
importlib.reload(cornac.models.glocalk)
importlib.reload(cornac.models)

# Bind the class names only after reloading: a `from ... import` executed
# before a reload keeps pointing at the old class objects.
from cornac.models import GLocalK, VMF, FM


hello world


<module 'cornac.models' from '/content/drive/MyDrive/mycornac/cornac/models/__init__.py'>

In [None]:
for module_name in sys.modules:
  if "glocalk" in module_name:
    print(module_name)

myfunction.recom_glocalk
myfunction.glocalk
cornac.models.glocalk
cornac.models.glocalk.recom_glocalk
cornac.models.glocalk.glocalk


In [None]:
# Flatten the zero-filled training dataset into an integer array with one
# (user index, item index, rating) row per iterated batch; only the first
# element of each batch component is kept.
numpy_data_train = np.array(
    [tuple(part[0] for part in batch[:3]) for batch in train_uir.uir_iter()],
    dtype=int,
)
print(numpy_data_train[:5])

[[0 0 5]
 [0 1 3]
 [0 2 4]
 [0 3 3]
 [0 4 3]]


In [None]:
# Flatten the zero-filled test dataset into an integer array with one
# (user index, item index, rating) row per iterated batch; only the first
# element of each batch component is kept.
numpy_data_test = np.array(
    [tuple(part[0] for part in batch[:3]) for batch in test_uir.uir_iter()],
    dtype=int,
)
print(numpy_data_test[:5])

[[0 0 0]
 [0 1 0]
 [0 2 0]
 [0 3 0]
 [0 4 0]]


In [None]:
# Set up the evaluation method from the pre-built (zero-filled) train/test splits
eval_method = BaseMethod.from_splits(
    train_data=np.array(numpy_data_train),
    test_data=np.array(numpy_data_test),
    rating_threshold=1,
    exclude_unknowns=True,  # Unknown users and items will be ignored.
    verbose=True
)


# data matrix loaded
# num of users: 943
# num of movies: 1682
# num of training ratings: 80000
# num of test ratings: 20000
# Cornac loaded
# num of users: 943
# num of movies: 1682
# num of training ratings: 80000
# num of test ratings: 20000

rating_threshold = 1.0
exclude_unknowns = True
---
Training data:
Number of users = 943
Number of items = 1682
Number of ratings = 1586126
Max rating = 5.0
Min rating = 0.0
Global mean = 0.2
---
Test data:
Number of users = 943
Number of items = 1682
Number of ratings = 1586126
Number of unknown users = 0
Number of unknown items = 0
---
Total users = 943
Total items = 1682


In [None]:
np.array(numpy_data_train)

array([[   0,    0,    5],
       [   0,    1,    3],
       [   0,    2,    4],
       ...,
       [ 942, 1679,    0],
       [ 942, 1680,    0],
       [ 942, 1681,    0]])

In [None]:
seed_everything(SEED)

In [None]:
print(torch.initial_seed())

42


In [None]:
glocalk_model = GLocalK(seed=SEED, verbose=True)

In [None]:
pmf_model1=  cornac.models.PMF(seed=SEED)

In [None]:
seed_everything(SEED)
# Set up the evaluation metrics

# importlib.reload(cornac.metrics.ranking)
# importlib.reload(cornac.metrics)

# from cornac.metrics import Precision, Recall, NDCG, AUC, MAP,RMSE, NCRR



metrics = [RMSE()]

# Run the experiment
cornac.Experiment(
    eval_method=eval_method,
    models=[glocalk_model,pmf_model1],
    metrics=metrics,
).run()




[GLocalK] Training started!
Learning...
trainset shape: (1682, 943)
pre_min 1.0 pre_max: 5.0


  0%|          | 0/500 [00:00<?, ?it/s]

Pre-training Early Stopping
Pre-training Finished! rmse: 1.1122


  0%|          | 0/1000 [00:00<?, ?it/s]

Fine-training epoch:0 rmse: 2.7388 count : 0
Fine-training epoch:50 rmse: 1.0261 count : 3
Fine-training epoch:100 rmse: 0.9273 count : 0
Fine-training epoch:150 rmse: 0.9032 count : 0
Fine-training epoch:200 rmse: 0.8970 count : 0
Fine-training epoch:250 rmse: 0.8948 count : 4
Fine-training epoch:300 rmse: 0.8885 count : 0
Fine-training epoch:350 rmse: 0.8839 count : 4
Fine-training epoch:400 rmse: 0.8793 count : 0
Fine-training epoch:450 rmse: 0.8777 count : 4
Fine-training epoch:500 rmse: 0.8755 count : 2
Fine-training epoch:550 rmse: 0.8742 count : 1
Fine-training epoch:600 rmse: 0.8720 count : 0
Fine-training epoch:650 rmse: 0.8721 count : 0
Fine-training epoch:700 rmse: 0.8716 count : 0
Fine-training epoch:750 rmse: 0.8708 count : 0
Fine-training epoch:800 rmse: 0.8711 count : 1
Fine-training epoch:850 rmse: 0.8700 count : 1
Fine-training epoch:900 rmse: 0.8700 count : 4
Fine-training Early Stopping
Fine-training Finished! rmse: 0.8704
Learning completed : [(1682, 943)]

[GLocalK

Rating:   0%|          | 0/1586126 [00:00<?, ?it/s]


[PMF] Training started!

[PMF] Evaluation started!


Rating:   0%|          | 0/1586126 [00:00<?, ?it/s]


TEST:
...
        |   RMSE | Train (s) | Test (s)
------- + ------ + --------- + --------
GLocalK | 3.1958 |  189.7841 |  19.1725
PMF     | 0.4570 |   36.4787 |  36.2283



# Without Negative (Zero-Filled) Values

In [None]:
# Same flattening as for the zero-filled datasets, applied to the datasets
# that contain only the ratings present in the files.
numpy_data_train_wo = np.array(
    [tuple(part[0] for part in batch[:3]) for batch in train_uir_wo_neg.uir_iter()],
    dtype=int,
)
numpy_data_test_wo = np.array(
    [tuple(part[0] for part in batch[:3]) for batch in test_uir_wo_neg.uir_iter()],
    dtype=int,
)

In [None]:
# Set up the evaluation method from the splits without zero-filled entries
eval_method = BaseMethod.from_splits(
    train_data=np.array(numpy_data_train_wo),
    test_data=np.array(numpy_data_test_wo),
    rating_threshold=1.0,
    exclude_unknowns=True,  # Unknown users and items will be ignored.
    verbose=True
)


rating_threshold = 1.0
exclude_unknowns = True
---
Training data:
Number of users = 943
Number of items = 1650
Number of ratings = 80000
Max rating = 5.0
Min rating = 1.0
Global mean = 3.5
---
Test data:
Number of users = 459
Number of items = 1410
Number of ratings = 20000
Number of unknown users = 0
Number of unknown items = 0
---
Total users = 943
Total items = 1650


In [None]:
seed_everything(SEED)

glocalk_model = GLocalK(seed=SEED,verbose=True)
pmf_model=  cornac.models.PMF(seed=SEED)
wmf_model = cornac.models.WMF(seed=SEED)

In [None]:
seed_everything(SEED)
# Set up the evaluation metrics

# importlib.reload(cornac.metrics.ranking)
# importlib.reload(cornac.metrics)

# from cornac.metrics import Precision, Recall, NDCG, AUC, MAP,RMSE, NCRR



metrics = [RMSE()]

# Run the experiment
cornac.Experiment(
    eval_method=eval_method,
    models=[glocalk_model,pmf_model,wmf_model],
    metrics=metrics,
).run()



[GLocalK] Training started!
Learning...
trainset shape: (1650, 943)
pre_min 1.0 pre_max: 5.0


  0%|          | 0/500 [00:00<?, ?it/s]

Pre-training Early Stopping
Pre-training Finished! rmse: 1.1121


  0%|          | 0/1000 [00:00<?, ?it/s]

Fine-training epoch:0 rmse: 2.7388 count : 0
Fine-training epoch:50 rmse: 1.0211 count : 3
Fine-training epoch:100 rmse: 0.9210 count : 0
Fine-training epoch:150 rmse: 0.9034 count : 0
Fine-training epoch:200 rmse: 0.9001 count : 1
Fine-training epoch:250 rmse: 0.8970 count : 0
Fine-training epoch:300 rmse: 0.8932 count : 0
Fine-training epoch:350 rmse: 0.8882 count : 0
Fine-training epoch:400 rmse: 0.8836 count : 0
Fine-training epoch:450 rmse: 0.8797 count : 3
Fine-training epoch:500 rmse: 0.8769 count : 0
Fine-training epoch:550 rmse: 0.8746 count : 0
Fine-training epoch:600 rmse: 0.8740 count : 3
Fine-training epoch:650 rmse: 0.8724 count : 0
Fine-training epoch:700 rmse: 0.8723 count : 1
Fine-training epoch:750 rmse: 0.8711 count : 0
Fine-training Early Stopping
Fine-training Finished! rmse: 0.8721
Learning completed : [(1650, 943)]

[GLocalK] Evaluation started!


Rating:   0%|          | 0/20000 [00:00<?, ?it/s]


[PMF] Training started!

[PMF] Evaluation started!


Rating:   0%|          | 0/20000 [00:00<?, ?it/s]


[WMF] Training started!


  0%|          | 0/100 [00:00<?, ?it/s]

Learning completed!

[WMF] Evaluation started!


Rating:   0%|          | 0/20000 [00:00<?, ?it/s]


TEST:
...
        |   RMSE | Train (s) | Test (s)
------- + ------ + --------- + --------
GLocalK | 1.1845 |  155.6645 |   0.3206
PMF     | 1.1970 |    1.5086 |   0.4222
WMF     | 2.2717 |    9.0746 |   0.4978



In [None]:
# glocalk_model.fit(total_uir)s

In [None]:
# indices = np.where(array1 != array2)
# different_elements = array1[indices]
# print("다른 요소:", different_elements)