In [1]:
#====================== Import de librerias =====================#

from pathlib import Path
import json
import gzip
from urllib.request import urlopen
import datetime
import plotly.express as px
import plotly.graph_objects as go
import wget
import logging

import torch
import pandas as pd
import numpy as np
import csv
import os
import scipy.sparse as sp
from typing import Tuple, Dict, Any, List
from tqdm import tqdm, trange
from IPython import embed
from torch.utils.data import DataLoader, Dataset
from torch.utils.tensorboard import SummaryWriter
import sys 
from sklearn.preprocessing import LabelEncoder
import plotly.figure_factory as ff

current_filename = sys.argv[0].split("\\")[-1].split(".")[-2]
logfile = "project.log"
old_path = os.getcwd()
os.chdir("..")
execution_path = os.getcwd()

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
%load_ext tensorboard
#logs_base_dir = execution_path / Path("4_Modelling/runs")
logs_base_dir = "runs_random"
os.makedirs(f'{execution_path}/{"4_Modelling"}/{logs_base_dir}', exist_ok=True)
tb_fm = SummaryWriter(log_dir=f'{execution_path}/{"4_Modelling"}/{logs_base_dir}/{logs_base_dir}_FM/')
tb_rnd = SummaryWriter(log_dir=f'{execution_path}/{"4_Modelling"}/{logs_base_dir}/{logs_base_dir}_RANDOM/')

def save_data_configuration(text):
    save_data_dir = "data_config.txt" + current_filename  
    path = f'{execution_path}/{"4_Modelling"}/{save_data_dir}'
    with open(path, "a") as data_file:
        data_file.write(text+"\n")

    return text

In [3]:
# Let's define some hyper-parameters
hparams = {
    'batch_size':64,
    'num_epochs':12,
    'hidden_size': 32,
    'learning_rate':1e-4,
}

# we select to work on GPU if it is available in the machine, otherwise
# will run on CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [4]:
#============ Definicion de valores de configuracion ============#

min_reviews, min_usuarios = [6,6]
col_names = {"col_id_reviewer": "reviewerID",
             "col_id_product": "asin",
             "col_unix_time": "unixReviewTime",
             "col_rating": "overall",
             "col_timestamp": "timestamp",
             "col_year": "year"}

csv_filename = execution_path/Path("3_DataPreparation/interactions_minR{}_minU{}.csv".format(min_reviews,min_usuarios))

In [5]:
df = pd.read_csv(csv_filename)
df.head()

Unnamed: 0,asin,reviewerID,overall,unixReviewTime,timestamp,year
0,0,9132,5.0,1477785600,2016-10-30 02:00:00,1970
1,0,10612,5.0,1467244800,2016-06-30 02:00:00,1970
2,0,257,1.0,1454716800,2016-02-06 01:00:00,1970
3,0,4425,5.0,1434844800,2015-06-21 02:00:00,1970
4,0,2523,4.0,1420329600,2015-01-04 01:00:00,1970


In [6]:
save_data_configuration(str(df.nunique()))
df.nunique()

asin               6178
reviewerID        14138
overall               5
unixReviewTime     3622
timestamp          3622
year                  1
dtype: int64

# Splitting dataset (TLOO strategy)

In [11]:
def split_train_test(data: np.ndarray,
                     n_users: int) -> Tuple[np.ndarray, np.ndarray]:
    # Split and remove timestamp
    train_x, test_x = [], []
    for u in trange(n_users, desc='spliting train/test and removing timestamp...'):
        user_data = data[data[:, 0] == u]
        sorted_data = user_data[user_data[:, -1].argsort()]
        if len(sorted_data) == 1:
            train_x.append(sorted_data[0][:-1])
        else:
            train_x.append(sorted_data[:-1][:, :-1])
            test_x.append(sorted_data[-1][:-1])
    return np.vstack(train_x), np.stack(test_x)

In [8]:
data = df[[*col_names.values()][:3]].astype('int32').to_numpy()
data

array([[      9132,          0, 1477785600],
       [     10612,          0, 1467244800],
       [       257,          0, 1454716800],
       ...,
       [      9051,       6177, 1530144000],
       [      3412,       6177, 1527465600],
       [      9805,       6177, 1527206400]])

In [9]:
add_dims=0
for i in range(data.shape[1] - 1):  # do not affect to timestamp
    # MAKE IT START BY 0
    data[:, i] -= np.min(data[:, i])
    # RE-INDEX
    data[:, i] += add_dims
    add_dims = np.max(data[:, i]) + 1
dims = np.max(data, axis=0) + 1
print("Dim of users: {}\nDim of items: {}\nDims of unixtime: {}".format(dims[0], dims[1], dims[2]))
data

Dim of users: 14138
Dim of items: 20316
Dims of unixtime: 1538006401


array([[      9132,      14138, 1477785600],
       [     10612,      14138, 1467244800],
       [       257,      14138, 1454716800],
       ...,
       [      9051,      20315, 1530144000],
       [      3412,      20315, 1527465600],
       [      9805,      20315, 1527206400]])

In [10]:
train_x, test_x = split_train_test(data, dims[0])
train_x

spliting train/test and removing timestamp...: 100%|██████████| 14138/14138 [00:05<00:00, 2655.70it/s]


array([[    0, 19248],
       [    0, 19249],
       [    0, 14823],
       ...,
       [14137, 14159],
       [14137, 18245],
       [14137, 18904]])

# Negative sampling

In [48]:
train_x = train_x[:, :2]
dims = dims[:2]
print("New dims:",dims)
print("New train_x:\n",train_x)

New dims: [14138 20316]
New train_x:
 [[    0 19248]
 [    0 19249]
 [    0 14823]
 ...
 [14137 14159]
 [14137 18245]
 [14137 18904]]


In [49]:
def build_adj_mx(n_feat:int, data:np.ndarray) -> sp.dok_matrix :
    train_mat = sp.dok_matrix((n_feat, n_feat), dtype=np.float32)
    for x in tqdm(data, desc=f"BUILDING ADJACENCY MATRIX..."):
        train_mat[x[0], x[1]] = 1.0
        train_mat[x[1], x[0]] = 1.0
        # IDEA: We treat features that are not user or item differently because we do not consider
        #  interactions between contexts
        if data.shape[1] > 2:
            for idx in range(len(x[2:])):
                train_mat[x[0], x[2 + idx]] = 1.0
                train_mat[x[1], x[2 + idx]] = 1.0
                train_mat[x[2 + idx], x[0]] = 1.0
                train_mat[x[2 + idx], x[1]] = 1.0
    return train_mat

In [50]:
def ng_sample(data: np.ndarray, dims: list, num_ng:int=4) -> Tuple[np.ndarray, sp.dok_matrix]:
    rating_mat = build_adj_mx(dims[-1], data)
    interactions = []
    min_item, max_item = dims[0], dims[1]
    for num, x in tqdm(enumerate(data), desc='perform negative sampling...'):
        interactions.append(np.append(x, 1))
        for t in range(num_ng):
            j = np.random.randint(min_item, max_item) #if not pop else random.sample(items_to_sample, 1)[0]
            # IDEA: Loop to exclude true interactions (set to 1 in adj_train) user - item
            while (x[0], j) in rating_mat or j == int(x[1]):
                j = np.random.randint(min_item, max_item) #if not pop else random.sample(items_to_sample, 1)[0]
            interactions.append(np.concatenate([[x[0], j], x[2:], [0]]))
    return np.vstack(interactions), rating_mat

In [51]:
train_x, rating_mat = ng_sample(train_x, dims)
print("Dimensions matrix:\n",dims)
print("\nRating matrix:")
rating_mat

BUILDING ADJACENCY MATRIX...: 100%|██████████| 123226/123226 [00:03<00:00, 34590.16it/s]
perform negative sampling...: 123226it [00:06, 20100.46it/s]


Dimensions matrix:
 [14138 20316]

Rating matrix:


<20316x20316 sparse matrix of type '<class 'numpy.float32'>'
	with 246452 stored elements in Dictionary Of Keys format>

In [52]:
dims[-1]-dims[0]

6178

In [16]:
# Exercise 2

## Evaristo
#### number of ones
print(np.count_nonzero(rating_mat.toarray())/(dims[-1]*dims[-1]))
### number of zeros
print(1 - np.count_nonzero(rating_mat.toarray())/(dims[-1]*dims[-1]))

# ## Brenda
# #### Who sparse is the matrix??
# print(1 - rating_mat.shape[0] / rating_mat.count_nonzero())

0.000597112191656141
0.9994028878083439


In [17]:
train_x[:10]

array([[    0, 19248,     1],
       [    0, 18119,     0],
       [    0, 18743,     0],
       [    0, 20009,     0],
       [    0, 15350,     0],
       [    0, 19249,     1],
       [    0, 14495,     0],
       [    0, 14829,     0],
       [    0, 14609,     0],
       [    0, 14508,     0]])

# Creating dataset class

In [53]:
class PointData(Dataset):
    def __init__(self,
                 data: np.ndarray,
                 dims: list) -> None:
        """
        Dataset formatter adapted point-wise algorithms
        Parameters
        """
        super(PointData, self).__init__()
        self.interactions = data
        self.dims = dims

    def __len__(self) -> int:
        return len(self.interactions)
        
    def __getitem__(self, 
                    index: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Return the pairs user-item and the target.
        """
        return self.interactions[index][:-1], self.interactions[index][-1]

train_dataset = PointData(train_x, dims)

In [64]:
train_dataset[0]

(array([    0, 19248]), 1)

# Preparing the test set for inference

In [54]:
test_x

array([[    0, 17249],
       [    1, 18015],
       [    2, 14196],
       ...,
       [14135, 19938],
       [14136, 20214],
       [14137, 15542]])

In [55]:
import math
print(rating_mat.shape)
bits = math.ceil(math.log(rating_mat.shape[0],2))
print("rating_mat contains log2(rating_mat.shape[0]) = {} bits".format(bits))

(20316, 20316)
rating_mat contains log2(rating_mat.shape[0]) = 15 bits


In [None]:
def create_test_no_interactions_stratified_users(df: pd.DataFrame,
                                           test_x: np.ndarray,
                                           train_x: np.ndarray, 
                                           dims_usuarios_productos: Tuple[int, int], 
                                           num_estratos: int,
                                           num_samples: int,
                                           le_producto) -> np.ndarray:

  """
  Función: create_test_no_interactions_stratified

  Argumentos:
  - df: un objeto de tipo DataFrame de pandas que contiene las interacciones usuario-producto en entrenamiento.
  - test_x: un objeto de tipo ndarray de numpy que contiene las interacciones usuario-producto de prueba.
  - train_x: un objeto de tipo ndarray de numpy que contiene las interacciones usuario-producto en entrenamiento.
  - dims_usuarios_productos: una tupla de enteros que contiene el número de usuarios y productos en el conjunto de datos.
  - num_estratos: un entero que indica el número de estratos en los que dividir los productos según su popularidad.
  - num_samples: un entero que indica el número de muestras aleatorias que se tomarán de cada estrato para cada usuario.
  - le_producto: un objeto de tipo LabelEncoder de scikit-learn que se usa para transformar los IDs de los productos.

  Salida:
  - Un objeto de tipo ndarray de numpy que representa la matriz de interacciones usuario-producto para aquellos productos que el usuario no ha interactuado previamente. Cada fila representa una interacción usuario-producto y los valores de cada columna representan el id del usuario y el id del producto, respectivamente.

  """

  # Definicion de mensaje inicial
  mensaje_inicial="Iniciando muestreo estratificado basado en popularidad"
  print(mensaje_inicial)
  print("-"*len(mensaje_inicial), "\n")

  # Identificamos los usuarios presentes en test
  usuarios_test = np.unique(test_x[:, 0])

  # Identificamos el rango de productos disponibles
  total_productos = range(dims_usuarios_productos[0]-1, dims_usuarios_productos[1])

  # Calculamos la probabilidad como el numero de reviews por producto
  product_popularity = df.groupby('asin').size().reset_index(name='popularity')
  product_popularity["asin"]=le_producto.transform(product_popularity["asin"])
  product_popularity.loc[:, col_names["col_id_product"]] = product_popularity[col_names["col_id_product"]].apply(lambda x: x + dims_usuarios_productos[0]).astype('int32')

  # Definimos la funcion de densidad de donde vamos a samplear:
  # Create the density plot
  show_graph=True
  if show_graph==True:
    fig = ff.create_distplot([product_popularity['popularity']], ['Popularity'])
    fig.show()
  # Definimos el numero de estratos en los que dividir a os productos segun popularidad
  #num_estratos = 5
  product_popularity['stratum'] = pd.qcut(product_popularity['popularity'], num_estratos, labels=False)

  # Ordenar los estratos por popularidad media
  stratum_popularity = product_popularity.groupby('stratum')['popularity'].mean()
  stratum_popularity = stratum_popularity.sort_values(ascending=False)

  # Calcular las probabilidades de muestreo. Esta funcion puede ser definida de otra manera
  stratum_probs = stratum_popularity / stratum_popularity.sum()
  stratum_probs = stratum_probs.reset_index(drop=True)

  print("Probabilidad de los estratos: ")
  print( stratum_probs, "\n")

  # Definicion de sample por estrato
  #num_samples = 199
  stratum_test_sizes = (stratum_probs * num_samples)
  stratum_test_sizes = np.round(stratum_test_sizes).astype(int)

  # Si la suma de los tamaños de los estratos es mayor que num_samples, restamos uno al tamaño del estrato más grande
  while stratum_test_sizes.sum() > num_samples:
      stratum_test_sizes[stratum_test_sizes.argmin()] -= 1

  # Si la suma de los tamaños de los estratos es menor que num_samples, sumamos uno al tamaño del estrato más grande
  while stratum_test_sizes.sum() < num_samples:
      stratum_test_sizes[stratum_test_sizes.argmax()] += 1

  print("Tamaño de muestreo de los estratos de los estratos: " )
  print( stratum_test_sizes, "\n")

  # Definimos la matriz solucion y rellenamos
  zero_positions = np.zeros((num_samples * len(usuarios_test), 2))
  start_index = 0

  # Iteramos sobre los usuarios de prueba
  for usuario in tqdm(usuarios_test):
    
    # Iteramos sobre cada estrato y su tamaño correspondiente
    for stratum, size in stratum_test_sizes.items():
      
      # Identificamos los productos en los que el usuario ha interactuado previamente en entrenamiento y el producto de gt
      productos_train = np.unique(train_x[train_x[:, 0] == usuario][:, 1])
      productos_test = np.unique(test_x[test_x[:, 0] == usuario][:, 1])
      productos = np.concatenate((productos_train, productos_test))

      # Identificamos los productos del estrato actual
      products_in_stratum = product_popularity[product_popularity['stratum'] == stratum]['asin']

      # Filtramos los productos del estrato actual que el usuario ha interactuado previamente
      products_in_stratum_filtered = np.setdiff1d(products_in_stratum, productos)

      # Tomamos una muestra aleatoria sin reemplazo de los productos filtrados del estrato actual
      sampled_products = np.random.choice(products_in_stratum_filtered, size, replace=False)

      # Asignamos los índices de inicio y fin para los ceros que representan la interacción usuario-producto
      end_index = start_index + size
      zero_positions[start_index:end_index, 0] = usuario
      zero_positions[start_index:end_index, 1] = sampled_products
      start_index = end_index

  # Convertimos los valores de cero posiciones a entero
  zero_positions = zero_positions.astype(int)

  return(zero_positions)

In [57]:
def zero_positions_mode(mode, rating_mat, train_x, test_x, dims):
   
    if mode == 0:
        save_data_configuration("\n"+"#"*4+"  zero_positions: all data  "+"#"*4)
        return np.asarray(np.where(rating_mat.A==0)).T
    elif mode == 1:
        zero_true_matrix = np.where(rating_mat.A==0)
        save_data_configuration("\n"+"#"*4+"  zero_positions: all data separated by rows  "+"#"*4)
        return np.asarray([zero_true_matrix[0],zero_true_matrix[1]]).T
    else:
        save_data_configuration("\n"+"#"*4+"  zero_positions: random sampling  "+"#"*4)
        zp = create_test_no_interactions(train_x, test_x, dims,  num_samples=199)
        return zp

In [58]:
zp = zero_positions_mode(2, rating_mat, train_x, test_x, dims)
num_samples = 199
num_estratos = 5




le_product = LabelEncoder()
le_product.fit(data[col_names["col_id_product"]])
zp = create_test_no_interactions_stratified_users(df, test_x, train_x, dims, num_estratos, num_samples, le_product, show_graph=True)



100%|██████████| 14138/14138 [02:09<00:00, 109.03it/s]


In [26]:
# zero_positions = np.asarray(np.where(rating_mat.A==0)).T
zero_positions = zero_positions_mode(2, rating_mat, train_x, test_x, dims)
print(save_data_configuration(str(zero_positions.shape)+"\n"))
zero_positions

100%|██████████| 14138/14138 [01:50<00:00, 127.80it/s]

(2813462, 2)





array([[    0, 16821],
       [    0, 17652],
       [    0, 16736],
       ...,
       [14137, 19744],
       [14137, 16925],
       [14137, 18058]])

In [27]:
items2compute = []
for user in trange(dims[0]):
    aux = zero_positions[zero_positions[:, 0] == user][:, 1]
    items2compute.append(aux[aux >= dims[0]])
items2compute[0]

100%|██████████| 14138/14138 [02:13<00:00, 105.97it/s]


array([16821, 17652, 16736, 17241, 18263, 20013, 17964, 18347, 15142,
       16526, 18892, 15614, 18249, 16315, 19680, 16730, 17709, 15693,
       20017, 17571, 15748, 20011, 18969, 19803, 15554, 16150, 15359,
       19386, 17178, 17214, 19783, 14512, 17261, 14644, 18805, 15822,
       17527, 15019, 18958, 15953, 19044, 15149, 19653, 14901, 15285,
       16106, 18011, 19844, 14981, 18264, 14798, 16176, 15591, 15400,
       19800, 18471, 16154, 18479, 14992, 17000, 14778, 15133, 18532,
       19870, 16614, 17431, 15426, 15080, 18848, 18655, 14770, 19394,
       19453, 15073, 17937, 20225, 16991, 17896, 15233, 19310, 17926,
       14616, 16958, 19185, 15611, 19987, 19451, 20118, 18547, 16427,
       16521, 17490, 18361, 14268, 17798, 19048, 19695, 14380, 18990,
       15784, 18623, 16183, 19449, 17179, 16706, 14181, 19820, 15718,
       14439, 17253, 16456, 16160, 16442, 14664, 18420, 19165, 16288,
       15992, 15456, 16876, 20298, 17851, 15790, 18081, 14581, 15056,
       19359, 19397,

In [28]:
def build_test_set(itemsnoninteracted:list, gt_test_interactions: np.ndarray) -> list:
    #max_users, max_items = dims # number users (943), number items (2625)
    test_set = []
    for pair, negatives in tqdm(zip(gt_test_interactions, itemsnoninteracted), desc="BUILDING TEST SET..."):
        # APPEND TEST SETS FOR SINGLE USER
        negatives = np.delete(negatives, np.where(negatives == pair[1]))
        single_user_test_set = np.vstack([pair, ] * (len(negatives)+1))
        single_user_test_set[:, 1][1:] = negatives
        test_set.append(single_user_test_set.copy())
    return test_set

test_x = build_test_set(items2compute, test_x)
test_x[0]

BUILDING TEST SET...: 14138it [00:04, 3099.17it/s]


array([[    0, 17249],
       [    0, 16821],
       [    0, 17652],
       [    0, 16736],
       [    0, 17241],
       [    0, 18263],
       [    0, 20013],
       [    0, 17964],
       [    0, 18347],
       [    0, 15142],
       [    0, 16526],
       [    0, 18892],
       [    0, 15614],
       [    0, 18249],
       [    0, 16315],
       [    0, 19680],
       [    0, 16730],
       [    0, 17709],
       [    0, 15693],
       [    0, 20017],
       [    0, 17571],
       [    0, 15748],
       [    0, 20011],
       [    0, 18969],
       [    0, 19803],
       [    0, 15554],
       [    0, 16150],
       [    0, 15359],
       [    0, 19386],
       [    0, 17178],
       [    0, 17214],
       [    0, 19783],
       [    0, 14512],
       [    0, 17261],
       [    0, 14644],
       [    0, 18805],
       [    0, 15822],
       [    0, 17527],
       [    0, 15019],
       [    0, 18958],
       [    0, 15953],
       [    0, 19044],
       [    0, 15149],
       [   

# Building Factorization Machines model

In [29]:
class FM_operation(torch.nn.Module):

    def __init__(self, 
                 reduce_sum: bool=True) -> None:
        super().__init__()
        self.reduce_sum = reduce_sum

    def forward(self,
                x: torch.Tensor) -> float:
        """
        :param x: Float tensor of size ``(batch_size, num_fields, embed_dim)``
        """
        # square_of_sum = np.sum(x, dim=1) ** 2 # ...
        # sum_of_square = np.sum(x ** 2, dim=1) # ...
        
        square_of_sum = torch.pow(torch.sum(x, dim=1),2)
        sum_of_square = torch.sum(torch.pow(x,2), dim=1)
        ix = square_of_sum - sum_of_square
        if self.reduce_sum:
            ix = torch.sum(ix, dim=1, keepdim=True)
        return 0.5 * ix
        

In [30]:
class FactorizationMachineModel(torch.nn.Module):
    """
    A pytorch implementation of Factorization Machine.

    Reference:
        S Rendle, Factorization Machines, 2010.
    """

    def __init__(self, 
                 field_dims: list,
                 embed_dim: float) -> None:
        super().__init__()
        self.linear = torch.nn.Linear(len(field_dims), 1)
        self.embedding = torch.nn.Embedding(field_dims[-1], embed_dim)
        self.fm = FM_operation(reduce_sum=True)

        torch.nn.init.xavier_uniform_(self.embedding.weight.data)

    def forward(self, interaction_pairs: torch.Tensor) -> torch.Tensor:
        """
        :param interaction_pairs: Long tensor of size ``(batch_size, num_fields)``
        """
        out = self.linear(interaction_pairs.float()) + self.fm(self.embedding(interaction_pairs))
        return out.squeeze(1)
        
    def predict(self, 
                interactions: np.ndarray,
                device: torch.device) -> torch.Tensor:
        # return the score, inputs are numpy arrays, outputs are tensors
        test_interactions = torch.from_numpy(interactions).to(dtype=torch.long, device=device) #, dtype=torch.long)
        output_scores = self.forward(test_interactions)
        return output_scores

# Pipeline functions

## Training

In [31]:
from statistics import mean

def train_one_epoch(model: torch.nn.Module,
                    optimizer: torch.optim,
                    data_loader: torch.utils.data.DataLoader,
                    criterion: torch.nn.functional,
                    device: torch.device) -> float:
    model.train()
    total_loss = []

    for i, (interactions, targets) in enumerate(data_loader):
        interactions = interactions.to(device)
        targets = targets.to(device)

        predictions = model(interactions)
    
        loss = criterion(predictions, targets.float())
        model.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss.append(loss.item())

    return mean(total_loss)

# Define metrics

In [32]:
import math

def getHitRatio(recommend_list: list,
                gt_item: int) -> bool:
    if gt_item in recommend_list:
        return 1
    else:
        return 0

def getNDCG(recommend_list: list,
            gt_item: int) -> float:
    idx = np.where(recommend_list == gt_item)[0]
    if len(idx) > 0:
        return math.log(2)/math.log(idx+2)
    else:
        return 0

# Inference


In [33]:
def test(model: torch.nn.Module,
         test_x: np.ndarray,
         device: torch.device,
         topk: int=10) -> Tuple[float, float]:
    # Test the HR and NDCG for the model @topK
    model.eval()

    HR, NDCG = [], []
    for user_test in test_x:
        gt_item = user_test[0][1]
        predictions = model.predict(user_test, device)
        _, indices = torch.topk(predictions, topk)
        recommend_list = user_test[indices.cpu().detach().numpy()][:, 1]

        HR.append(getHitRatio(recommend_list, gt_item))
        NDCG.append(getNDCG(recommend_list, gt_item))
    return mean(HR), mean(NDCG)

# PIPELINE
## Defining the model, the loss and the optimizer



In [34]:
dims = train_dataset.dims
model = FactorizationMachineModel(dims, hparams['hidden_size']).to(device)

criterion = torch.nn.BCEWithLogitsLoss(reduction='mean')
optimizer = torch.optim.Adam(params=model.parameters(), lr=hparams['learning_rate'])

## Random evaluation

In [35]:
import random
class RandomModel(torch.nn.Module):
    def __init__(self, 
                 dims: list) -> None:
        super(RandomModel, self).__init__()
        """
        Simple random based recommender system
        """
        self.all_items = list(range(dims[0], dims[1]))

    def forward(self) -> None:
        pass

    def predict(self,
                interactions: np.ndarray,
                device=None) -> torch.Tensor:
        return torch.FloatTensor(random.sample(self.all_items, len(interactions)))

rnd_model = RandomModel(dims)

## Final pipeline

In [36]:
data_loader = DataLoader(train_dataset, batch_size=hparams['batch_size'], shuffle=True, num_workers=0)

# Start training the model

In [38]:
# DO EPOCHS NOW
topk = 10
for epoch_i in range(hparams['num_epochs']):
    #data_loader.dataset.negative_sampling()
    train_loss = train_one_epoch(model, optimizer, data_loader, criterion, device)
    hr, ndcg = test(model, test_x, device, topk=topk)

    print(save_data_configuration(f'epoch {epoch_i}:'))
    print(save_data_configuration(f'training loss = {train_loss:.4f} | Eval: HR@{topk} = {hr:.4f}, NDCG@{topk} = {ndcg:.4f} '))
    print('\n')
 
    tb_fm.add_scalar('train/loss', train_loss, epoch_i)
    tb_fm.add_scalar('eval/HR@{topk}', hr, epoch_i)
    tb_fm.add_scalar('eval/NDCG@{topk}', ndcg, epoch_i)

    hr, ndcg = test(rnd_model, test_x, device, topk=topk)
    tb_rnd.add_scalar('eval/HR@{topk}', hr, epoch_i)
    tb_rnd.add_scalar('eval/NDCG@{topk}', ndcg, epoch_i)

epoch 0:
training loss = 309.8556 | Eval: HR@10 = 0.0395, NDCG@10 = 0.0181 


epoch 1:
training loss = 0.5132 | Eval: HR@10 = 0.1502, NDCG@10 = 0.0949 


epoch 2:
training loss = 0.5054 | Eval: HR@10 = 0.1086, NDCG@10 = 0.0683 


epoch 3:
training loss = 0.4907 | Eval: HR@10 = 0.1318, NDCG@10 = 0.0809 


epoch 4:
training loss = 0.4683 | Eval: HR@10 = 0.1715, NDCG@10 = 0.1036 


epoch 5:
training loss = 0.4477 | Eval: HR@10 = 0.1937, NDCG@10 = 0.1162 


epoch 6:
training loss = 0.4291 | Eval: HR@10 = 0.2109, NDCG@10 = 0.1257 


epoch 7:
training loss = 0.4143 | Eval: HR@10 = 0.2266, NDCG@10 = 0.1335 


epoch 8:
training loss = 0.4038 | Eval: HR@10 = 0.2379, NDCG@10 = 0.1396 


epoch 9:
training loss = 0.3973 | Eval: HR@10 = 0.2465, NDCG@10 = 0.1442 


epoch 10:
training loss = 0.3923 | Eval: HR@10 = 0.2525, NDCG@10 = 0.1478 


epoch 11:
training loss = 0.3867 | Eval: HR@10 = 0.2494, NDCG@10 = 0.1461 




# Visualization

In [42]:
!kill 2820

"kill" no se reconoce como un comando interno o externo,
programa o archivo por lotes ejecutable.


In [None]:
%tensorboard --logdir runs_random