# LightGCN with MSD Dataset and StrongGeneralization Scenario

In this notebook, the implementation of LightGCN in RecPack and the experimental part to generate the results of the algorithm will be presented. 
The notebook contains:
1. The implementation of LightGCN in RecPack.
2. The 10% of MSD Dataset from RecPack and the StrongGeneralization Scenario has been used to split the data.
3. The StrongGeneralization Scenario to split the data.
4. The RecPack Pipeline Builder to run the experiments, including the splitted dataset, the algorithms and metrics to run. Hyperparameter has been performed in the Pipeline.

Please make sure you have installed all the latest libraries in your Python environment, in order to have a successful run of the code.

In [None]:
!pip install recpack

In [2]:
import torch
from tqdm import tqdm

In [3]:
from torch_sparse import SparseTensor, matmul

## LightGCN implementation in RecPack

In [4]:
import torch
import torch.nn as nn
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import add_self_loops, degree
import time
from typing import List, Tuple, Optional
from tqdm import tqdm
from recpack.algorithms.base import TorchMLAlgorithm
from recpack.matrix.interaction_matrix import InteractionMatrix
from recpack.matrix import to_csr_matrix
from recpack.algorithms.loss_functions import bpr_loss
from recpack.algorithms.samplers import PositiveNegativeSampler
from scipy.sparse import csr_matrix, lil_matrix
import torch.optim as optim
import logging

# logger = logging.getLogger(__name__)

# LightGCN model definition using MessagePassing from PyTorch Geometric
class LightGCN(MessagePassing):
    def __init__(self, num_users, num_items, embedding_dim=64, K=3, add_self_loops=False):
        """
        Initialize the LightGCN model with user and item embeddings.

        Args:
            num_users (int): Number of users.
            num_items (int): Number of items.
            embedding_dim (int): Dimension of the embedding vectors.
            K (int): Number of propagation layers.
            add_self_loops (bool): Whether to add self-loops to the adjacency matrix.
        """
        super(LightGCN, self).__init__(aggr='add')
        self.num_users, self.num_items = num_users, num_items
        self.embedding_dim, self.K = embedding_dim, K
        self.add_self_loops = add_self_loops

        # Initialize user and item embeddings
        self.users_emb = nn.Embedding(num_embeddings=self.num_users, embedding_dim=self.embedding_dim)
        self.items_emb = nn.Embedding(num_embeddings=self.num_items, embedding_dim=self.embedding_dim)

        # Initialize embeddings with normal distribution
        nn.init.normal_(self.users_emb.weight, std=0.1)
        nn.init.normal_(self.items_emb.weight, std=0.1)

    def forward(self, edge_index: SparseTensor):
        """
        Forward pass for the LightGCN model.

        Args:
            edge_index (SparseTensor): Sparse tensor representing the adjacency matrix.

        Returns:
            Tuple: Final user and item embeddings after propagation, and the initial embeddings.
        """
        if self.add_self_loops:
            edge_index, _ = add_self_loops(edge_index, num_nodes=self.num_users + self.num_items)
        
        # Normalize the adjacency matrix
        edge_index_norm = self.normalize_adj(edge_index)
        
        # Concatenate user and item embeddings
        emb_0 = torch.cat([self.users_emb.weight, self.items_emb.weight])
        embs = [emb_0]
        emb_k = emb_0

        # Perform K propagation steps
        for i in range(self.K):
            # Preventing CUDA/Library version error
            try:
                emb_k = self.propagate(edge_index_norm, x=emb_k)
            except RuntimeError as e:
                break
            embs.append(emb_k)

        # Stack and average embeddings from each propagation step
        embs = torch.stack(embs, dim=1)
        emb_final = torch.mean(embs, dim=1)

        # Split the final embeddings back into user and item embeddings
        users_emb_final, items_emb_final = torch.split(emb_final, [self.num_users, self.num_items])

        return users_emb_final, self.users_emb.weight, items_emb_final, self.items_emb.weight

    def message(self, x_j: torch.Tensor) -> torch.Tensor:
        """
        Message function that aggregates messages from neighboring nodes.

        Args:
            x_j (torch.Tensor): Features of the neighboring nodes.

        Returns:
            torch.Tensor: Aggregated messages.
        """
        return x_j

    def message_and_aggregate(self, adj_t: SparseTensor, x: torch.Tensor) -> torch.Tensor:
        """
        Message and aggregate function using matrix multiplication.

        Args:
            adj_t (SparseTensor): Transposed adjacency matrix.
            x (torch.Tensor): Node features.

        Returns:
            torch.Tensor: Result of multiplying adjacency matrix with node features.
        """
        return matmul(adj_t, x)

    def normalize_adj(self, edge_index: SparseTensor) -> SparseTensor:
        """
        Normalize the adjacency matrix.

        Args:
            edge_index (SparseTensor): Sparse tensor representing the adjacency matrix.

        Returns:
            SparseTensor: Normalized adjacency matrix.
        """
        row, col, value = edge_index.coo()
        row = row.long()  # Ensure indices are long type
        col = col.long()  # Ensure indices are long type
        deg = degree(row, num_nodes=edge_index.size(0), dtype=value.dtype)
        deg_inv_sqrt = deg.pow(-0.5)
        deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
        value = deg_inv_sqrt[row] * value * deg_inv_sqrt[col]
        return SparseTensor(row=row, col=col, value=value, sparse_sizes=edge_index.sparse_sizes())


In [5]:
from recpack.algorithms.base import TorchMLAlgorithm
from recpack.matrix import Matrix
from recpack.matrix.interaction_matrix import InteractionMatrix
from recpack.algorithms.loss_functions import bpr_loss, bpr_max_loss
from recpack.algorithms.samplers import PositiveNegativeSampler
from recpack.algorithms.stopping_criterion import (
    EarlyStoppingException,
    StoppingCriterion,
)
from typing import List, Tuple, Optional
import numpy as np
from scipy.sparse import csr_matrix, lil_matrix, coo_matrix
import torch
import torch.optim as optim
import tempfile
import time
import logging

logger = logging.getLogger(__name__)

# Implementation of the LightGCN algorithm using TorchMLAlgorithm as a base class
class LightGCNAlgorithm(TorchMLAlgorithm):
    def __init__(
        self,
        batch_size: int = 256,
        max_epochs: int = 100,
        learning_rate: float = 0.001,
        embedding_dim: int = 64,
        n_layers: int = 3,
        reg_weight: float = 1e-5,
        stopping_criterion: str = "bpr",
        stop_early: bool = True,
        max_iter_no_change: int = 5,
        min_improvement: float = 0.01,
        seed: Optional[int] = None,
        save_best_to_file: bool = False,
        keep_last: bool = False,
        predict_topK: Optional[int] = None,
        validation_sample_size: Optional[int] = None,
    ):
        """
        Initialize the LightGCNAlgorithm with various hyperparameters.

        Args:
            batch_size (int): Number of samples per batch.
            max_epochs (int): Maximum number of training epochs.
            learning_rate (float): Learning rate for the optimizer.
            embedding_dim (int): Dimension of the embedding vectors.
            n_layers (int): Number of LightGCN layers.
            reg_weight (float): Regularization weight.
            stopping_criterion (str): Criterion to stop training early.
            stop_early (bool): Whether to enable early stopping.
            max_iter_no_change (int): Maximum iterations with no improvement for early stopping.
            min_improvement (float): Minimum improvement required for early stopping.
            seed (Optional[int]): Random seed for reproducibility.
            save_best_to_file (bool): Whether to save the best model to a file.
            keep_last (bool): Whether to keep the last model.
            predict_topK (Optional[int]): Number of top-K predictions to consider.
            validation_sample_size (Optional[int]): Size of the validation sample.
        """
        self.embedding_dim = embedding_dim
        self.n_layers = n_layers
        self.reg_weight = reg_weight
        super().__init__(
            batch_size=batch_size,
            max_epochs=max_epochs,
            learning_rate=learning_rate,
            stopping_criterion=stopping_criterion,
            stop_early=stop_early,
            max_iter_no_change=max_iter_no_change,
            min_improvement=min_improvement,
            seed=seed,
            save_best_to_file=save_best_to_file,
            keep_last=keep_last,
            predict_topK=predict_topK,
            validation_sample_size=validation_sample_size,
        )
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def _init_model(self, train: InteractionMatrix) -> None:
        """
        Initialize the LightGCN model and optimizer.

        Args:
            train (InteractionMatrix): The training interaction matrix.
        """
        num_users, num_items = train.shape
        self.model_ = LightGCN(num_users, num_items, self.embedding_dim, self.n_layers).to(self.device)
        self.optimizer = optim.Adam(self.model_.parameters(), lr=self.learning_rate)

    def _create_sparse_graph(self, interaction_matrix: csr_matrix, num_users: int, num_items: int) -> SparseTensor:
        """
        Create a sparse graph from the interaction matrix.

        Args:
            interaction_matrix (csr_matrix): The interaction matrix in CSR format.
            num_users (int): Number of users.
            num_items (int): Number of items.

        Returns:
            SparseTensor: A sparse tensor representing the graph.
        """
        coo = interaction_matrix.tocoo()
        row = torch.tensor(coo.row, dtype=torch.long)
        col = torch.tensor(coo.col, dtype=torch.long)
        value = torch.tensor(coo.data, dtype=torch.float32)
        print(f"Graph - Rows: {row.shape}, Cols: {col.shape}, Values: {value.shape}")  # Debugging information
        return SparseTensor(row=row, col=col, value=value, sparse_sizes=(num_users + num_items, num_users + num_items)).to(self.device)

    def _train_epoch(self, train: csr_matrix) -> List[float]:
        """
        Train the model for one epoch.

        Args:
            train (csr_matrix): The training interaction matrix.

        Returns:
            List[float]: A list of losses for each batch.
        """
        self.model_.train()
        graph = self._create_sparse_graph(train, train.shape[0], train.shape[1]).to(self.device)
        total_loss = 0
        losses = []

        sampler = PositiveNegativeSampler(num_negatives=1, batch_size=self.batch_size)

        for user_indices, pos_item_indices, neg_item_indices in sampler.sample(train):
            user_indices = torch.tensor(user_indices).to(self.device)
            pos_item_indices = torch.tensor(pos_item_indices).to(self.device)
            neg_item_indices = torch.tensor(neg_item_indices).to(self.device).squeeze()

            self.optimizer.zero_grad()
            users_emb_final, _, items_emb_final, _ = self.model_(graph)  # Call model only once
            pos_scores = users_emb_final[user_indices] @ items_emb_final[pos_item_indices].t()
            neg_scores = users_emb_final[user_indices] @ items_emb_final[neg_item_indices].t()

            loss = bpr_loss(pos_scores, neg_scores)

            if torch.isnan(loss).any() or torch.isinf(loss).any():
                continue

            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model_.parameters(), max_norm=1.0)  # Gradient clipping
            self.optimizer.step()

            total_loss += loss.item()
            losses.append(loss.item())

        if len(losses) == 0:
            return [float('nan')]

        return losses

    def _batch_predict(self, X: csr_matrix, users: List[int]) -> csr_matrix:
        """
        Make batch predictions for a list of users.

        Args:
            X (csr_matrix): The interaction matrix.
            users (List[int]): List of user indices to make predictions for.

        Returns:
            csr_matrix: A sparse matrix with the prediction scores.
        """
        self.model_.eval()
        graph = self._create_sparse_graph(X, X.shape[0], X.shape[1]).to(self.device)
        user_indices = torch.tensor(users).to(self.device)
        item_indices = torch.arange(X.shape[1]).to(self.device)
        
        with torch.no_grad():
            user_emb_final, _, item_emb_final, _ = self.model_(graph)
            scores = user_emb_final[user_indices] @ item_emb_final.t()
            scores = scores.cpu().numpy()
        
        result = lil_matrix((X.shape[0], X.shape[1]))
        for i, user in enumerate(users):
            result[user] = scores[i]
        
        return result.tocsr()

    def fit(self, X: csr_matrix, validation_data: tuple):
        """
        Fit the model to the training data.

        Args:
            X (csr_matrix): The training interaction matrix.
            validation_data (tuple): Validation data used for early stopping.
        """
        super().fit(X, validation_data)

In [6]:
from recpack.datasets import Netflix, DummyDataset
from recpack.pipelines import PipelineBuilder
from recpack.scenarios import StrongGeneralization
from recpack.pipelines import ALGORITHM_REGISTRY
import pandas as pd

In [7]:
ALGORITHM_REGISTRY.register("LightGCNAlgorithm", LightGCNAlgorithm)

## RecPack Dataset Importing

In [20]:
from recpack.datasets import MillionSongDataset
dataset = MillionSongDataset()

In [21]:
dataset.fetch_dataset()

In [22]:
dataset

<recpack.datasets.million_song_dataset.MillionSongDataset at 0x7f8de57ac490>

In [23]:
df = dataset._load_dataframe()

## Datasets without Timestamps sampling

In [24]:
# Count interactions per user and per song
user_interactions = df['userId'].value_counts().reset_index()
user_interactions.columns = ['userId', 'user_interactions']

song_interactions = df['songId'].value_counts().reset_index()
song_interactions.columns = ['songId', 'song_interactions']

# Merge the interaction counts back to the original dataframe
df = df.merge(user_interactions, on='userId')
df = df.merge(song_interactions, on='songId')

# Calculate a combined interaction score
df['interaction_score'] = df['user_interactions'] + df['song_interactions']

# Rank based on the interaction score
df['rank'] = df['interaction_score'].rank(method='first', ascending=False)

# Select the top 10%
filtered_df = df[df['rank'] <= len(df) * 0.1]

# Drop helper columns
filtered_df = filtered_df.drop(columns=['user_interactions', 'song_interactions', 'interaction_score', 'rank'])

In [30]:
df

Unnamed: 0,userId,songId
0,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOAKIMP12A8C130995
1,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOAPDEY12A81C210A9
2,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBBMDR12A8C13253B
3,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBBMDR12A8C13253B
4,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBFNSP12AF72A0E22
...,...,...
138680238,b7815dbb206eb2831ce0fe040d0aa537e2e800f7,SOUSMXX12AB0185C24
138680239,b7815dbb206eb2831ce0fe040d0aa537e2e800f7,SOWYSKH12AF72A303A
138680240,b7815dbb206eb2831ce0fe040d0aa537e2e800f7,SOWYSKH12AF72A303A
138680241,b7815dbb206eb2831ce0fe040d0aa537e2e800f7,SOWYSKH12AF72A303A


In [25]:
filtered_df

Unnamed: 0,userId,songId
28,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOFRQTD12A81C233C0
59,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOMGIYR12AB0187973
60,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOMGIYR12AB0187973
61,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOMGIYR12AB0187973
62,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOMGIYR12AB0187973
...,...,...
138680210,b7815dbb206eb2831ce0fe040d0aa537e2e800f7,SOOFYTN12A6D4F9B35
138680211,b7815dbb206eb2831ce0fe040d0aa537e2e800f7,SOOFYTN12A6D4F9B35
138680212,b7815dbb206eb2831ce0fe040d0aa537e2e800f7,SOOFYTN12A6D4F9B35
138680237,b7815dbb206eb2831ce0fe040d0aa537e2e800f7,SOUJVIT12A8C1451C1


## Dataset Preprocessing to Interaction Matrix

In [27]:
from recpack.matrix import InteractionMatrix
from recpack.preprocessing.preprocessors import DataFramePreprocessor
item_ix = 'songId'
user_ix = 'userId'

preprocessor = DataFramePreprocessor(item_ix=item_ix, user_ix=user_ix)
interaction_matrix = preprocessor.process(filtered_df)

  0%|          | 0/13868024 [00:00<?, ?it/s]

  0%|          | 0/13868024 [00:00<?, ?it/s]

In [45]:
interaction_matrix.values

<Compressed Sparse Row sparse matrix of dtype 'int32'
	with 3387777 stored elements and shape (663854, 127)>

In [27]:
interaction_matrix.num_interactions

13868024

In [28]:
interaction_matrix.properties

InteractionMatrix.InteractionMatrixProperties(num_users=663854, num_items=127, has_timestamps=False)

## StrongGeneralization Scenario Splitting of Data

In [28]:
scenario = StrongGeneralization(frac_users_train=0.8, frac_interactions_in=0.8, validation=True)
scenario.split(interaction_matrix)

0it [00:00, ?it/s]

0it [00:00, ?it/s]

## Experimental RecPack Pipeline

In [29]:
pipeline_builder = PipelineBuilder()
ok = (scenario._validation_data_in, scenario._validation_data_out)
pipeline_builder.set_data_from_scenario(scenario)


# Add the baseline algorithms
#pipeline_builder.add_algorithm('ItemKNN', grid={'K': [100, 200, 400, 800]})
#pipeline_builder.add_algorithm('EASE', grid={'l2': [10, 100, 1000], 'alpha': [0, 0.1, 0.5]})

# Add our LightGCN algorithm
pipeline_builder.add_algorithm(
    'LightGCNAlgorithm',
    grid={
        'learning_rate': [0.1, 0.01, 0.001],
        'embedding_dim': [100, 200, 400]
    },
    params={
        'max_epochs': 5,
        'batch_size': 1024,
        'n_layers': 3
    }
)

# Add NDCG, Recall, and HR metrics to be evaluated at 10, 20, and 50
pipeline_builder.add_metric('NDCGK', [10, 20, 50])
pipeline_builder.add_metric('RecallK', [10, 20, 50])
pipeline_builder.add_metric('HitK', [10, 20, 50])

# Set the optimisation metric
pipeline_builder.set_optimisation_metric('RecallK', 20)

# Construct pipeline
pipeline = pipeline_builder.build()

# Run pipeline, will first do optimisation, and then evaluation
pipeline.run()



  0%|          | 0/1 [00:00<?, ?it/s]

Graph - Rows: torch.Size([1891951]), Cols: torch.Size([1891951]), Values: torch.Size([1891951])


  user_indices = torch.tensor(user_indices).to(self.device)
  pos_item_indices = torch.tensor(pos_item_indices).to(self.device)
  neg_item_indices = torch.tensor(neg_item_indices).to(self.device).squeeze()


2024-07-31 07:09:42,201 - base - recpack - INFO - Processed epoch 0 in 30.07 s.Batch Training Loss = 0.6258
Graph - Rows: torch.Size([6752]), Cols: torch.Size([6752]), Values: torch.Size([6752])
Graph - Rows: torch.Size([6361]), Cols: torch.Size([6361]), Values: torch.Size([6361])
Graph - Rows: torch.Size([6575]), Cols: torch.Size([6575]), Values: torch.Size([6575])
Graph - Rows: torch.Size([6928]), Cols: torch.Size([6928]), Values: torch.Size([6928])
Graph - Rows: torch.Size([7263]), Cols: torch.Size([7263]), Values: torch.Size([7263])
Graph - Rows: torch.Size([6850]), Cols: torch.Size([6850]), Values: torch.Size([6850])
Graph - Rows: torch.Size([6858]), Cols: torch.Size([6858]), Values: torch.Size([6858])
Graph - Rows: torch.Size([6815]), Cols: torch.Size([6815]), Values: torch.Size([6815])
Graph - Rows: torch.Size([6797]), Cols: torch.Size([6797]), Values: torch.Size([6797])
Graph - Rows: torch.Size([6403]), Cols: torch.Size([6403]), Values: torch.Size([6403])
Graph - Rows: torch.Si

  user_indices = torch.tensor(user_indices).to(self.device)
  pos_item_indices = torch.tensor(pos_item_indices).to(self.device)
  neg_item_indices = torch.tensor(neg_item_indices).to(self.device).squeeze()


2024-07-31 07:31:33,481 - base - recpack - INFO - Processed epoch 0 in 29.77 s.Batch Training Loss = 0.5605
Graph - Rows: torch.Size([6752]), Cols: torch.Size([6752]), Values: torch.Size([6752])
Graph - Rows: torch.Size([6361]), Cols: torch.Size([6361]), Values: torch.Size([6361])
Graph - Rows: torch.Size([6575]), Cols: torch.Size([6575]), Values: torch.Size([6575])
Graph - Rows: torch.Size([6928]), Cols: torch.Size([6928]), Values: torch.Size([6928])
Graph - Rows: torch.Size([7263]), Cols: torch.Size([7263]), Values: torch.Size([7263])
Graph - Rows: torch.Size([6850]), Cols: torch.Size([6850]), Values: torch.Size([6850])
Graph - Rows: torch.Size([6858]), Cols: torch.Size([6858]), Values: torch.Size([6858])
Graph - Rows: torch.Size([6815]), Cols: torch.Size([6815]), Values: torch.Size([6815])
Graph - Rows: torch.Size([6797]), Cols: torch.Size([6797]), Values: torch.Size([6797])
Graph - Rows: torch.Size([6403]), Cols: torch.Size([6403]), Values: torch.Size([6403])
Graph - Rows: torch.Si

  user_indices = torch.tensor(user_indices).to(self.device)
  pos_item_indices = torch.tensor(pos_item_indices).to(self.device)
  neg_item_indices = torch.tensor(neg_item_indices).to(self.device).squeeze()


2024-07-31 07:53:49,271 - base - recpack - INFO - Processed epoch 0 in 29.51 s.Batch Training Loss = 0.5770
Graph - Rows: torch.Size([6752]), Cols: torch.Size([6752]), Values: torch.Size([6752])
Graph - Rows: torch.Size([6361]), Cols: torch.Size([6361]), Values: torch.Size([6361])
Graph - Rows: torch.Size([6575]), Cols: torch.Size([6575]), Values: torch.Size([6575])
Graph - Rows: torch.Size([6928]), Cols: torch.Size([6928]), Values: torch.Size([6928])
Graph - Rows: torch.Size([7263]), Cols: torch.Size([7263]), Values: torch.Size([7263])
Graph - Rows: torch.Size([6850]), Cols: torch.Size([6850]), Values: torch.Size([6850])
Graph - Rows: torch.Size([6858]), Cols: torch.Size([6858]), Values: torch.Size([6858])
Graph - Rows: torch.Size([6815]), Cols: torch.Size([6815]), Values: torch.Size([6815])
Graph - Rows: torch.Size([6797]), Cols: torch.Size([6797]), Values: torch.Size([6797])
Graph - Rows: torch.Size([6403]), Cols: torch.Size([6403]), Values: torch.Size([6403])
Graph - Rows: torch.Si

  user_indices = torch.tensor(user_indices).to(self.device)
  pos_item_indices = torch.tensor(pos_item_indices).to(self.device)
  neg_item_indices = torch.tensor(neg_item_indices).to(self.device).squeeze()


2024-07-31 08:16:30,184 - base - recpack - INFO - Processed epoch 0 in 51.11 s.Batch Training Loss = 0.6898
Graph - Rows: torch.Size([6752]), Cols: torch.Size([6752]), Values: torch.Size([6752])
Graph - Rows: torch.Size([6361]), Cols: torch.Size([6361]), Values: torch.Size([6361])
Graph - Rows: torch.Size([6575]), Cols: torch.Size([6575]), Values: torch.Size([6575])
Graph - Rows: torch.Size([6928]), Cols: torch.Size([6928]), Values: torch.Size([6928])
Graph - Rows: torch.Size([7263]), Cols: torch.Size([7263]), Values: torch.Size([7263])
Graph - Rows: torch.Size([6850]), Cols: torch.Size([6850]), Values: torch.Size([6850])
Graph - Rows: torch.Size([6858]), Cols: torch.Size([6858]), Values: torch.Size([6858])
Graph - Rows: torch.Size([6815]), Cols: torch.Size([6815]), Values: torch.Size([6815])
Graph - Rows: torch.Size([6797]), Cols: torch.Size([6797]), Values: torch.Size([6797])
Graph - Rows: torch.Size([6403]), Cols: torch.Size([6403]), Values: torch.Size([6403])
Graph - Rows: torch.Si

  user_indices = torch.tensor(user_indices).to(self.device)
  pos_item_indices = torch.tensor(pos_item_indices).to(self.device)
  neg_item_indices = torch.tensor(neg_item_indices).to(self.device).squeeze()


2024-07-31 08:40:35,949 - base - recpack - INFO - Processed epoch 0 in 50.98 s.Batch Training Loss = 0.5623
Graph - Rows: torch.Size([6752]), Cols: torch.Size([6752]), Values: torch.Size([6752])
Graph - Rows: torch.Size([6361]), Cols: torch.Size([6361]), Values: torch.Size([6361])
Graph - Rows: torch.Size([6575]), Cols: torch.Size([6575]), Values: torch.Size([6575])
Graph - Rows: torch.Size([6928]), Cols: torch.Size([6928]), Values: torch.Size([6928])
Graph - Rows: torch.Size([7263]), Cols: torch.Size([7263]), Values: torch.Size([7263])
Graph - Rows: torch.Size([6850]), Cols: torch.Size([6850]), Values: torch.Size([6850])
Graph - Rows: torch.Size([6858]), Cols: torch.Size([6858]), Values: torch.Size([6858])
Graph - Rows: torch.Size([6815]), Cols: torch.Size([6815]), Values: torch.Size([6815])
Graph - Rows: torch.Size([6797]), Cols: torch.Size([6797]), Values: torch.Size([6797])
Graph - Rows: torch.Size([6403]), Cols: torch.Size([6403]), Values: torch.Size([6403])
Graph - Rows: torch.Si

  user_indices = torch.tensor(user_indices).to(self.device)
  pos_item_indices = torch.tensor(pos_item_indices).to(self.device)
  neg_item_indices = torch.tensor(neg_item_indices).to(self.device).squeeze()


2024-07-31 09:04:44,730 - base - recpack - INFO - Processed epoch 0 in 50.98 s.Batch Training Loss = 0.5706
Graph - Rows: torch.Size([6752]), Cols: torch.Size([6752]), Values: torch.Size([6752])
Graph - Rows: torch.Size([6361]), Cols: torch.Size([6361]), Values: torch.Size([6361])
Graph - Rows: torch.Size([6575]), Cols: torch.Size([6575]), Values: torch.Size([6575])
Graph - Rows: torch.Size([6928]), Cols: torch.Size([6928]), Values: torch.Size([6928])
Graph - Rows: torch.Size([7263]), Cols: torch.Size([7263]), Values: torch.Size([7263])
Graph - Rows: torch.Size([6850]), Cols: torch.Size([6850]), Values: torch.Size([6850])
Graph - Rows: torch.Size([6858]), Cols: torch.Size([6858]), Values: torch.Size([6858])
Graph - Rows: torch.Size([6815]), Cols: torch.Size([6815]), Values: torch.Size([6815])
Graph - Rows: torch.Size([6797]), Cols: torch.Size([6797]), Values: torch.Size([6797])
Graph - Rows: torch.Size([6403]), Cols: torch.Size([6403]), Values: torch.Size([6403])
Graph - Rows: torch.Si

  user_indices = torch.tensor(user_indices).to(self.device)
  pos_item_indices = torch.tensor(pos_item_indices).to(self.device)
  neg_item_indices = torch.tensor(neg_item_indices).to(self.device).squeeze()


2024-07-31 09:29:33,525 - base - recpack - INFO - Processed epoch 0 in 88.80 s.Batch Training Loss = 0.8452
Graph - Rows: torch.Size([6752]), Cols: torch.Size([6752]), Values: torch.Size([6752])
Graph - Rows: torch.Size([6361]), Cols: torch.Size([6361]), Values: torch.Size([6361])
Graph - Rows: torch.Size([6575]), Cols: torch.Size([6575]), Values: torch.Size([6575])
Graph - Rows: torch.Size([6928]), Cols: torch.Size([6928]), Values: torch.Size([6928])
Graph - Rows: torch.Size([7263]), Cols: torch.Size([7263]), Values: torch.Size([7263])
Graph - Rows: torch.Size([6850]), Cols: torch.Size([6850]), Values: torch.Size([6850])
Graph - Rows: torch.Size([6858]), Cols: torch.Size([6858]), Values: torch.Size([6858])
Graph - Rows: torch.Size([6815]), Cols: torch.Size([6815]), Values: torch.Size([6815])
Graph - Rows: torch.Size([6797]), Cols: torch.Size([6797]), Values: torch.Size([6797])
Graph - Rows: torch.Size([6403]), Cols: torch.Size([6403]), Values: torch.Size([6403])
Graph - Rows: torch.Si

  user_indices = torch.tensor(user_indices).to(self.device)
  pos_item_indices = torch.tensor(pos_item_indices).to(self.device)
  neg_item_indices = torch.tensor(neg_item_indices).to(self.device).squeeze()


2024-07-31 09:56:24,253 - base - recpack - INFO - Processed epoch 0 in 96.69 s.Batch Training Loss = 0.5655
Graph - Rows: torch.Size([6752]), Cols: torch.Size([6752]), Values: torch.Size([6752])
Graph - Rows: torch.Size([6361]), Cols: torch.Size([6361]), Values: torch.Size([6361])
Graph - Rows: torch.Size([6575]), Cols: torch.Size([6575]), Values: torch.Size([6575])
Graph - Rows: torch.Size([6928]), Cols: torch.Size([6928]), Values: torch.Size([6928])
Graph - Rows: torch.Size([7263]), Cols: torch.Size([7263]), Values: torch.Size([7263])
Graph - Rows: torch.Size([6850]), Cols: torch.Size([6850]), Values: torch.Size([6850])
Graph - Rows: torch.Size([6858]), Cols: torch.Size([6858]), Values: torch.Size([6858])
Graph - Rows: torch.Size([6815]), Cols: torch.Size([6815]), Values: torch.Size([6815])
Graph - Rows: torch.Size([6797]), Cols: torch.Size([6797]), Values: torch.Size([6797])
Graph - Rows: torch.Size([6403]), Cols: torch.Size([6403]), Values: torch.Size([6403])
Graph - Rows: torch.Si

  user_indices = torch.tensor(user_indices).to(self.device)
  pos_item_indices = torch.tensor(pos_item_indices).to(self.device)
  neg_item_indices = torch.tensor(neg_item_indices).to(self.device).squeeze()


2024-07-31 10:24:28,262 - base - recpack - INFO - Processed epoch 0 in 96.63 s.Batch Training Loss = 0.5664
Graph - Rows: torch.Size([6752]), Cols: torch.Size([6752]), Values: torch.Size([6752])
Graph - Rows: torch.Size([6361]), Cols: torch.Size([6361]), Values: torch.Size([6361])
Graph - Rows: torch.Size([6575]), Cols: torch.Size([6575]), Values: torch.Size([6575])
Graph - Rows: torch.Size([6928]), Cols: torch.Size([6928]), Values: torch.Size([6928])
Graph - Rows: torch.Size([7263]), Cols: torch.Size([7263]), Values: torch.Size([7263])
Graph - Rows: torch.Size([6850]), Cols: torch.Size([6850]), Values: torch.Size([6850])
Graph - Rows: torch.Size([6858]), Cols: torch.Size([6858]), Values: torch.Size([6858])
Graph - Rows: torch.Size([6815]), Cols: torch.Size([6815]), Values: torch.Size([6815])
Graph - Rows: torch.Size([6797]), Cols: torch.Size([6797]), Values: torch.Size([6797])
Graph - Rows: torch.Size([6403]), Cols: torch.Size([6403]), Values: torch.Size([6403])
Graph - Rows: torch.Si

  user_indices = torch.tensor(user_indices).to(self.device)
  pos_item_indices = torch.tensor(pos_item_indices).to(self.device)
  neg_item_indices = torch.tensor(neg_item_indices).to(self.device).squeeze()


2024-07-31 10:51:25,913 - base - recpack - INFO - Processed epoch 0 in 29.48 s.Batch Training Loss = 0.5775
Graph - Rows: torch.Size([6752]), Cols: torch.Size([6752]), Values: torch.Size([6752])
Graph - Rows: torch.Size([6361]), Cols: torch.Size([6361]), Values: torch.Size([6361])
Graph - Rows: torch.Size([6575]), Cols: torch.Size([6575]), Values: torch.Size([6575])
Graph - Rows: torch.Size([6928]), Cols: torch.Size([6928]), Values: torch.Size([6928])
Graph - Rows: torch.Size([7263]), Cols: torch.Size([7263]), Values: torch.Size([7263])
Graph - Rows: torch.Size([6850]), Cols: torch.Size([6850]), Values: torch.Size([6850])
Graph - Rows: torch.Size([6858]), Cols: torch.Size([6858]), Values: torch.Size([6858])
Graph - Rows: torch.Size([6815]), Cols: torch.Size([6815]), Values: torch.Size([6815])
Graph - Rows: torch.Size([6797]), Cols: torch.Size([6797]), Values: torch.Size([6797])
Graph - Rows: torch.Size([6403]), Cols: torch.Size([6403]), Values: torch.Size([6403])
Graph - Rows: torch.Si

## Results

In [30]:
pipeline.get_metrics()

Unnamed: 0,NDCGK_10,NDCGK_20,NDCGK_50,RecallK_10,RecallK_20,RecallK_50,HitK_10,HitK_20,HitK_50
"LightGCNAlgorithm(batch_size=1024,embedding_dim=100,keep_last=False,learning_rate=0.001,max_epochs=5,max_iter_no_change=5,min_improvement=0.01,n_layers=3,predict_topK=None,reg_weight=1e-05,save_best_to_file=False,seed=1033913625,stop_early=True,stopping_criterion=<recpack.algorithms.stopping_criterion.StoppingCriterion object at 0x7f8f1c12da90>,validation_sample_size=None)",0.018736,0.023259,0.034909,0.026918,0.041416,0.086329,0.082021,0.131182,0.279164


In [31]:
pd.DataFrame.from_dict(pipeline.get_metrics()).T

Unnamed: 0,"LightGCNAlgorithm(batch_size=1024,embedding_dim=100,keep_last=False,learning_rate=0.001,max_epochs=5,max_iter_no_change=5,min_improvement=0.01,n_layers=3,predict_topK=None,reg_weight=1e-05,save_best_to_file=False,seed=1033913625,stop_early=True,stopping_criterion=<recpack.algorithms.stopping_criterion.StoppingCriterion object at 0x7f8f1c12da90>,validation_sample_size=None)"
NDCGK_10,0.018736
NDCGK_20,0.023259
NDCGK_50,0.034909
RecallK_10,0.026918
RecallK_20,0.041416
RecallK_50,0.086329
HitK_10,0.082021
HitK_20,0.131182
HitK_50,0.279164
