# Final project: Graph-based recommendation system

In [None]:
# Install required packages.
# The installed torch version is exported as the TORCH environment variable so
# that the pip commands below can select the wheel index matching it.
import os
import torch
os.environ['TORCH'] = torch.__version__
print(torch.__version__)

# torch-scatter / torch-sparse wheels are looked up on the page for torch-${TORCH}.
!pip install -q torch-scatter -f https://data.pyg.org/whl/torch-${TORCH}.html
!pip install -q torch-sparse -f https://data.pyg.org/whl/torch-${TORCH}.html
# PyTorch Geometric itself is installed from the current GitHub source.
!pip install -q git+https://github.com/pyg-team/pytorch_geometric.git

1.12.1+cu113
[K     |████████████████████████████████| 7.9 MB 14.6 MB/s 
[K     |████████████████████████████████| 3.5 MB 16.5 MB/s 
[?25h  Building wheel for torch-geometric (setup.py) ... [?25l[?25hdone


In [None]:
# import required modules
import argparse

import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from torch import Tensor, nn, optim
from torch.utils.data import DataLoader, Dataset
from torch_geometric.utils import to_undirected
from tqdm.auto import tqdm, trange

In [None]:
# Fetch the LightGCN-PyTorch repository; the Gowalla train/test files read
# later in this notebook are loaded from its data/gowalla directory.
! git clone https://github.com/gusye1234/LightGCN-PyTorch.git

Cloning into 'LightGCN-PyTorch'...
remote: Enumerating objects: 534, done.[K
remote: Counting objects: 100% (155/155), done.[K
remote: Compressing objects: 100% (22/22), done.[K
remote: Total 534 (delta 140), reused 133 (delta 133), pack-reused 379[K
Receiving objects: 100% (534/534), 81.83 MiB | 14.27 MiB/s, done.
Resolving deltas: 100% (320/320), done.


In [None]:
def _load_interactions(path):
    """Read an adjacency-list file into a flat list of (user, item) pairs.

    Each non-blank line is expected to contain whitespace-separated integers:
    a user id followed by the ids of the items that user interacted with.

    Args:
        path (str): path of the text file to read

    Returns:
        list: list of (user_id, item_id) tuples, in file order
    """
    pairs = []
    with open(path, "r") as f:
        for line in f:
            # split() without arguments drops the trailing newline and tolerates
            # repeated/trailing spaces, so blank lines yield an empty list
            # instead of crashing on int('').
            fields = line.split()
            if not fields:
                continue
            uid = int(fields[0])
            pairs.extend((uid, int(item)) for item in fields[1:])
    return pairs


# load training data
train_interaction = _load_interactions(
    "./LightGCN-PyTorch/data/gowalla/train.txt")

# load testing data
test_interaction = _load_interactions(
    "./LightGCN-PyTorch/data/gowalla/test.txt")

In [None]:
# wrap the (user, item) pairs in DataFrames
train_interaction = pd.DataFrame(train_interaction, columns=["userId", "itemId"])
test_interaction = pd.DataFrame(test_interaction, columns=["userId", "itemId"])
print("Number of training data:", train_interaction.shape[0])
print("Number of testing data:", test_interaction.shape[0])

# map raw user / item ids to contiguous indices, in order of first appearance
# in the training data
user_mapping = {v: i for i, v in enumerate(
    train_interaction["userId"].unique())}
item_mapping = {v: i for i, v in enumerate(
    train_interaction["itemId"].unique())}

# drop test rows whose user or item never appears in the training data
condition = (test_interaction["userId"].isin(user_mapping)) & (
    test_interaction["itemId"].isin(item_mapping))
# .copy() makes the filtered frame independent of the original, so the column
# assignments done when relabelling do not write into a slice
# (pandas SettingWithCopyWarning).
test_interaction = test_interaction[condition].copy()
print(f"After filtering, there are {test_interaction.shape[0]} data remains")

Number of training data: 810128
Number of testing data: 217242
After filtering, there are 217242 data remains


In [None]:
# Replace raw ids with the contiguous indices from user_mapping / item_mapping.
train_interaction["userId"] = train_interaction["userId"].map(user_mapping.get)
test_interaction["userId"] = test_interaction["userId"].map(user_mapping.get)
train_interaction["itemId"] = train_interaction["itemId"].map(item_mapping.get)
test_interaction["itemId"] = test_interaction["itemId"].map(item_mapping.get)

# An id missing from a mapping would have produced a null entry; none may exist.
assert not train_interaction.isnull().values.any()
assert not test_interaction.isnull().values.any()

In [None]:
def get_user_positive_items(interactions):
    """Generates dictionary of positive items for each user

    Args:
        interactions (np.ndarray or torch.Tensor): N by 2 (or wider) array of
            interactions; column 0 is the user, column 1 is the item

    Returns:
        dict: maps each user to the list of its items, in row order
    """
    user_pos_items = {}
    # A single tolist() converts everything to Python scalars at once instead
    # of calling .item() twice per row; it works for numpy arrays and tensors.
    for row in interactions.tolist():
        user_pos_items.setdefault(row[0], []).append(row[1])
    return user_pos_items

In [None]:
num_users, num_items = len(user_mapping), len(item_mapping)
# from here on the interactions are plain N x 2 numpy arrays of (user, item)
train_interaction = train_interaction.values
test_interaction = test_interaction.values

# get interacted item list
train_user_record = get_user_positive_items(train_interaction)
# built from the test split (the original passed the training split here)
test_user_record = get_user_positive_items(test_interaction)

In [None]:
# Turn the N x 2 interaction arrays into 2 x N edge lists
# (row 0: user nodes, row 1: item nodes).
train_edge_index = torch.tensor(train_interaction).t()
test_edge_index = torch.tensor(test_interaction).t()

# Users and items share one node-id space: item j becomes node num_users + j.
train_edge_index[1] += num_users
test_edge_index[1] += num_users
print(f"Number of users:{num_users} | Number of items:{num_items}")

# transform to undirected
train_edge_index = to_undirected(train_edge_index)
test_edge_index = to_undirected(test_edge_index)
assert train_edge_index.size(0) == 2
assert test_edge_index.size(0) == 2

Number of users:29858 | Number of items:40981


## Tools for evaluation

In [None]:
def RecallPrecision_ATk(groundTruth, r, k):
    """Computes recall @ k and precision @ k

    Args:
        groundTruth (list): list of lists holding the ground-truth items of each user
        r (torch.Tensor): one row per user; entry j is 1 when the j-th
            recommended item is a ground-truth item of that user, else 0
        k (int): number of recommended items per user

    Returns:
        tuple: recall @ k, precision @ k (Python floats, averaged over users)
    """
    # hits among the recommended items, per user
    hits_per_user = r.sum(dim=-1)
    # size of each user's ground-truth list
    liked_per_user = torch.tensor(
        [len(items) for items in groundTruth], dtype=torch.float32)

    recall = (hits_per_user / liked_per_user).mean()
    precision = hits_per_user.mean() / k
    return recall.item(), precision.item()


def NDCGatK_r(groundTruth, r, k):
    """Computes Normalized Discounted Cumulative Gain (NDCG) @ k

    Args:
        groundTruth (list): list of lists holding the ground-truth items of each user
        r (torch.Tensor): one row per user; entry j is 1 when the j-th
            recommended item is a ground-truth item of that user, else 0
        k (int): number of recommended items per user

    Returns:
        float: ndcg @ k averaged over users
    """
    assert len(r) == len(groundTruth)

    # rank positions 1..k are discounted by log2(2)..log2(k + 1)
    log_rank = torch.log2(torch.arange(2, k + 2))

    # ideal ranking: the first min(len(items), k) slots are all hits
    ideal_hits = torch.zeros((len(r), k))
    for row, items in enumerate(groundTruth):
        ideal_hits[row, :min(len(items), k)] = 1

    idcg = (ideal_hits * 1. / log_rank).sum(dim=1)
    dcg = (r * (1. / log_rank)).sum(dim=1)

    # a user with an empty ground-truth list would otherwise divide by zero
    idcg[idcg == 0.] = 1.
    ndcg = dcg / idcg
    ndcg[torch.isnan(ndcg)] = 0.
    return ndcg.mean().item()


def get_metrics(predictions, interactions, k):
    """
    Computes the evaluation metrics: recall, precision, and ndcg @ k
    Args:
        predictions (np.array): a sorted array with size (num_user,k)
                                where predictions[i,j] denotes the j-th item user i might like
        interactions (np.array): N by 2 array of (user, item) ground-truth
            interactions of the evaluated split
        k (int): determines the top k items to compute the metrics on

    Returns:
        tuple: recall @ k, precision @ k, ndcg @ k
    """
    top_K_items = torch.tensor(predictions)
    interactions = torch.tensor(interactions)

    # get all unique users in evaluated split (as Python ints)
    users = interactions[:, 0].unique().tolist()

    test_user_pos_items = get_user_positive_items(interactions)

    # convert test user pos items dictionary into a list
    test_user_pos_items_list = [test_user_pos_items[user] for user in users]

    # determine the correctness of topk predictions; membership is tested with
    # Python ints against a set rather than 0-dim tensors against a list,
    # which was a linear scan of tensor comparisons for every prediction
    r = []
    for user in users:
        ground_truth_items = set(test_user_pos_items[user])
        r.append([item in ground_truth_items
                  for item in top_K_items[user].tolist()])
    r = torch.Tensor(np.array(r).astype('float'))

    recall, precision = RecallPrecision_ATk(test_user_pos_items_list, r, k)
    ndcg = NDCGatK_r(test_user_pos_items_list, r, k)

    return recall, precision, ndcg

In [None]:
# Sanity check of the metric code with a trivial baseline: every user is
# recommended the same items 0..K-1 (constant predictions, not random ones).
K = 10
predictions = np.array([list(range(K))] * num_users) # shape: num_users x K
recall, precision, ndcg = get_metrics(predictions,test_interaction,K)
print(f"[Popolarity] Recall@{K}: {recall:.4f}, Precision@{K}: {precision:.4f}, NDCG@{K}: {ndcg:.4f}")

[Popolarity] Recall@10: 0.0001, Precision@10: 0.0001, NDCG@10: 0.0001


## Dataloader

In [None]:
class TripletUniformPair(Dataset):
    """Dataset of (user, positive item, negative item) triplets.

    Every observed (user, item) pair is one sample; its negative item is
    redrawn uniformly at random on each access.
    """

    def __init__(self, num_item, user_list, pair):
        """
        Args:
            num_item (int): number of items; negatives are drawn from [0, num_item)
            user_list (dict): maps each user to the items it interacted with
            pair: indexable collection of (user, item) pairs
        """
        self.num_item = num_item
        self.user_list = user_list
        self.pair = pair

    def __len__(self):
        return len(self.pair)

    def __getitem__(self, idx):
        user, pos_item = self.pair[idx][0], self.pair[idx][1]
        seen_items = self.user_list[user]
        # rejection sampling: redraw until the item is not one the user has seen
        while True:
            neg_item = np.random.randint(self.num_item)
            if neg_item not in seen_items:
                return user, pos_item, neg_item

## Loss function: BPR

In [None]:
def bpr_loss(users_emb_final, pos_items_emb_final, neg_items_emb_final):
    """Bayesian Personalized Ranking Loss as described in https://arxiv.org/abs/1205.2618

    Args:
        users_emb_final (torch.Tensor): final user embeddings, one row per sample
        pos_items_emb_final (torch.Tensor): final embeddings of the positive items
        neg_items_emb_final (torch.Tensor): final embeddings of the negative items

    Returns:
        torch.Tensor: scalar bpr loss value
    """
    # predicted score = inner product of the user and item embeddings
    pos_scores = (users_emb_final * pos_items_emb_final).sum(dim=-1)
    neg_scores = (users_emb_final * neg_items_emb_final).sum(dim=-1)

    # the small constant keeps the log finite when the sigmoid underflows to 0
    margin = pos_scores - neg_scores
    return -torch.log(10e-8 + torch.sigmoid(margin)).mean()

## Problem1: Implementing NGCF

### High-order Propagation in NGCF
Between each layer, NGCF uses the following propagation rule for user and item embeddings.

\begin{equation}
e_u^{(k)} = LeakyReLU \left(  m^{(k)}_{u \leftarrow u}  + \sum_{i\in N_u}   m^{(k)}_{u \leftarrow i} \right)\\
m^{(k)}_{u \leftarrow u} = W_1^{(k)}e_u^{(k-1)},\\
m^{(k)}_{u \leftarrow i} = \frac{1}{\sqrt{|N_u||N_i|}} \left( W_1^{(k)}e_i^{(k-1)} + W_2^{(k)} \left( e_i^{(k-1)} \odot e_u^{(k-1)} \right)  \right)
\end{equation}

$N_u$: the set of all neighbors of user $u$ (items liked by $u$)

$N_i$: the set of all neighbors of item $i$ (users who liked $i$)

$e_u^{(k)}$ : k-th layer user embedding

$e_i^{(k)}$ : k-th layer item embedding



### Layer Combination and Model Prediction
We combine the embeddings obtained at each layer of propagation to form the final embeddings for all users and items, $e_u$ and $e_i$, via the following equation.


\begin{equation}
e_u = e_u^{(0)} || \cdots || e_u^{(K)} \quad e_i = e_i^{(0)} || \cdots || e_i^{(K)}
\end{equation}

$||$ : denotes the concatenate operation

The model prediction is obtained by taking the inner product of the final user and item embeddings.

\begin{equation}
\hat{y}_{ui} = e_u^Te_i
\end{equation}



In [None]:
from torch_geometric.nn import MessagePassing
from torch import Tensor
from torch_geometric.nn.conv.gcn_conv import gcn_norm
from torch_sparse import SparseTensor, matmul

In [None]:
class NGCF(MessagePassing):
    """NGCF model: message passing with per-layer linear maps W1 and W2,
    LeakyReLU between layers, and concatenation of all layer outputs.
    """

    def __init__(self, num_users, num_items, embedding_dim=8, n_layers=3, add_self_loops=False):
        """Initializes NGCF Model

        Args:
            num_users (int): Number of users
            num_items (int): Number of items
            embedding_dim (int, optional): Dimensionality of embeddings. Defaults to 8.
            n_layers (int, optional): Number of message passing layers. Defaults to 3.
            add_self_loops (bool, optional): Forwarded to gcn_norm. Defaults to False.
        """
        super().__init__()
        self.num_users = num_users
        self.num_items = num_items
        self.embedding_dim = embedding_dim
        self.n_layers = n_layers
        self.add_self_loops = add_self_loops

        # layer-0 embeddings e_u^0 and e_i^0
        self.users_emb = nn.Embedding(
            num_embeddings=num_users, embedding_dim=embedding_dim)
        self.items_emb = nn.Embedding(
            num_embeddings=num_items, embedding_dim=embedding_dim)

        # one bias-free (W1, W2) pair per propagation layer
        self.W1_list = nn.ModuleList([
            nn.Linear(embedding_dim, embedding_dim, bias=False)
            for _ in range(n_layers)])
        self.W2_list = nn.ModuleList([
            nn.Linear(embedding_dim, embedding_dim, bias=False)
            for _ in range(n_layers)])

    def forward(self, edge_index):
        """Forward propagation of NGCF Model.

        Args:
            edge_index (Tensor): edge list over the joint user/item node set
                (user rows first, then item rows), passed to gcn_norm

        Returns:
            tuple (Tensor): final user embeddings, final item embeddings; each
                is the concatenation of the layer-0 .. layer-n_layers outputs
        """
        edge_index, edge_weight = gcn_norm(
            edge_index, add_self_loops=self.add_self_loops)

        current = torch.cat([self.users_emb.weight, self.items_emb.weight])
        layer_outputs = [current]

        for layer in range(self.n_layers):
            # aggregated neighbour messages plus the node's own W1 message
            neighbour_msg = self.propagate(
                edge_index, x=current, K=layer, norm=edge_weight)
            self_msg = self.W1_list[layer](current)
            current = F.leaky_relu(neighbour_msg + self_msg)
            layer_outputs.append(current)

        emb_final = torch.cat(layer_outputs, dim=1)
        users_emb_final, items_emb_final = torch.split(
            emb_final, [self.num_users, self.num_items])
        return users_emb_final, items_emb_final

    def message(self, x_i: Tensor, x_j: Tensor, K: int, norm: Tensor) -> Tensor:
        # per-edge message of layer K, scaled by the edge's normalisation weight
        linear_part = self.W1_list[K](x_j)
        interaction_part = self.W2_list[K](x_i * x_j)
        return norm.view(-1, 1) * (linear_part + interaction_part)

In [None]:
def get_predictions(user_embedding, item_embedding, exclude_user_list, k):
    """Returns the indices of the top k scored items for every user

    Args:
        user_embedding (torch.Tensor): num_users x d user embeddings
        item_embedding (torch.Tensor): num_items x d item embeddings
        exclude_user_list (list): list of dicts mapping a user to the items
            that must not be recommended to it (e.g. its training items)
        k (int): number of items to return per user

    Returns:
        np.ndarray: num_users x k array of item indices, best score first
    """

    # scores between every user and item - shape is num users x num items.
    # Only a ranking is needed, so detach first: otherwise the dense score
    # matrix would be recorded in the autograd graph of the embeddings.
    rating = torch.matmul(user_embedding.detach().cpu(),
                          item_embedding.detach().cpu().T)

    for user_pos_items in exclude_user_list:
        # get coordinates of all (user, item) entries to exclude
        exclude_users = []
        exclude_items = []
        for user, items in user_pos_items.items():
            exclude_users.extend([user] * len(items))
            exclude_items.extend(items)

        # set ratings of excluded entries to large negative value
        # (skipped when there is nothing to exclude)
        if exclude_users:
            rating[exclude_users, exclude_items] = -10e8

    # get the top k recommended items for each user
    _, top_K_items = torch.topk(rating, k=k)

    return top_K_items.numpy()

# Training

Your test set performance should be in line with the following (*K=20*):

In [None]:
def l2_reg_loss(reg, *args):
    """Returns reg times the sum of the L2 norms of the given tensors

    Args:
        reg (float): regularisation weight
        *args (torch.Tensor): tensors to regularise

    Returns:
        weighted sum of the norms (0 scaled by reg when no tensor is given)
    """
    total_norm = sum(torch.norm(emb, p=2) for emb in args)
    return total_norm * reg

In [None]:
# every observed training pair is one sample; negatives are drawn on access
train_dataset = TripletUniformPair(
    num_item=num_items, user_list=train_user_record, pair=train_interaction)
# shuffle=True: train_interaction keeps the order in which pairs were read
# from the file (grouped by user), so without shuffling each mini-batch would
# cover only a few consecutive users.
train_loader = DataLoader(train_dataset, batch_size=2048, shuffle=True,
                          pin_memory=True, num_workers=4, )

  cpuset_checked))


In [None]:
# training configs
# use the GPU when one is available, otherwise fall back to the CPU so the
# cell does not fail on machines without CUDA
device = "cuda" if torch.cuda.is_available() else "cpu"
EPOCHS = 10
K = 20  # cut-off of the top-K evaluation metrics
L2_reg = 1e-6  # weight of the embedding regularisation term
print(f"Using device {device}.")

model = NGCF(num_users, num_items,embedding_dim=32, n_layers=2)
model = model.to(device)
model.train()

# initialize parameters (Xavier-uniform for every parameter with more than one dimension)
for p in model.parameters():
    if p.dim() > 1:
        nn.init.xavier_uniform_(p)

optimizer = optim.Adam(model.parameters(), lr=1e-3)
train_edge_index = train_edge_index.to(device)

Using device cuda.


In [None]:
for epoch in trange(EPOCHS):
    running_loss = []
    for user_indices, pos_item_indices, neg_item_indices in train_loader:
        # forward propagation over the whole training graph
        optimizer.zero_grad()
        users_emb_final, items_emb_final = model(train_edge_index)

        # mini batching: keep only the embeddings of the sampled triplets
        user_indices, pos_item_indices, neg_item_indices = user_indices.to(
            device), pos_item_indices.to(device), neg_item_indices.to(device)
        users_emb_final = users_emb_final[user_indices]
        pos_items_emb_final = items_emb_final[pos_item_indices]
        neg_items_emb_final = items_emb_final[neg_item_indices]

        # loss computation
        reg_loss = l2_reg_loss(L2_reg, users_emb_final, pos_items_emb_final)
        loss = bpr_loss(users_emb_final, pos_items_emb_final,neg_items_emb_final) + reg_loss

        loss.backward()
        optimizer.step()
        running_loss.append(loss.item())

    # mean training loss of the epoch (kept for inspection, not printed)
    running_loss = np.mean(running_loss)

    # evaluation after every epoch; no gradients are needed here, so the
    # full-graph forward pass runs without building an autograd graph
    with torch.no_grad():
        users_emb_final, items_emb_final = model(train_edge_index)
    predictions = get_predictions(
        users_emb_final, items_emb_final, exclude_user_list=[train_user_record], k=K)
    recall, precision, ndcg = get_metrics(
        predictions, test_interaction, k=K)
    print(f"Test_recall@{K}: {recall:.4f}, Test_precision@{K}: {precision:.4f}, Test_ndcg@{K}: {ndcg:.4f}")


  0%|          | 0/10 [00:00<?, ?it/s]

Test_recall@20: 0.0741, Test_precision@20: 0.0251, Test_ndcg@20: 0.0649
Test_recall@20: 0.0590, Test_precision@20: 0.0206, Test_ndcg@20: 0.0512
Test_recall@20: 0.0705, Test_precision@20: 0.0243, Test_ndcg@20: 0.0619
Test_recall@20: 0.0778, Test_precision@20: 0.0264, Test_ndcg@20: 0.0672
Test_recall@20: 0.0833, Test_precision@20: 0.0279, Test_ndcg@20: 0.0708
Test_recall@20: 0.0866, Test_precision@20: 0.0289, Test_ndcg@20: 0.0718
Test_recall@20: 0.0887, Test_precision@20: 0.0295, Test_ndcg@20: 0.0748
Test_recall@20: 0.0890, Test_precision@20: 0.0296, Test_ndcg@20: 0.0742
Test_recall@20: 0.0914, Test_precision@20: 0.0301, Test_ndcg@20: 0.0751
Test_recall@20: 0.0942, Test_precision@20: 0.0310, Test_ndcg@20: 0.0792


## Problem2: Implementing LightGCN

### Light Graph Convolution
Between each layer, LightGCN uses the following propagation rule for user and item embeddings.

\begin{equation}
e_u^{(k+1)} = \sum_{i \in N_u} \frac{1}{\sqrt{|N_u|}\sqrt{|N_i|}} e_i^{(k)} \quad e_i^{(k+1)} = \sum_{u \in N_i} \frac{1}{\sqrt{|N_i|}\sqrt{|N_u|}} e_u^{(k)}
\end{equation}

$N_u$: the set of all neighbors of user $u$ (items liked by $u$)

$N_i$: the set of all neighbors of item $i$ (users who liked $i$)

$e_u^{(k)}$ : k-th layer user embedding

$e_i^{(k)}$ : k-th layer item embedding



### Layer Combination and Model Prediction
The only trainable parameters of LightGCN are the 0-th layer embeddings $e_u^{(0)}$ and $e_i^{(0)}$ for each user and item. We combine the embeddings obtained at each layer of propagation to form the final embeddings for all users and items, $e_u$ and $e_i$, via the following equation.


\begin{equation}
e_u = \sum_{k = 0}^K \alpha_k e_u^{(k)} \quad e_i = \sum_{k = 0}^K \alpha_k e_i^{(k)}
\end{equation}

$\alpha_k$ : hyperparameter which weights the contribution of the k-th layer embedding to the final embedding

The model prediction is obtained by taking the inner product of the final user and item embeddings.

\begin{equation}
\hat{y}_{ui} = e_u^Te_i
\end{equation}


In [None]:
class LightGCN(MessagePassing):
    """LightGCN Model as proposed in https://arxiv.org/abs/2002.02126

    The only parameters are the layer-0 user and item embeddings; the final
    embedding is the mean of the layer-0 .. layer-n_layers embeddings.
    """

    def __init__(self, num_users, num_items, embedding_dim, n_layers, add_self_loops=False):
        """Initializes LightGCN Model

        Args:
            num_users (int): Number of users
            num_items (int): Number of items
            embedding_dim (int): Dimensionality of embeddings.
            n_layers (int): Number of message passing layers.
            add_self_loops (bool, optional): Forwarded to gcn_norm. Defaults to False.
        """
        super().__init__()
        self.num_users = num_users
        self.num_items = num_items
        self.embedding_dim = embedding_dim
        self.n_layers = n_layers
        self.add_self_loops = add_self_loops

        # layer-0 embeddings e_u^0 and e_i^0
        self.users_emb = nn.Embedding(
            num_embeddings=num_users, embedding_dim=embedding_dim)
        self.items_emb = nn.Embedding(
            num_embeddings=num_items, embedding_dim=embedding_dim)

    def forward(self, edge_index: Tensor):
        """Propagates the embeddings and returns (user, item) final embeddings."""
        edge_index, edge_weight = gcn_norm(
            edge_index, add_self_loops=self.add_self_loops)

        # E^0: user rows first, then item rows
        current = torch.cat([self.users_emb.weight, self.items_emb.weight])
        layer_embs = [current]

        for _ in range(self.n_layers):
            current = self.propagate(edge_index, x=current, norm=edge_weight)
            layer_embs.append(current)

        # uniform average over all layers, layer 0 included
        emb_final = torch.stack(layer_embs, dim=1).mean(dim=1)

        users_emb_final, items_emb_final = torch.split(
            emb_final, [self.num_users, self.num_items])
        return users_emb_final, items_emb_final

    def message(self, x_j: Tensor, norm: Tensor) -> Tensor:
        # neighbour embedding scaled by the edge's normalisation weight
        return norm.view(-1, 1) * x_j

## Training!
Let's see if your LightGCN outperforms NGCF!

In [None]:
# training configs
# use the GPU when one is available, otherwise fall back to the CPU so the
# cell does not fail on machines without CUDA
device = "cuda" if torch.cuda.is_available() else "cpu"
EPOCHS = 30
K = 20  # cut-off of the top-K evaluation metrics
L2_reg = 1e-6  # weight of the embedding regularisation term
print(f"Using device {device}.")

model = LightGCN(num_users, num_items,embedding_dim=32, n_layers=2)
model = model.to(device)
model.train()

# initialize parameters (Xavier-uniform for every parameter with more than one dimension)
for p in model.parameters():
    if p.dim() > 1:
        nn.init.xavier_uniform_(p)

optimizer = optim.Adam(model.parameters(), lr=1e-3)
train_edge_index = train_edge_index.to(device)

Using device cuda.


In [None]:
for epoch in trange(EPOCHS):
    running_loss = []
    for user_indices, pos_item_indices, neg_item_indices in train_loader:
        # forward propagation over the whole training graph
        optimizer.zero_grad()
        users_emb_final, items_emb_final = model(train_edge_index)

        # mini batching: keep only the embeddings of the sampled triplets
        user_indices, pos_item_indices, neg_item_indices = user_indices.to(
            device), pos_item_indices.to(device), neg_item_indices.to(device)
        users_emb_final = users_emb_final[user_indices]
        pos_items_emb_final = items_emb_final[pos_item_indices]
        neg_items_emb_final = items_emb_final[neg_item_indices]

        # loss computation
        reg_loss  = l2_reg_loss(L2_reg, users_emb_final, pos_items_emb_final)
        loss = bpr_loss(users_emb_final, pos_items_emb_final,neg_items_emb_final) + reg_loss

        loss.backward()
        optimizer.step()
        running_loss.append(loss.item())

    # mean training loss of the epoch (kept for inspection, not printed)
    running_loss = np.mean(running_loss)

    # evaluation after every epoch; no gradients are needed here, so the
    # full-graph forward pass runs without building an autograd graph
    with torch.no_grad():
        users_emb_final, items_emb_final = model(train_edge_index)
    predictions = get_predictions(
        users_emb_final, items_emb_final, exclude_user_list=[train_user_record], k=K)
    recall, precision, ndcg = get_metrics(
        predictions, test_interaction, k=K)
    print(f"Test_recall@{K}: {recall:.4f}, Test_precision@{K}: {precision:.4f}, Test_ndcg@{K}: {ndcg:.4f}")


  0%|          | 0/30 [00:00<?, ?it/s]

Test_recall@20: 0.0668, Test_precision@20: 0.0227, Test_ndcg@20: 0.0576
Test_recall@20: 0.0709, Test_precision@20: 0.0240, Test_ndcg@20: 0.0611
Test_recall@20: 0.0739, Test_precision@20: 0.0248, Test_ndcg@20: 0.0640
Test_recall@20: 0.0769, Test_precision@20: 0.0257, Test_ndcg@20: 0.0667
Test_recall@20: 0.0800, Test_precision@20: 0.0266, Test_ndcg@20: 0.0694
Test_recall@20: 0.0820, Test_precision@20: 0.0272, Test_ndcg@20: 0.0712
Test_recall@20: 0.0847, Test_precision@20: 0.0280, Test_ndcg@20: 0.0734
Test_recall@20: 0.0862, Test_precision@20: 0.0284, Test_ndcg@20: 0.0752
Test_recall@20: 0.0875, Test_precision@20: 0.0288, Test_ndcg@20: 0.0765
Test_recall@20: 0.0891, Test_precision@20: 0.0292, Test_ndcg@20: 0.0778
Test_recall@20: 0.0902, Test_precision@20: 0.0295, Test_ndcg@20: 0.0789
Test_recall@20: 0.0915, Test_precision@20: 0.0299, Test_ndcg@20: 0.0800
Test_recall@20: 0.0927, Test_precision@20: 0.0303, Test_ndcg@20: 0.0811
Test_recall@20: 0.0938, Test_precision@20: 0.0306, Test_ndcg@20:

In [None]:
def InfoNCE(view1, view2, temperature):
    """Computes the InfoNCE contrastive loss between two views

    Row i of view1 and row i of view2 form the positive pair; every row of
    view2 appears in the denominator for row i of view1. Rows are
    L2-normalised first, so scores are cosine similarities.

    Args:
        view1 (torch.Tensor): n x d embeddings of the first view
        view2 (torch.Tensor): n x d embeddings of the second view
        temperature (float): divisor applied to the similarities before exp

    Returns:
        torch.Tensor: scalar loss, averaged over the n rows
    """
    z1 = F.normalize(view1, dim=1)
    z2 = F.normalize(view2, dim=1)

    positive = torch.exp((z1 * z2).sum(dim=-1) / temperature)
    everything = torch.exp(
        torch.matmul(z1, z2.transpose(0, 1)) / temperature).sum(dim=1)

    return torch.mean(-torch.log(positive / everything))

class SimGCL(MessagePassing):
    """Graph propagation with optional noise added to every layer's output;
    two noisy passes provide the views of the contrastive loss.
    """

    def __init__(self, num_users, num_items, embedding_dim, n_layers, add_self_loops=False, eps=0.2):
        """Initializes SimGCL Model

        Args:
            num_users (int): Number of users
            num_items (int): Number of items
            embedding_dim (int): Dimensionality of embeddings.
            n_layers (int): Number of message passing layers.
            add_self_loops (bool, optional): Forwarded to gcn_norm. Defaults to False.
            eps (float, optional): magnitude of the perturbation. Defaults to 0.2.
        """
        super().__init__()
        self.num_users = num_users
        self.num_items = num_items
        self.embedding_dim = embedding_dim
        self.n_layers = n_layers
        self.add_self_loops = add_self_loops
        self.eps = eps

        # layer-0 embeddings e_u^0 and e_i^0
        self.users_emb = nn.Embedding(
            num_embeddings=num_users, embedding_dim=embedding_dim)
        self.items_emb = nn.Embedding(
            num_embeddings=num_items, embedding_dim=embedding_dim)

    def forward(self, edge_index: Tensor, perturbed=False):
        """Returns (user, item) embeddings; adds noise per layer when perturbed."""
        edge_index, edge_weight = gcn_norm(
            edge_index, add_self_loops=self.add_self_loops)

        # E^0 is the input of the first layer but is not part of the average
        current = torch.cat([self.users_emb.weight, self.items_emb.weight])
        layer_embs = []

        for _ in range(self.n_layers):
            current = self.propagate(edge_index, x=current, norm=edge_weight)
            if perturbed:
                # noise of row-norm eps whose signs follow the embedding's own
                noise = F.normalize(torch.rand_like(current), dim=-1)
                current = current + torch.sign(current) * noise * self.eps
            layer_embs.append(current)

        emb_final = torch.stack(layer_embs, dim=1).mean(dim=1)

        users_emb_final, items_emb_final = torch.split(
            emb_final, [self.num_users, self.num_items])
        return users_emb_final, items_emb_final

    def message(self, x_j: Tensor, norm: Tensor) -> Tensor:
        # neighbour embedding scaled by the edge's normalisation weight
        return norm.view(-1, 1) * x_j

    def cal_cl_loss(self, raw_sparse_edges, idx):
        """Contrastive loss between two perturbed passes.

        Args:
            raw_sparse_edges (Tensor): edge index handed to forward
            idx: pair [user index tensor, item index tensor] of the batch;
                duplicates are removed before the loss is computed

        Returns:
            torch.Tensor: user InfoNCE loss plus item InfoNCE loss
        """
        target_device = idx[0].device
        batch_users = torch.unique(idx[0]).to(target_device)
        batch_items = torch.unique(idx[1]).to(target_device)

        # two independent noisy passes give the two views
        users_a, items_a = self.forward(raw_sparse_edges, perturbed=True)
        users_b, items_b = self.forward(raw_sparse_edges, perturbed=True)

        user_cl_loss = InfoNCE(users_a[batch_users], users_b[batch_users], 0.2)
        item_cl_loss = InfoNCE(items_a[batch_items], items_b[batch_items], 0.2)
        return user_cl_loss + item_cl_loss

In [None]:
# training configs
# use the GPU when one is available, otherwise fall back to the CPU so the
# cell does not fail on machines without CUDA
device = "cuda" if torch.cuda.is_available() else "cpu"
EPOCHS = 30
K = 20  # cut-off of the top-K evaluation metrics
lmbda = 0.2  # weight of the contrastive (InfoNCE) loss term
L2_reg = 1e-4  # weight of the embedding regularisation term
print(f"Using device {device}.")

model = SimGCL(num_users, num_items,embedding_dim=64, n_layers=2)
model = model.to(device)
model.train()

# initialize parameters (Xavier-uniform for every parameter with more than one dimension)
for p in model.parameters():
    if p.dim() > 1:
        nn.init.xavier_uniform_(p)

optimizer = optim.Adam(model.parameters(), lr=1e-3)
train_edge_index = train_edge_index.to(device)

Using device cuda.


In [None]:
for epoch in trange(EPOCHS):
    running_loss = []
    for user_indices, pos_item_indices, neg_item_indices in train_loader:
        # forward propagation (unperturbed) over the whole training graph
        optimizer.zero_grad()
        user_embeddings, item_embeddings = model(train_edge_index,False)

        # mini batching: keep only the embeddings of the sampled triplets
        user_indices, pos_item_indices, neg_item_indices = user_indices.to(
            device), pos_item_indices.to(device), neg_item_indices.to(device)
        users_emb_final = user_embeddings[user_indices]
        pos_items_emb_final = item_embeddings[pos_item_indices]
        neg_items_emb_final = item_embeddings[neg_item_indices]

        # loss computation: ranking loss + weighted contrastive loss + L2 term
        train_bpr_loss = bpr_loss(users_emb_final, pos_items_emb_final,
                                  neg_items_emb_final)

        infonce_loss = lmbda * model.cal_cl_loss(train_edge_index, [user_indices,pos_item_indices])
        reg_loss  = l2_reg_loss(L2_reg, users_emb_final, pos_items_emb_final)
        loss = train_bpr_loss + infonce_loss + reg_loss

        loss.backward()
        optimizer.step()
        running_loss.append(loss.item())

    # mean training loss of the epoch (kept for inspection, not printed)
    running_loss = np.mean(running_loss)

    # evaluation after every epoch; no gradients are needed here, so the
    # full-graph forward pass runs without building an autograd graph
    with torch.no_grad():
        users_emb_final, items_emb_final = model(train_edge_index)
    predictions = get_predictions(
        users_emb_final, items_emb_final, exclude_user_list=[train_user_record], k=K)
    recall, precision, ndcg = get_metrics(
        predictions, test_interaction, k=K)
    print(f"Test_recall@{K}: {recall:.4f}, Test_precision@{K}: {precision:.4f}, Test_ndcg@{K}: {ndcg:.4f}")


  0%|          | 0/30 [00:00<?, ?it/s]

Test_recall@20: 0.1509, Test_precision@20: 0.0460, Test_ndcg@20: 0.1258
Test_recall@20: 0.1392, Test_precision@20: 0.0416, Test_ndcg@20: 0.1156
Test_recall@20: 0.1254, Test_precision@20: 0.0370, Test_ndcg@20: 0.1041
Test_recall@20: 0.1234, Test_precision@20: 0.0366, Test_ndcg@20: 0.1033
Test_recall@20: 0.1317, Test_precision@20: 0.0396, Test_ndcg@20: 0.1111
Test_recall@20: 0.1423, Test_precision@20: 0.0436, Test_ndcg@20: 0.1201
Test_recall@20: 0.1526, Test_precision@20: 0.0474, Test_ndcg@20: 0.1295
Test_recall@20: 0.1600, Test_precision@20: 0.0500, Test_ndcg@20: 0.1364
Test_recall@20: 0.1638, Test_precision@20: 0.0515, Test_ndcg@20: 0.1406
Test_recall@20: 0.1667, Test_precision@20: 0.0524, Test_ndcg@20: 0.1433
Test_recall@20: 0.1688, Test_precision@20: 0.0530, Test_ndcg@20: 0.1450
Test_recall@20: 0.1706, Test_precision@20: 0.0536, Test_ndcg@20: 0.1464
Test_recall@20: 0.1719, Test_precision@20: 0.0540, Test_ndcg@20: 0.1475
Test_recall@20: 0.1734, Test_precision@20: 0.0545, Test_ndcg@20: