In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
from loguru import logger

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.sparse as sparse
import torch.optim as optim

import pandas as pd

sys.path.insert(0, '..')

# Implement

In [3]:
from src.train_utils import mse_loss, train
from src.model import LightGCN

In [4]:
# device = (
#     "cuda"
#     if torch.cuda.is_available()
#     else "mps"
#     if torch.backends.mps.is_available()
#     else "cpu"
# )
device = 'cpu'
logger.info(f"Using {device} device")

[32m2024-09-07 23:49:53.934[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m9[0m - [1mUsing cpu device[0m


# Test implementation

In [5]:
# Mock data
user_ids = [0, 0, 1, 2, 2]
item_ids = [0, 1, 2, 3, 1]
interaction_scores = [1, 4, 5, 3, 2]
n_users = len(set(user_ids))
n_items = len(set(item_ids))

val_user_ids = [0, 1, 2]
val_item_ids = [2, 1, 2]
val_interaction_scores = [2, 4, 5]

model = LightGCN(embedding_dim=64, n_layers=3, user_ids=user_ids, item_ids=item_ids, interaction_scores=interaction_scores, device=device)

# Example forward pass
users = torch.tensor([0, 1, 2])
items = torch.tensor([0, 1, 2])
predictions = model.predict(users, items)
print(predictions)

tensor([ 7.3578e-03,  5.6214e-03, -1.4180e-05], grad_fn=<SumBackward1>)


In [6]:
import random
import numpy as np
from torch.utils.data import Dataset, DataLoader

# Fixing random seed for reproducibility
random.seed(42)
np.random.seed(42)

embedding_dim = 8
n_layers = 3
batch_size = 4

# Display mock dataset
print("Mock User IDs:", user_ids)
print("Mock Item IDs:", item_ids)
print("Interaction Scores:", interaction_scores)

class RatingDataset(Dataset):
    def __init__(self, user_ids, item_ids, ratings):
        """
        Args:
            user_ids (list or array): List of user indices.
            item_ids (list or array): List of item indices.
            ratings (list or array): List of corresponding ratings.
        """
        self.user_ids = user_ids
        self.item_ids = item_ids
        self.ratings = ratings
    
    def __len__(self):
        return len(self.user_ids)
    
    def __getitem__(self, idx):
        user = self.user_ids[idx]
        item = self.item_ids[idx]
        rating = self.ratings[idx]
        return user, item, rating

rating_dataset = RatingDataset(user_ids, item_ids, interaction_scores)
dataloader = DataLoader(rating_dataset, batch_size=batch_size, shuffle=True)

val_rating_dataset = RatingDataset(val_user_ids, val_item_ids, val_interaction_scores)
val_dataloader = DataLoader(val_rating_dataset, batch_size=batch_size, shuffle=True)

# Instantiate LightGCN model
model = LightGCN(embedding_dim=embedding_dim, n_layers=n_layers,
                 user_ids=user_ids, item_ids=item_ids, interaction_scores=interaction_scores, device=device)

# Training loop
n_epochs = 50

train(model, dataloader, val_dataloader, epochs=n_epochs, patience=2, print_steps=1, lr=0.001, device=device, progress_bar_type='tqdm_notebook')

Mock User IDs: [0, 0, 1, 2, 2]
Mock Item IDs: [0, 1, 2, 3, 1]
Interaction Scores: [1, 4, 5, 3, 2]




Epochs:   0%|          | 0/50 [00:00<?, ?it/s]

Training Epoch 1:   0%|          | 0/2 [00:00<?, ?it/s]

[32m2024-09-07 23:49:54.576[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mlog_gradients[0m:[36m56[0m - [1mStep 1, Gradient Norm for user_embedding.weight: 0.062471[0m
[32m2024-09-07 23:49:54.577[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mlog_gradients[0m:[36m56[0m - [1mStep 1, Gradient Norm for item_embedding.weight: 0.065761[0m
[32m2024-09-07 23:49:54.577[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mlog_gradients[0m:[36m59[0m - [1mStep 1, Total Gradient Norm: 0.090704[0m
[32m2024-09-07 23:49:54.578[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mtrain[0m:[36m157[0m - [1mStep 1, Learning Rate: 0.001000[0m
[32m2024-09-07 23:49:54.578[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mtrain[0m:[36m159[0m - [1mStep 1, Global Loss: 11.5081[0m
[32m2024-09-07 23:49:54.579[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mlog_gradients[0m:[36m56[0m - [1mStep 2, Gradient Norm for user_embedding.weight: 0.074053[

Training Epoch 2:   0%|          | 0/2 [00:00<?, ?it/s]

[32m2024-09-07 23:49:54.587[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mlog_gradients[0m:[36m56[0m - [1mStep 3, Gradient Norm for user_embedding.weight: 0.068096[0m
[32m2024-09-07 23:49:54.587[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mlog_gradients[0m:[36m56[0m - [1mStep 3, Gradient Norm for item_embedding.weight: 0.069900[0m
[32m2024-09-07 23:49:54.588[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mlog_gradients[0m:[36m59[0m - [1mStep 3, Total Gradient Norm: 0.097586[0m
[32m2024-09-07 23:49:54.588[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mtrain[0m:[36m157[0m - [1mStep 3, Learning Rate: 0.001000[0m
[32m2024-09-07 23:49:54.588[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mtrain[0m:[36m159[0m - [1mStep 3, Global Loss: 13.5060[0m
[32m2024-09-07 23:49:54.590[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mlog_gradients[0m:[36m56[0m - [1mStep 4, Gradient Norm for user_embedding.weight: 0.020492[

Training Epoch 3:   0%|          | 0/2 [00:00<?, ?it/s]

[32m2024-09-07 23:49:54.596[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mlog_gradients[0m:[36m56[0m - [1mStep 5, Gradient Norm for user_embedding.weight: 0.059056[0m
[32m2024-09-07 23:49:54.597[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mlog_gradients[0m:[36m56[0m - [1mStep 5, Gradient Norm for item_embedding.weight: 0.065333[0m
[32m2024-09-07 23:49:54.597[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mlog_gradients[0m:[36m59[0m - [1mStep 5, Total Gradient Norm: 0.088068[0m
[32m2024-09-07 23:49:54.597[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mtrain[0m:[36m157[0m - [1mStep 5, Learning Rate: 0.001000[0m
[32m2024-09-07 23:49:54.597[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mtrain[0m:[36m159[0m - [1mStep 5, Global Loss: 7.5072[0m
[32m2024-09-07 23:49:54.599[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mlog_gradients[0m:[36m56[0m - [1mStep 6, Gradient Norm for user_embedding.weight: 0.138600[0

# Prep data

In [7]:
train_df = pd.read_parquet("../data/train.parquet")
val_df = pd.read_parquet("../data/val.parquet")

In [8]:
from src.id_mapper import IDMapper

In [9]:
user_ids = train_df['user_id'].values
item_ids = train_df['parent_asin'].values
unique_user_ids = list(set(user_ids))
unique_item_ids = list(set(item_ids))

logger.info(f"{len(unique_user_ids)=:,.0f}, {len(unique_item_ids)=:,.0f}")

[32m2024-09-07 23:49:54.687[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m6[0m - [1mlen(unique_user_ids)=5,223, len(unique_item_ids)=2,653[0m


In [10]:
idm = IDMapper()
idm.fit(unique_user_ids, unique_item_ids)

In [11]:
user_indices = [idm.get_user_index(user_id) for user_id in user_ids]
item_indices = [idm.get_item_index(item_id) for item_id in item_ids]
ratings = train_df['rating'].values.tolist()

In [12]:
val_user_indices = [idm.get_user_index(user_id) for user_id in val_df['user_id']]
val_item_indices = [idm.get_item_index(item_id) for item_id in val_df['parent_asin']]
val_ratings = val_df['rating'].values.tolist()

# Train

In [13]:
rating_dataset = RatingDataset(user_indices, item_indices, ratings)
dataloader = DataLoader(rating_dataset, batch_size=batch_size, shuffle=True)

val_rating_dataset = RatingDataset(val_user_indices, val_item_indices, val_ratings)
val_dataloader = DataLoader(val_rating_dataset, batch_size=batch_size, shuffle=True)

In [14]:
embedding_dim = 128
n_layers = 3
batch_size = 256

# Instantiate LightGCN model
model = LightGCN(embedding_dim=embedding_dim, n_layers=n_layers,
                 user_ids=user_indices, item_ids=item_indices, interaction_scores=ratings, device=device)

#### Predict before train

In [15]:
user_id = 'AEHW2B54HDLZ3APBEWXHYLZ6SSYQ'
val_df.loc[lambda df: df['user_id'].eq(user_id)]

Unnamed: 0,user_id,parent_asin,rating,timestamp
34367,AEHW2B54HDLZ3APBEWXHYLZ6SSYQ,B07MYVF61Y,4.0,1654225907045


In [16]:
item_id = 'B07MYVF61Y'
user_indice = idm.get_user_index(user_id)
item_indice = idm.get_item_index(item_id)

model.predict([user_indice], [item_indice])

tensor([0.0076], grad_fn=<SumBackward1>)

#### Training loop

In [17]:
n_epochs = 50

train(model, dataloader, val_dataloader, epochs=n_epochs, patience=2, print_steps=100, lr=0.03, device=device, progress_bar_type='tqdm_notebook')



Epochs:   0%|          | 0/50 [00:00<?, ?it/s]

Training Epoch 1:   0%|          | 0/4524 [00:00<?, ?it/s]

[32m2024-09-07 23:49:56.857[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mlog_gradients[0m:[36m56[0m - [1mStep 100, Gradient Norm for user_embedding.weight: 0.001257[0m
[32m2024-09-07 23:49:56.858[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mlog_gradients[0m:[36m56[0m - [1mStep 100, Gradient Norm for item_embedding.weight: 0.292150[0m
[32m2024-09-07 23:49:56.858[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mlog_gradients[0m:[36m59[0m - [1mStep 100, Total Gradient Norm: 0.292153[0m
[32m2024-09-07 23:49:56.859[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mtrain[0m:[36m157[0m - [1mStep 100, Learning Rate: 0.030000[0m
[32m2024-09-07 23:49:56.859[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mtrain[0m:[36m159[0m - [1mStep 100, Global Loss: 19.1892[0m
[32m2024-09-07 23:49:58.897[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mlog_gradients[0m:[36m56[0m - [1mStep 200, Gradient Norm for user_embedding.weight

Training Epoch 2:   0%|          | 0/4524 [00:00<?, ?it/s]

[32m2024-09-07 23:51:36.261[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mlog_gradients[0m:[36m56[0m - [1mStep 4600, Gradient Norm for user_embedding.weight: 0.467167[0m
[32m2024-09-07 23:51:36.262[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mlog_gradients[0m:[36m56[0m - [1mStep 4600, Gradient Norm for item_embedding.weight: 0.884168[0m
[32m2024-09-07 23:51:36.262[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mlog_gradients[0m:[36m59[0m - [1mStep 4600, Total Gradient Norm: 0.999999[0m
[32m2024-09-07 23:51:36.262[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mtrain[0m:[36m157[0m - [1mStep 4600, Learning Rate: 0.019683[0m
[32m2024-09-07 23:51:36.262[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mtrain[0m:[36m159[0m - [1mStep 4600, Global Loss: 5.4515[0m
[32m2024-09-07 23:51:38.327[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mlog_gradients[0m:[36m56[0m - [1mStep 4700, Gradient Norm for user_embedding.w

Training Epoch 3:   0%|          | 0/4524 [00:00<?, ?it/s]

[32m2024-09-07 23:53:17.410[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mlog_gradients[0m:[36m56[0m - [1mStep 9100, Gradient Norm for user_embedding.weight: 0.776196[0m
[32m2024-09-07 23:53:17.411[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mlog_gradients[0m:[36m56[0m - [1mStep 9100, Gradient Norm for item_embedding.weight: 0.630490[0m
[32m2024-09-07 23:53:17.411[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mlog_gradients[0m:[36m59[0m - [1mStep 9100, Total Gradient Norm: 0.999999[0m
[32m2024-09-07 23:53:17.411[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mtrain[0m:[36m157[0m - [1mStep 9100, Learning Rate: 0.011623[0m
[32m2024-09-07 23:53:17.412[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mtrain[0m:[36m159[0m - [1mStep 9100, Global Loss: 3.4109[0m
[32m2024-09-07 23:53:19.730[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mlog_gradients[0m:[36m56[0m - [1mStep 9200, Gradient Norm for user_embedding.w

Training Epoch 4:   0%|          | 0/4524 [00:00<?, ?it/s]

[32m2024-09-07 23:55:01.861[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mlog_gradients[0m:[36m56[0m - [1mStep 13600, Gradient Norm for user_embedding.weight: 0.595938[0m
[32m2024-09-07 23:55:01.862[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mlog_gradients[0m:[36m56[0m - [1mStep 13600, Gradient Norm for item_embedding.weight: 0.391141[0m
[32m2024-09-07 23:55:01.862[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mlog_gradients[0m:[36m59[0m - [1mStep 13600, Total Gradient Norm: 0.712834[0m
[32m2024-09-07 23:55:01.863[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mtrain[0m:[36m157[0m - [1mStep 13600, Learning Rate: 0.007626[0m
[32m2024-09-07 23:55:01.863[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mtrain[0m:[36m159[0m - [1mStep 13600, Global Loss: 2.2870[0m
[32m2024-09-07 23:55:04.110[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mlog_gradients[0m:[36m56[0m - [1mStep 13700, Gradient Norm for user_embed

Training Epoch 5:   0%|          | 0/4524 [00:00<?, ?it/s]

[32m2024-09-07 23:56:42.926[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mlog_gradients[0m:[36m56[0m - [1mStep 18100, Gradient Norm for user_embedding.weight: 0.508699[0m
[32m2024-09-07 23:56:42.927[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mlog_gradients[0m:[36m56[0m - [1mStep 18100, Gradient Norm for item_embedding.weight: 0.341213[0m
[32m2024-09-07 23:56:42.928[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mlog_gradients[0m:[36m59[0m - [1mStep 18100, Total Gradient Norm: 0.612536[0m
[32m2024-09-07 23:56:42.928[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mtrain[0m:[36m157[0m - [1mStep 18100, Learning Rate: 0.004503[0m
[32m2024-09-07 23:56:42.928[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mtrain[0m:[36m159[0m - [1mStep 18100, Global Loss: 0.8676[0m
[32m2024-09-07 23:56:45.010[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mlog_gradients[0m:[36m56[0m - [1mStep 18200, Gradient Norm for user_embed

Training Epoch 6:   0%|          | 0/4524 [00:00<?, ?it/s]

[32m2024-09-07 23:58:22.516[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mlog_gradients[0m:[36m56[0m - [1mStep 22700, Gradient Norm for user_embedding.weight: 0.933609[0m
[32m2024-09-07 23:58:22.517[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mlog_gradients[0m:[36m56[0m - [1mStep 22700, Gradient Norm for item_embedding.weight: 0.293201[0m
[32m2024-09-07 23:58:22.517[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mlog_gradients[0m:[36m59[0m - [1mStep 22700, Total Gradient Norm: 0.978567[0m
[32m2024-09-07 23:58:22.518[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mtrain[0m:[36m157[0m - [1mStep 22700, Learning Rate: 0.002954[0m
[32m2024-09-07 23:58:22.518[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mtrain[0m:[36m159[0m - [1mStep 22700, Global Loss: 1.9047[0m
[32m2024-09-07 23:58:24.754[0m | [1mINFO    [0m | [36msrc.train_utils[0m:[36mlog_gradients[0m:[36m56[0m - [1mStep 22800, Gradient Norm for user_embed

# Predict

In [18]:
train_df

Unnamed: 0,user_id,parent_asin,rating,timestamp
1,AEVPPTMG43C6GWSR7I2UGRQN7WFQ,B0863MT183,4.0,1613701986538
2,AEVPPTMG43C6GWSR7I2UGRQN7WFQ,B08P8P7686,5.0,1613702112995
3,AEVPPTMG43C6GWSR7I2UGRQN7WFQ,B0B7LV3DN2,4.0,1617641445475
4,AEVPPTMG43C6GWSR7I2UGRQN7WFQ,B09WMQ6DXG,5.0,1620231368468
70,AHV6QCNBJNSGLATP56JAWJ3C4G2A,B019WRM1IA,5.0,1451860309000
...,...,...,...,...
735704,AHS2PQ33BWQLXC5NNUZS2BFXD34Q,B07TZT67KX,5.0,1622844181866
735800,AFO5SNKILFVJMSJJ2E3BRLDGE4NA,B09T5VN7D1,4.0,1601154352542
735801,AFO5SNKILFVJMSJJ2E3BRLDGE4NA,B09918MSTF,5.0,1602615880364
736772,AEFPHMM7CLX4UJNXJFQF4ZF5GNAA,B07P27XFP7,5.0,1599585146628


In [19]:
user_id = 'AEHW2B54HDLZ3APBEWXHYLZ6SSYQ'
val_df.loc[lambda df: df['user_id'].eq(user_id)]

Unnamed: 0,user_id,parent_asin,rating,timestamp
34367,AEHW2B54HDLZ3APBEWXHYLZ6SSYQ,B07MYVF61Y,4.0,1654225907045


In [20]:
item_id = 'B07MYVF61Y'
user_indice = idm.get_user_index(user_id)
item_indice = idm.get_item_index(item_id)

model.predict([user_indice], [item_indice])

tensor([4.6992], grad_fn=<SumBackward1>)