# LightGCN model RecSys

In [1]:
# Third-party dependencies.
# pandas and torch are imported explicitly: the cells below use `pd` and `torch`
# directly, and previously those names were only in scope as a side effect of
# the wildcard import from `utils`.
import pandas as pd
import torch
import yaml
from sklearn.model_selection import train_test_split

# Project-local modules.
from lightgcn import LightGCN
# NOTE(review): the wildcard import is kept because this notebook calls helpers
# such as purchase_users, prepare_val_test and train_and_evl without a prefix;
# prefer naming them explicitly once the full list of used helpers is confirmed.
from utils import *

# Show full cell contents instead of truncating wide text columns.
pd.set_option('display.max_colwidth', None)

In [2]:
# Pick the first CUDA GPU when one is visible to torch, otherwise fall back to CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
# Echo the selection as the cell output (assign "cpu" here instead to force CPU).
device # = "cpu"

device(type='cpu')

In [3]:
# Load Config
import os

# params.yaml and the data paths it contains are resolved relative to the
# working directory. Presumably the notebook is started one level below the
# project root, so step up once -- but only when params.yaml is not already
# visible, so that re-running this cell does not climb another directory.
if not os.path.isfile("params.yaml"):
    os.chdir("..")

# safe_load parses plain YAML only (no arbitrary object construction).
with open("params.yaml", encoding="utf-8") as config_file:
    config = yaml.safe_load(config_file)
config

/Users/yingkang/4thBrain/GNN-eCommerce


{'base': {'random_seed': 42},
 'data': {'cosmetic_shop': 'data/raw/cosmetic-shop-ecommerce-events/',
  'preprocessed': 'data/preprocessed/'},
 'training': {'event_type_weights_v1': {'view': 0.01,
   'cart': 0.1,
   'remove_from_cart': -0.09,
   'purchase': 1.0},
  'event_type_weights_v2': {'view': 0.15,
   'cart': 0.35,
   'remove_from_cart': -0.2,
   'purchase': 1.0}},
 'reports': None}

### Load the interaction matrix from CSV

In [4]:
# Read the preprocessed interactions and expose the product column under the
# name `item_id`, which the rest of the notebook uses.
interaction_matrix = (
    pd.read_csv(config['data']['preprocessed'] + "interaction_matrix.csv")
    .rename(columns={"product_id": "item_id"})
)

In [5]:
# Work on an independent copy holding only the three columns used downstream.
im = interaction_matrix.loc[:, ['user_id', 'item_id', 'weight']].copy()

In [6]:
# Summarise the full dataset: row count plus distinct users and items.
dataset_summary = (
    'Total data size: ', len(im),
    ', unique user: ', im['user_id'].nunique(),
    ', unique items: ', im['item_id'].nunique(),
)
print(*dataset_summary)

Total data size:  10157408 , unique user:  1639358 , unique items:  54571


In [19]:
# TODO(open question): should users who never purchased be removed before
# sampling? Not for now -- the filtered variant is kept below for reference.
# mini_im = purchase_users(im)
MINI_SAMPLE_SIZE = 200000  # previously tried: 100000
# Seed the draw from the project config so that re-running the notebook yields
# the same subset (an unseeded sample changes every downstream number).
mini_im = im.sample(MINI_SAMPLE_SIZE, random_state=config['base']['random_seed'])

In [14]:
# Report the size of the sampled subset and the length of what purchase_users
# returns for it.
mini_summary = (
    'Mini dataset size: ', len(mini_im),
    ', Users at least purchased once: ', len(purchase_users(mini_im)),
)
print(*mini_summary)
# print('Valid data percentage: ', f'{len(mini_im)/len(im):.2%}')

Mini dataset size:  10000 , Users at least purchased once:  1298


### Prepare the Train / Val / Test datasets

In [15]:
# 80 / 10 / 10 split: hold out 20% of the sampled interactions, then divide
# that hold-out evenly into test and validation.
# Both splits are seeded from the project config so the partitions are
# reproducible across kernel restarts.
SPLIT_SEED = config['base']['random_seed']
train_df, holdout_df = train_test_split(mini_im, test_size=0.2, random_state=SPLIT_SEED)
test_df, val_df = train_test_split(holdout_df, test_size=0.5, random_state=SPLIT_SEED)

In [16]:
# Distinct users/items in the sampled subset, followed by the row count of each split.
print('Mini set unique user: ', mini_im['user_id'].nunique(), ', unique items: ', mini_im['item_id'].nunique())
for split_label, split_frame in (
    ("Train Size  : ", train_df),
    ("Val Size : ", val_df),
    ("Test Size : ", test_df),
):
    print(split_label, len(split_frame))

Mini set unique user:  9541 , unique items:  6533
Train Size  :  8000
Val Size :  1000
Test Size :  1000


In [11]:
# Run the project data pipeline on the three splits.
# NOTE(review): train_df is both an input and an output here, so re-running
# this cell alone feeds the already-processed frame back in -- re-run the split
# cell first. TODO confirm whether prepare_val_test tolerates that.
(
    n_users,
    n_items,
    train_df,
    train_pos_list_df,
    val_pos_list_df,
    test_pos_list_df,
) = prepare_val_test(train_df, val_df, test_df)  # , val_u_i_matrix, test_u_i_matrix

In [12]:
# Report the counts and frame sizes returned by the data pipeline.
print("After data pipline")
print("n_users : ", n_users, ", n_items : ", n_items)
for frame_label, frame in (
    ("train_df Size  : ", train_df),
    ("val_pos_list_df Size : ", val_pos_list_df),
    ("test_pos_list_df Size : ", test_pos_list_df),
):
    print(frame_label, len(frame))

Users :  97855
Items :  29177
Train Size  :  140000
Val Size :  1763
Test Size :  1742


### Instantiate the model, then train and validate it

In [17]:
# Hyper-parameters for the LightGCN model and its optimizer.
latent_dim = 80     # embedding width; target is 128
n_layers = 3        # passed to LightGCN as num_layers
LR = 0.005          # Adam learning rate

# Seed torch's global RNG from the project config before the model is built so
# that any torch-driven randomness from here on is repeatable across runs.
# NOTE(review): randomness drawn from other libraries inside train_and_evl
# (if any) is not covered by this seed -- confirm and seed there as well.
torch.manual_seed(config['base']['random_seed'])

# Users and items share a single node index space of size n_users + n_items.
model = LightGCN(num_nodes=n_users+n_items, embedding_dim=latent_dim, num_layers=n_layers)
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
print("Size of Learnable Embedding : ", [param.shape for param in model.parameters()])

Size of Learnable Embedding :  [torch.Size([127032, 64])]


In [18]:
# Train on train_df and evaluate against the validation positives.
# NOTE(review): the meaning of the positional argument 100 is not visible from
# this notebook -- confirm against train_and_evl's signature.
# NOTE(review): the header printed during training lists precision before
# recall, while this unpacking binds recall first -- confirm the return order.
training_history = train_and_evl(
    n_users,
    n_items,
    100,
    train_df,
    train_pos_list_df,
    val_pos_list_df,
    model,
    optimizer,
    device=device,
    EPOCHS=50,
    BATCH_SIZE=1024,
    K=20,
    DECAY=0.0001,
)
bpr_loss, reg_loss, final_loss, recall, precision = training_history

bpr_loss | reg_loss | final_loss | precision | recall


  0%|          | 0/50 [00:00<?, ?it/s]

0.0225 0.0007 0.0231 0.0006 0.0116
0.0002 0.0008 0.001 0.0005 0.0102
0.0002 0.0007 0.0009 0.0008 0.015
0.0002 0.0006 0.0008 0.0007 0.0135
0.0002 0.0005 0.0007 0.0007 0.0128
0.0001 0.0005 0.0006 0.0006 0.0116
0.0001 0.0004 0.0006 0.0006 0.0108
0.0001 0.0004 0.0005 0.0006 0.0122
0.0001 0.0004 0.0005 0.0006 0.0119
0.0001 0.0003 0.0005 0.0007 0.0128
0.0001 0.0003 0.0005 0.0006 0.0111
0.0001 0.0003 0.0004 0.0007 0.0139
0.0001 0.0003 0.0004 0.0006 0.0119
0.0001 0.0003 0.0004 0.0005 0.0102
0.0001 0.0003 0.0004 0.0005 0.0094
0.0001 0.0003 0.0004 0.0005 0.0088
0.0001 0.0003 0.0004 0.0006 0.0111
0.0001 0.0003 0.0004 0.0004 0.0073
0.0001 0.0003 0.0004 0.0005 0.0094
0.0001 0.0002 0.0004 0.0005 0.009
0.0001 0.0002 0.0004 0.0005 0.0101
0.0001 0.0002 0.0003 0.0005 0.0105
0.0001 0.0002 0.0003 0.0005 0.0085
0.0001 0.0002 0.0003 0.0005 0.0091
0.0001 0.0002 0.0004 0.0005 0.0102
0.0001 0.0002 0.0004 0.0004 0.0076
0.0001 0.0002 0.0003 0.0005 0.0091
0.0001 0.0002 0.0003 0.0005 0.0087
0.0001 0.0002 0.0003 0.