# About
- This notebook trains LightGCN on the ModCloth dataset and saves the model.

In [1]:
# from recommenders.models.sar import SAR
from recommenders.datasets.python_splitters import python_stratified_split
from recommenders.utils.constants import SEED as DEFAULT_SEED
from recommenders.evaluation.python_evaluation import precision_at_k
from recommenders.models.deeprec.DataModel.ImplicitCF import ImplicitCF
from recommenders.models.deeprec.deeprec_utils import prepare_hparams
from recommenders.models.deeprec.models.graphrec.lightgcn import LightGCN
from recommenders.utils.timer import Timer

import pandas as pd
import os
# import gzip
# import json
# import seaborn as sb

### Read data

In [2]:
# Location of the pickled interaction data. Set the INTERACTION_DATA_PATH
# environment variable to run this notebook on another machine; the original
# local path is kept as the fallback so existing behaviour is unchanged.
path = os.environ.get(
    "INTERACTION_DATA_PATH",
    r"C:\Users\tanch\Documents\NTU\NTU Year 4\FYP - GNN\Recommender API\data\interaction_data.pkl",
)
# NOTE(review): unpickling can execute arbitrary code -- only load files from a trusted source.
modcloth_data = pd.read_pickle(path)

### Data preparation
- Preparation for collaborative-filtering (CF) training

In [3]:
# --- column names of the interaction table ---
COL_USER = "user_id"
COL_ITEM = "item_id"
COL_RATING = "rating"
COLS = [COL_USER, COL_ITEM, COL_RATING]

# --- data split ---
SPLIT_RATIO = 0.75  # ratio passed to python_stratified_split
SEED = 0
MIN_INTERACTIONS = 5  # NOTE(review): not referenced by any visible cell

# --- model / evaluation settings ---
TOP_K = 10
BATCH_SIZE = 1024
loss_type = "AmpBPR2"
loss_neg_weight = 1.5

# --- save / logging flags ---
save_model = False
save_epoch = 50
log_wandb = False

In [4]:
# Split the interactions into train and test sets with the stratified
# splitter, using SPLIT_RATIO and filtering by user.
# NOTE(review): min_rating is hard-coded to 1 while MIN_INTERACTIONS is defined
# in the config cell but unused here -- confirm which threshold is intended.
split_kwargs = dict(
    ratio=SPLIT_RATIO,
    min_rating=1,
    filter_by="user",
    col_user=COL_USER,
    col_item=COL_ITEM,
    seed=SEED,
)
train, test = python_stratified_split(modcloth_data, **split_kwargs)

## Train and Evaluate: LightGCN

In [5]:
# Relative path of the LightGCN hyper-parameter YAML passed to prepare_hparams.
yaml_file = "./models/lightgcn.yml"
# MODEL_DIR = r"C:\Users\tanch\Documents\NTU\NTU Year 4\FYP - GNN\models"

In [6]:
# Wrap the train/test splits in the ImplicitCF data object that is later
# handed to LightGCN. No adjacency-matrix directory is supplied (adj_dir=None).
data = ImplicitCF(
    train=train,
    test=test,
    adj_dir=None,
    col_user=COL_USER,
    col_item=COL_ITEM,
    col_rating=COL_RATING,
    seed=SEED,
)

  df = train if test is None else train.append(test)


In [9]:
# Keyword overrides passed to prepare_hparams alongside the YAML file.
hparam_overrides = dict(
    n_layers=2,
    loss_type=loss_type,
    loss_neg_weight=loss_neg_weight,
    log_wandb=log_wandb,
    batch_size=BATCH_SIZE,
    epochs=50,
    learning_rate=0.005,
    eval_epoch=1,
    top_k=TOP_K,
    COL_USER=COL_USER,
    COL_ITEM=COL_ITEM,
    COL_RATING=COL_RATING,
)

# NOTE(review): each pass rebuilds hparams and the model with the same inputs
# and seed; nothing is trained inside the loop, so only the model from the
# last pass is kept. Confirm whether the three repetitions are still needed.
for i in range(3):
    hparams = prepare_hparams(yaml_file, **hparam_overrides)
    # initiate model
    model = LightGCN(hparams, data, seed=SEED)

Already create adjacency matrix.
Already normalize adjacency matrix.
Using xavier initialization.


## Load and predict

In [10]:
# Saved model to restore. Set the LIGHTGCN_CHECKPOINT_PATH environment variable
# to load from another location; the original local path is kept as the
# fallback so existing behaviour is unchanged.
save_path_str = os.environ.get(
    "LIGHTGCN_CHECKPOINT_PATH",
    r"C:\Users\tanch\Documents\NTU\NTU Year 4\FYP - GNN\Recommender API\models\epoch_50",
)
model.load(save_path_str)

# Top-K recommendations for the users in `test` (remove_seen=True).
top_k = model.recommend_k_items(test, top_k=TOP_K, remove_seen=True)

# Precision@K of those recommendations against the held-out test set.
eval_precision = precision_at_k(
    test,
    top_k,
    col_user=COL_USER,
    col_item=COL_ITEM,
    col_rating=COL_RATING,
    k=TOP_K,
)
eval_precision

INFO:tensorflow:Restoring parameters from C:\Users\tanch\Documents\NTU\NTU Year 4\FYP - GNN\Recommender API\models\epoch_50


0.03786069651741293

In [15]:
# Single-user query: ask the restored model for TOP_K items for user "999697".
query_users = pd.DataFrame({"user_id": ["999697"]})
model.recommend_k_items(query_users, top_k=TOP_K, remove_seen=True)

Unnamed: 0,user_id,item_id,prediction
0,999697,645822,3.555731
1,999697,412737,3.384269
2,999697,391519,2.968252
3,999697,401773,2.734417
4,999697,200824,2.700134
5,999697,486643,2.668071
6,999697,539980,2.380388
7,999697,406357,2.226636
8,999697,175771,2.195363
9,999697,654585,2.061988
