# ABOUT:
- This notebook performs a simple test run of LightGCN using the Microsoft *recommenders* package.

In [18]:
import os

import pandas as pd

from recommenders.datasets.amazon_reviews import download_and_extract
from recommenders.datasets import movielens
from recommenders.utils.constants import SEED as DEFAULT_SEED
from recommenders.datasets.python_splitters import python_stratified_split
from recommenders.models.deeprec.DataModel.ImplicitCF import ImplicitCF
from recommenders.models.deeprec.deeprec_utils import prepare_hparams
from recommenders.models.deeprec.models.graphrec.lightgcn import LightGCN
from recommenders.utils.timer import Timer

In [22]:
# --- Experiment settings ---------------------------------------------------
TOP_K = 10                    # forwarded to prepare_hparams as top_k
MOVIELENS_DATA_SIZE = '100k'  # NOTE(review): not referenced by the visible cells; the ratings are read from a local CSV
EPOCHS = 50                   # forwarded to prepare_hparams as epochs
BATCH_SIZE = 1024             # forwarded to prepare_hparams as batch_size
SEED = DEFAULT_SEED           # shared by the split, ImplicitCF and LightGCN
COL_USER = "userID"
COL_ITEM = "itemID"

# Location of the LightGCN hyperparameter YAML file.
# Set the LIGHTGCN_YAML_FILE environment variable to run the notebook on another
# machine; when it is unset the original local path is used unchanged.
yaml_file = os.environ.get(
    "LIGHTGCN_YAML_FILE",
    r"C:\Users\tanch\Documents\NTU\NTU Year 4\FYP - GNN\code1\config\lightgcn.yml",
)
# user_file = "../../tests/resources/deeprec/lightgcn/user_embeddings.csv"
# item_file = "../../tests/resources/deeprec/lightgcn/item_embeddings.csv"

### read data

In [23]:
# Location of the ratings CSV.
# Set the ML_RATINGS_CSV environment variable to point at a copy of the file on
# another machine; when it is unset the original local path is used unchanged.
path = os.environ.get(
    "ML_RATINGS_CSV",
    r"C:\Users\tanch\Documents\NTU\NTU Year 4\FYP - GNN\data\ml-latest-small\ml-latest-small\ratings.csv",
)
df = pd.read_csv(path)
# Better to follow the default colnames in the package. The user/item names come
# from COL_USER / COL_ITEM so they cannot drift from the names passed to the
# splitter and to ImplicitCF in later cells.
df.columns = [COL_USER, COL_ITEM, "rating", "timestamp"]

### stratified split

In [26]:
# Split the ratings frame into train/test with a 0.8 ratio, filtering by user
# and seeding the splitter so the split is repeatable.
train, test = python_stratified_split(
    df,
    ratio=0.8,
    min_rating=1,
    filter_by='user',
    col_user=COL_USER,
    col_item=COL_ITEM,
    seed=SEED,
)

### generate csr matrix

In [27]:
# Wrap both splits in the ImplicitCF data object that is later handed to LightGCN.
# adj_dir=None: no adjacency-matrix directory is supplied.
data = ImplicitCF(
    train=train,
    test=test,
    adj_dir=None,
    col_user=COL_USER,
    col_item=COL_ITEM,
    seed=SEED,
)

  df = train if test is None else train.append(test)


### instantiate model

In [35]:
# Load the hyperparameters from the YAML file and override selected values
# with the settings from the configuration cell.
hparams = prepare_hparams(
    yaml_file,
    n_layers=3,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    learning_rate=0.005,
    eval_epoch=5,  # the training log reports evaluation metrics on every 5th epoch
    top_k=TOP_K,
)

In [29]:
# Build the LightGCN model from the hyperparameters and the ImplicitCF data object.
model = LightGCN(hparams=hparams, data=data, seed=SEED)

Already create adjacency matrix.
Already normalize adjacency matrix.
Using xavier initialization.


### train

In [30]:
# Fit the model inside a Timer context so the elapsed time can be reported.
with Timer() as train_time:
    model.fit()

training_summary = "Took {} seconds for training.".format(train_time.interval)
print(training_summary)

Epoch 1 (train)5.1s: train loss = 0.46225 = (mf)0.46209 + (embed)0.00016
Epoch 2 (train)4.9s: train loss = 0.22795 = (mf)0.22740 + (embed)0.00055
Epoch 3 (train)4.8s: train loss = 0.21432 = (mf)0.21365 + (embed)0.00067
Epoch 4 (train)4.8s: train loss = 0.20868 = (mf)0.20795 + (embed)0.00073
Epoch 5 (train)4.8s + (eval)0.3s: train loss = 0.19808 = (mf)0.19727 + (embed)0.00080, recall = 0.05682, ndcg = 0.15062, precision = 0.13082, map = 0.02728
Epoch 6 (train)4.9s: train loss = 0.19255 = (mf)0.19167 + (embed)0.00089
Epoch 7 (train)4.9s: train loss = 0.17989 = (mf)0.17892 + (embed)0.00098
Epoch 8 (train)4.8s: train loss = 0.16522 = (mf)0.16413 + (embed)0.00109
Epoch 9 (train)4.9s: train loss = 0.15915 = (mf)0.15795 + (embed)0.00120
Epoch 10 (train)4.9s + (eval)0.2s: train loss = 0.15102 = (mf)0.14970 + (embed)0.00132, recall = 0.08448, ndcg = 0.19700, precision = 0.17066, map = 0.04386
Epoch 11 (train)5.2s: train loss = 0.14483 = (mf)0.14338 + (embed)0.00145
Epoch 12 (train)4.8s: train l

In [17]:
# GPU device listing, currently disabled (both lines below are commented out).
# NOTE(review): the saved cell output is `[]`, presumably from an earlier run
# with these lines enabled — confirm, or remove this cell.
# from tensorflow.config import list_physical_devices
# list_physical_devices('GPU')

[]

In [36]:
# Display the hyperparameters object returned by prepare_hparams.
hparams

HParams object with values {'use_entity': True, 'use_context': True, 'cross_activation': 'identity', 'user_dropout': False, 'dropout': [0.0], 'attention_dropout': 0.0, 'load_saved_model': False, 'fast_CIN_d': 0, 'use_Linear_part': False, 'use_FM_part': False, 'use_CIN_part': False, 'use_DNN_part': False, 'init_method': 'tnormal', 'init_value': 0.01, 'embed_l2': 0.0, 'embed_l1': 0.0, 'layer_l2': 0.0, 'layer_l1': 0.0, 'cross_l2': 0.0, 'cross_l1': 0.0, 'reg_kg': 0.0, 'learning_rate': 0.005, 'lr_rs': 1, 'lr_kg': 0.5, 'kg_training_interval': 5, 'max_grad_norm': 2, 'is_clip_norm': 0, 'dtype': 32, 'optimizer': 'adam', 'epochs': 50, 'batch_size': 1024, 'enable_BN': False, 'show_step': 1, 'save_model': False, 'save_epoch': 100, 'write_tfevents': False, 'train_num_ngs': 4, 'need_sample': True, 'embedding_dropout': 0.0, 'EARLY_STOP': 100, 'min_seq_length': 1, 'slots': 5, 'cell': 'SUM', 'model_type': 'lightgcn', 'embed_size': 32, 'n_layers': 3, 'decay': 0.0001, 'eval_epoch': 5, 'top_k': 10, 'metri