### Import Libraries

In [1]:
import sys
import os
import scrapbook as sb
import pandas as pd
import numpy as np

from recommenders.utils.timer import Timer
from recommenders.models.deeprec.models.graphrec.lightgcn import LightGCN
from recommenders.models.deeprec.DataModel.ImplicitCF import ImplicitCF
from recommenders.datasets import movielens
from recommenders.datasets.python_splitters import python_stratified_split
from recommenders.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k
from recommenders.utils.constants import SEED as DEFAULT_SEED
from recommenders.models.deeprec.deeprec_utils import prepare_hparams

# Record the interpreter and pandas versions used for this run.
print(f"System version: {sys.version}")
print(f"Pandas version: {pd.__version__}")

System version: 3.8.16 (default, Mar  2 2023, 03:18:16) [MSC v.1916 64 bit (AMD64)]
Pandas version: 1.5.3


### Load Data

In [2]:
# Load the ratings table and the item-category table, then join them on the item key.
df1 = pd.read_csv('rats.csv')
df2 = pd.read_csv('icat.csv')
merged_df = pd.merge(df1, df2, on='itemId')

# Work on an explicit copy. pd.DataFrame(merged_df) does not copy by default, so the
# "new" frame can share data with merged_df and the column edits below may leak back
# into it; .copy() keeps merged_df as the untouched merge result.
df3 = merged_df.copy()

# Replace the category labels with their integer category codes.
df3['Category'] = df3['Category'].astype('category').cat.codes

# Rename the key columns to the userID / itemID names used by the later cells.
# pop-then-assign removes the old column and appends the new one at the end, which
# yields the same final column order as adding the copies and dropping the originals.
df3['userID'] = df3.pop('userId')
df3['itemID'] = df3.pop('itemId')
df3.head()

Unnamed: 0,rating,ItemName,Category,Quality,userID,itemID
0,1.533462,Restaurant Fake,5,1.647351,13,0
1,1.283205,Restaurant Fake,5,1.647351,15,0
2,1.506836,Restaurant Fake,5,1.647351,17,0
3,1.260289,Restaurant Fake,5,1.647351,19,0
4,3.203168,Restaurant Fake,5,1.647351,23,0


In [3]:
# Number of items to recommend per user.
TOP_K = 10

# Training settings forwarded to prepare_hparams.
EPOCHS = 15
BATCH_SIZE = 1024

# Seed handed to the data model and to LightGCN; set to None for non-deterministic results.
SEED = DEFAULT_SEED

# Hyper-parameter file given to prepare_hparams.
yaml_file = "lightgcn.yaml"

# Column names of the interaction frame, looked up by role.
header = dict(
    userID="userID",
    itemID="itemID",
    rating="rating",
)

### Split Train, Test

In [5]:
# Split the interactions with ratio 0.75 using the configured user/item columns.
# The seed comes from the notebook-wide SEED rather than a hard-coded 42, so the
# split follows the same determinism setting as the data model and LightGCN.
train, test = python_stratified_split(
    df3,
    ratio=0.75,
    col_user=header["userID"],
    col_item=header["itemID"],
    seed=SEED,
)

In [6]:
# Wrap the train/test frames in the ImplicitCF data model.
# The column names are passed explicitly from `header`, matching the split cell,
# instead of relying on the library's default column names.
data = ImplicitCF(
    train=train,
    test=test,
    col_user=header["userID"],
    col_item=header["itemID"],
    col_rating=header["rating"],
    seed=SEED,
)

  df = train if test is None else train.append(test)


In [7]:
# Keyword settings handed to prepare_hparams together with yaml_file.
hparam_overrides = {
    "n_layers": 3,
    "batch_size": BATCH_SIZE,
    "epochs": EPOCHS,
    "learning_rate": 0.015,
    "eval_epoch": 5,  # the training log reports metrics at epochs 5 and 10
    "top_k": TOP_K,
    "decay": 0.0001,
    "embed_size": 64,
}
hparams = prepare_hparams(yaml_file, **hparam_overrides)

### LightGCN Model

In [8]:
# Instantiate LightGCN from the prepared hyper-parameters and the data model,
# using the notebook-wide seed.
model = LightGCN(
    hparams,
    data,
    seed=SEED,
)

Already create adjacency matrix.
Already normalize adjacency matrix.
Using xavier initialization.


### Model Training

In [9]:
# Fit the model inside a Timer so the total training time can be reported.
with Timer() as train_time:
    model.fit()

elapsed = train_time.interval
print(f"Took {elapsed} seconds for training.")

Epoch 1 (train)241.2s: train loss = 0.03209 = (mf)0.03114 + (embed)0.00095
Epoch 2 (train)242.3s: train loss = 0.00060 = (mf)0.00026 + (embed)0.00034
Epoch 3 (train)248.0s: train loss = 0.00046 = (mf)0.00014 + (embed)0.00032
Epoch 4 (train)252.2s: train loss = 0.00040 = (mf)0.00007 + (embed)0.00032
Epoch 5 (train)247.6s + (eval)34.6s: train loss = 0.00037 = (mf)0.00006 + (embed)0.00031, recall = 0.44415, ndcg = 0.20566, precision = 0.05273, map = 0.12741
Epoch 6 (train)243.4s: train loss = 0.00037 = (mf)0.00007 + (embed)0.00031
Epoch 7 (train)249.1s: train loss = 0.00036 = (mf)0.00006 + (embed)0.00030
Epoch 8 (train)243.4s: train loss = 0.00033 = (mf)0.00004 + (embed)0.00029
Epoch 9 (train)250.1s: train loss = 0.00034 = (mf)0.00006 + (embed)0.00028
Epoch 10 (train)250.0s + (eval)34.2s: train loss = 0.00031 = (mf)0.00004 + (embed)0.00027, recall = 0.46330, ndcg = 0.21596, precision = 0.05525, map = 0.13466
Epoch 11 (train)296.3s: train loss = 0.00029 = (mf)0.00003 + (embed)0.00026
Epoch

In [10]:
# Produce the TOP_K recommendations for the test frame.
# NOTE(review): remove_seen=True presumably excludes items already seen in
# training — confirm against the recommenders API documentation.
topk_scores = model.recommend_k_items(
    test,
    top_k=TOP_K,
    remove_seen=True,
)

topk_scores.head()

Unnamed: 0,userID,itemID,prediction
0,0,0,-0.707874
1,0,9,-0.784958
2,0,7,-0.894917
3,0,12,-1.025793
4,0,2,-1.049444


In [11]:
# Ranking metrics of the recommendations against the test frame, each cut off at TOP_K.
eval_map = map_at_k(test, topk_scores, k=TOP_K)
eval_ndcg = ndcg_at_k(test, topk_scores, k=TOP_K)
eval_precision = precision_at_k(test, topk_scores, k=TOP_K)
eval_recall = recall_at_k(test, topk_scores, k=TOP_K)

# Print one "label:<tab>value" line per metric, with six decimals.
metric_report = [
    ("MAP", eval_map),
    ("NDCG", eval_ndcg),
    ("Precision@K", eval_precision),
    ("Recall@K", eval_recall),
]
print("\n".join(f"{label}:\t{value:f}" for label, value in metric_report))

MAP:	0.118861
NDCG:	0.193370
Precision@K:	0.050437
Recall@K:	0.420329
