In [1]:
import sys
import os
import papermill as pm
import scrapbook as sb
import pandas as pd
import numpy as np
import tensorflow as tf
tf.get_logger().setLevel('ERROR') # only show error messages

from recommenders.utils.timer import Timer
from recommenders.models.deeprec.models.graphrec.lightgcn import LightGCN
from recommenders.models.deeprec.DataModel.ImplicitCF import ImplicitCF
from recommenders.datasets import movielens
from recommenders.datasets.python_splitters import python_stratified_split
from recommenders.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k
from recommenders.utils.constants import SEED as DEFAULT_SEED
from recommenders.models.deeprec.deeprec_utils import prepare_hparams

print("System version: {}".format(sys.version))
print("Pandas version: {}".format(pd.__version__))
print("Tensorflow version: {}".format(tf.__version__))

System version: 3.8.5 (default, Sep  4 2020, 07:30:14) 
[GCC 7.3.0]
Pandas version: 1.4.1
Tensorflow version: 2.2.0


In [None]:
# top k items to recommend
TOP_K = 10

# Model parameters
EPOCHS = 10
BATCH_SIZE = 1024

SEED = DEFAULT_SEED  # Set None for non-deterministic results

yaml_file = "./lightgcn/lightgcn.yaml"
user_file = "./lightgcn/user_embeddings.csv"
item_file = "./lightgcn/item_embeddings.csv"

In [3]:
COL_USER = "userID"
COL_ITEM = "itemID"
COL_RATING = "rating"
COL_PREDICTION = "rating"
COL_TIMESTAMP = "timestamp"

root_dir = '/opt/ml/input/data/train/'
df = pd.read_csv(os.path.join(root_dir,'train_ratings.csv'), names=[COL_USER, COL_ITEM, COL_TIMESTAMP], header=0)
df.head()

Unnamed: 0,userID,itemID,timestamp
0,11,4643,1230782529
1,11,170,1230782534
2,11,531,1230782539
3,11,616,1230782542
4,11,2140,1230782563


In [4]:
df['rating'] = 1

In [5]:
train, test = python_stratified_split(df, ratio=0.75)

In [6]:
data = ImplicitCF(train=train, test=test, seed=SEED)

  df = train if test is None else train.append(test)


In [7]:
hparams = prepare_hparams(yaml_file,
                          n_layers=3,
                          batch_size=BATCH_SIZE,
                          epochs=EPOCHS,
                          learning_rate=0.005,
                          eval_epoch=1,
                          top_k=TOP_K,
                         )

In [13]:
model = LightGCN(hparams, data, seed=SEED)

Already create adjacency matrix.
Already normalize adjacency matrix.
Using xavier initialization.


2022-04-03 15:55:33.462350: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-04-03 15:55:33.463450: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1561] Found device 0 with properties: 
pciBusID: 0000:00:05.0 name: Tesla V100-SXM2-32GB computeCapability: 7.0
coreClock: 1.53GHz coreCount: 80 deviceMemorySize: 31.75GiB deviceMemoryBandwidth: 836.37GiB/s
2022-04-03 15:55:33.463658: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'libcudart.so.10.1'; dlerror: libcudart.so.10.1: cannot open shared object file: No such file or directory
2022-04-03 15:55:33.463742: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'libcublas.so.10'; dlerror: libcublas.so.10: cannot open shared object file: No such file or directory
2022-04-03 15:55:33.463766: I tensorfl

In [14]:
with Timer() as train_time:
    model.fit()

print("Took {} seconds for training.".format(train_time.interval))

Epoch:  1


100%|████████████████████████████████████████████████████████████████| 3776/3776 [1:22:17<00:00,  1.31s/it]


Epoch 1 (train)4937.3s: train loss = 0.24428 = (mf)0.24263 + (embed)0.00164
Epoch:  2


100%|█████████████████████████████████████████████████████████████████████████████████████████| 3776/3776 [1:21:05<00:00,  1.29s/it]


Epoch 2 (train)4865.9s: train loss = 0.18374 = (mf)0.17981 + (embed)0.00393
Epoch:  3


100%|█████████████████████████████████████████████████████████████████████████████████████████| 3776/3776 [1:20:31<00:00,  1.28s/it]


Epoch 3 (train)4831.6s: train loss = 0.15729 = (mf)0.15176 + (embed)0.00553
Epoch:  4


100%|█████████████████████████████████████████████████████████████████████████████████████████| 3776/3776 [1:20:27<00:00,  1.28s/it]


Epoch 4 (train)4827.2s: train loss = 0.14442 = (mf)0.13783 + (embed)0.00659
Epoch:  5


100%|█████████████████████████████████████████████████████████████████████████████████████████| 3776/3776 [1:20:24<00:00,  1.28s/it]


Epoch 5 (train)4824.2s + (eval)33.7s: train loss = 0.13741 = (mf)0.13011 + (embed)0.00730, recall = 0.10938, ndcg = 0.38397, precision = 0.35345, map = 0.06979
Epoch:  6


100%|█████████████████████████████████████████████████████████████████████████████████████████| 3776/3776 [1:20:25<00:00,  1.28s/it]


Epoch 6 (train)4826.0s: train loss = 0.13176 = (mf)0.12385 + (embed)0.00790
Epoch:  7


100%|████████████████████████████████████████████████████████████████| 3776/3776 [1:20:26<00:00,  1.28s/it]


Epoch 7 (train)4826.7s: train loss = 0.12663 = (mf)0.11812 + (embed)0.00850
Epoch:  8


100%|████████████████████████████████████████████████████████████████| 3776/3776 [1:21:47<00:00,  1.30s/it]


Epoch 8 (train)4907.2s: train loss = 0.12130 = (mf)0.11221 + (embed)0.00909
Epoch:  9


100%|████████████████████████████████████████████████████████████████| 3776/3776 [1:21:13<00:00,  1.29s/it]


Epoch 10 (train)4873.5s + (eval)33.0s: train loss = 0.11354 = (mf)0.10335 + (embed)0.01019, recall = 0.11712, ndcg = 0.40580, precision = 0.37496, map = 0.07591
Epoch:  11


100%|████████████████████████████████████████████████████████████████| 3776/3776 [1:24:48<00:00,  1.35s/it]


Epoch 11 (train)5089.0s: train loss = 0.10983 = (mf)0.09910 + (embed)0.01074
Epoch:  12


100%|████████████████████████████████████████████████████████████████| 3776/3776 [1:28:29<00:00,  1.41s/it]


Epoch 12 (train)5309.2s: train loss = 0.10706 = (mf)0.09585 + (embed)0.01122
Epoch:  13


100%|████████████████████████████████████████████████████████████████| 3776/3776 [1:30:53<00:00,  1.44s/it]


Epoch 13 (train)5453.5s: train loss = 0.10420 = (mf)0.09249 + (embed)0.01171
Epoch:  14


100%|████████████████████████████████████████████████████████████████| 3776/3776 [1:20:44<00:00,  1.28s/it]


Epoch 14 (train)4844.9s: train loss = 0.10111 = (mf)0.08894 + (embed)0.01218
Epoch:  15


 20%|████████████▉                                                    | 749/3776 [16:22<1:06:10,  1.31s/it]


KeyboardInterrupt: 

In [15]:
topk_scores = model.recommend_k_items(test, top_k=TOP_K, remove_seen=True)

topk_scores.head()

Unnamed: 0,userID,itemID,prediction
0,11,58559,9.783643
1,11,4370,9.741414
2,11,44191,8.84045
3,11,480,8.630714
4,11,33794,8.592502


In [16]:
eval_recall = recall_at_k(test, topk_scores, k=TOP_K)

print("Recall@K:\t%f" % eval_recall)

Recall@K:	0.120391


In [17]:
topk_scores = model.recommend_k_items(df, top_k=TOP_K, remove_seen=True)

In [20]:
topk_scores

Unnamed: 0,userID,itemID,prediction
0,11,58559,9.783643
1,11,4370,9.741414
2,11,44191,8.840450
3,11,480,8.630714
4,11,33794,8.592502
...,...,...,...
313595,138493,1270,7.550168
313596,138493,2712,7.490360
313597,138493,6365,7.348804
313598,138493,4370,7.333226


In [22]:
submission_df = topk_scores[['userID', 'itemID']]
submission_df.rename(columns = {"userID": "user", "itemID": "item"}, inplace=True)
submission_df.to_csv('/opt/ml/input/submission.csv', index=False)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  submission_df.rename(columns = {"userID": "user", "itemID": "item"}, inplace=True)
