# Import libraries

In [1]:
import ast
import json

import pandas as pd
import torch
from torch.utils.data import DataLoader

from dataset import NCFDataset
from recom_ncf import NCFRecommender
from evaluation import Evaluation
from helpers.index_manager import IndexManager
from helpers.dataloader_custom_functions import collate_fn

# Loading data

In [2]:
# Read the train / validation / test splits and the metadata table
# (kept as df_features) from CSV, in that order.
df_train, df_val, df_test, df_features = map(
    pd.read_csv,
    (
        '../data/train-leave2.csv',
        '../data/val-leave2.csv',
        '../data/test-leave2.csv',
        '../data/metadata.csv',
    ),
)

# Restore the previously saved index into a fresh IndexManager.
index_manager = IndexManager()
index_manager.load('../data/index.pkl')

# Per-feature input dimensions, keyed by feature name.
with open('../data/feature-dims.json') as dims_file:
    input_dims = json.load(dims_file)

In [3]:
features = ['tags']

# Map every selected feature to the pair (input dimension, 8). A feature that
# is missing from input_dims falls back to an input dimension of 1.
# NOTE(review): the constant 8 is presumably the per-feature embedding size;
# confirm against NCFDataset / NCFRecommender.
feature_dims = {name: (input_dims.get(name, 1), 8) for name in features}

In [4]:
def str_to_list(df, cols):
    """Parse stringified Python literals in the given columns into objects.

    Every cell in ``cols`` that is a ``str`` is evaluated with
    ``ast.literal_eval`` (for example ``"['a', 'b']"`` becomes ``['a', 'b']``).
    Cells that are not strings, such as values that were already parsed or
    missing values, are left untouched. This makes the function idempotent,
    so re-running the calling cell does not raise.

    Args:
        df: DataFrame whose columns should be parsed. It is modified in place.
        cols: Iterable of column names to parse.

    Returns:
        The same ``df`` object, with the listed columns parsed.

    Raises:
        KeyError: If a name in ``cols`` is not a column of ``df``.
        ValueError, SyntaxError: If a string cell is not a valid Python literal.
    """
    def _parse(value):
        # Only raw text needs evaluating; literal_eval rejects non-string
        # objects such as lists or NaN with a ValueError.
        return ast.literal_eval(value) if isinstance(value, str) else value

    for col in cols:
        df[col] = df[col].apply(_parse)
    return df

category_cols = ['tags']
# category_cols = ['tags', 'publishers', 'developers', 'genres', 'categories', 'supported_languages']

# Run the same column parsing over every frame and rebind each name to the
# frame returned by str_to_list.
df_train, df_val, df_test, df_features = (
    str_to_list(frame, category_cols)
    for frame in (df_train, df_val, df_test, df_features)
)

# Creating datasets

In [5]:
# Both splits are built with the same feature configuration and feature table.
shared_dataset_args = {'feature_dims': feature_dims, 'df_features': df_features}
train_dataset = NCFDataset(df_train, **shared_dataset_args)
val_dataset = NCFDataset(df_val, **shared_dataset_args)

In [6]:
# Loader settings shared by the training and validation loaders.
dataloader_params = dict(
    batch_size=2 ** 13,  # 8192 samples per batch
    num_workers=4,
    persistent_workers=True,
    prefetch_factor=4,
    pin_memory=True,
    pin_memory_device='cuda',
    collate_fn=collate_fn,
)

# Only the training loader shuffles; the validation loader does not.
train_dataloader = DataLoader(dataset=train_dataset, shuffle=True, **dataloader_params)
val_dataloader = DataLoader(dataset=val_dataset, shuffle=False, **dataloader_params)

# Training model

In [7]:
# Seed PyTorch before the model is built and trained so that PyTorch-driven
# randomness starts from a known state on every "Restart & Run All".
# NOTE(review): this does not cover randomness drawn from `random` / `numpy`
# inside the project modules (if any), and GPU kernels may still be
# non-deterministic.
SEED = 42
torch.manual_seed(SEED)

# Hyperparameters forwarded verbatim to NCFRecommender as keyword arguments.
params = {
    'factors': 32,
    'mlp_user_item_dim': 64,
    'learning_rate': 0.003,
    'epochs': 10,
    'optimizer': 'adagrad',
    'dropout': 0.2,
    'weight_decay': 0.0001,
    'loss_fn': 'mse',
}

# Values registered in the index manager under the user and item keys.
unique_users = index_manager.get_indexed_values('user_id')
unique_items = index_manager.get_indexed_values('item_id')

# The model only receives the item id column plus the selected feature columns.
model = NCFRecommender(
    unique_users,
    unique_items,
    mlp_feature_dims=feature_dims,
    df_features=df_features[['item_id'] + features],
    **params,
)
model.fit(train_dataloader, val_dataloader)

Epoch 1/10
Train loss: 0.213629, Validation loss: 0.212007
Epoch 2/10
Train loss: 0.148121, Validation loss: 0.207954
Epoch 3/10
Train loss: 0.137663, Validation loss: 0.202144
Epoch 4/10
Train loss: 0.129062, Validation loss: 0.197151
Epoch 5/10
Train loss: 0.115756, Validation loss: 0.189991
Epoch 6/10
Train loss: 0.092461, Validation loss: 0.187018
Epoch 7/10
Train loss: 0.074627, Validation loss: 0.189036
Epoch 8/10
Train loss: 0.066250, Validation loss: 0.190561
Epoch 9/10
Train loss: 0.061095, Validation loss: 0.191442
Early stopping triggered after 9 epochs
Training completed!


# Evaluating model

In [8]:
# Evaluate the trained recommender on the test split.
evaluator = Evaluation(recommender=model, test_data=df_test)
eval_batch_sizes = {'user_batch_size': 512, 'item_batch_size': 4096}
metrics = evaluator.evaluate(**eval_batch_sizes)

Creating ground truth sets...
Generating predictions...
Processing 1 of 121682 users... (0.00%)
Processing 513 of 121682 users... (0.42%)
Processing 1025 of 121682 users... (0.84%)
Processing 1537 of 121682 users... (1.26%)
Processing 2049 of 121682 users... (1.68%)
Processing 2561 of 121682 users... (2.10%)
Processing 3073 of 121682 users... (2.52%)
Processing 3585 of 121682 users... (2.95%)
Processing 4097 of 121682 users... (3.37%)
Processing 4609 of 121682 users... (3.79%)
Processing 5121 of 121682 users... (4.21%)
Processing 5633 of 121682 users... (4.63%)
Processing 6145 of 121682 users... (5.05%)
Processing 6657 of 121682 users... (5.47%)
Processing 7169 of 121682 users... (5.89%)
Processing 7681 of 121682 users... (6.31%)
Processing 8193 of 121682 users... (6.73%)
Processing 8705 of 121682 users... (7.15%)
Processing 9217 of 121682 users... (7.57%)
Processing 9729 of 121682 users... (7.99%)
Processing 10241 of 121682 users... (8.42%)
Processing 10753 of 121682 users... (8.84%)


In [9]:
# Display the metrics returned by evaluator.evaluate().
metrics

{'Hit Ratio@10': 0.040120971055702566,
 'NDCG@10': 0.019648531350272405,
 'Recall@10': 0.040120971055702566}