# Import libraries

In [1]:
import pandas as pd
from torch.utils.data import DataLoader
from dataset import NCFDataset
from ncf import ModelType
from recom_ncf import NCFRecommender
from evaluation import Evaluation
from helpers.index_manager import IndexManager
from helpers.dataloader_custom_functions import collate_fn
import json
import ast

# Loading data

In [2]:
# Read the three interaction splits and the item metadata table in one pass.
df_train, df_val, df_test, df_features = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/train-leave2.csv',
        '../data/val-leave2.csv',
        '../data/test-leave2.csv',
        '../data/metadata.csv',
    )
)

# Restore the saved index state; it is queried for 'user_id' / 'item_id' values
# when the model is built.
# NOTE(review): the .pkl extension suggests pickle — only load files from a trusted source.
index_manager = IndexManager()
index_manager.load('../data/index.pkl')

# Feature dimensions stored next to the data.
# NOTE(review): `input_dims` is not referenced in the cells visible here — confirm it is still needed.
with open('../data/feature-dims.json') as file:
    input_dims = json.load(file)

In [3]:
# Per-feature size pairs handed to the dataset and the model.
# NOTE(review): each pair looks like [input size, embedding size] — confirm against NCFDataset / NCFRecommender.
feature_dims = {
    'log_total_recommendation': [1, 16],
    'genres': [28, 32],
    'developers': [43743, 128],
}
# Feature column names, in the same order as the keys above.
features = list(feature_dims)

In [4]:
def str_to_list(df, cols):
    """Parse stringified Python literals in the given columns into objects.

    Each string cell in ``cols`` is evaluated with ``ast.literal_eval``. Cells
    that are not strings (for example lists produced by an earlier call, or
    missing values) are left untouched, so calling this again on an already
    parsed frame is a no-op instead of raising ``ValueError``.

    Args:
        df: DataFrame to update. It is modified in place.
        cols: Iterable of column names to parse.

    Returns:
        The same ``df`` object, with the listed columns parsed.

    Raises:
        ValueError, SyntaxError: If a string cell is not a valid Python literal.
        KeyError: If a name in ``cols`` is not a column of ``df``.
    """
    def _parse(value):
        # literal_eval only accepts source text; anything else is already parsed.
        return ast.literal_eval(value) if isinstance(value, str) else value

    for col in cols:
        df[col] = df[col].apply(_parse)
    return df

# Columns that are run through str_to_list for every frame.
category_cols = ['genres', 'developers']
df_train, df_val, df_test, df_features = (
    str_to_list(frame, category_cols)
    for frame in (df_train, df_val, df_test, df_features)
)

# Creating datasets

In [5]:
# Wrap the training and validation splits with the same feature configuration.
train_dataset, val_dataset = (
    NCFDataset(split_df, feature_dims=feature_dims, df_features=df_features)
    for split_df in (df_train, df_val)
)

In [6]:
# Loader settings shared by the training and validation loaders.
dataloader_params = dict(
    batch_size=4096,
    num_workers=4,
    persistent_workers=True,
    prefetch_factor=4,
    pin_memory=True,
    pin_memory_device='cuda',
    collate_fn=collate_fn,
)

# Only the training loader shuffles its samples.
train_dataloader = DataLoader(train_dataset, **dataloader_params, shuffle=True)
val_dataloader = DataLoader(val_dataset, **dataloader_params, shuffle=False)

# Training model

In [7]:
# Hyperparameters forwarded to NCFRecommender as keyword arguments.
params = {
    'factors': 32,
    'mlp_user_item_dim': 64,
    'learning_rate': 0.0001,
    'epochs': 50,
    'optimizer': 'adagrad',
    'dropout': 0.5,
    'weight_decay': 0.005,
    'loss_fn': 'bce',
}

# Values the index manager holds for users and items.
unique_users = index_manager.get_indexed_values('user_id')
unique_items = index_manager.get_indexed_values('item_id')

# Build the early-fusion model on the item id plus the selected feature columns.
model = NCFRecommender(
    unique_users,
    unique_items,
    mlp_feature_dims=feature_dims,
    df_features=df_features[['item_id', *features]],
    model_type=ModelType.EARLY_FUSION,
    **params,
)
model.fit(train_dataloader, val_dataloader)

Epoch 1/50
Train loss: 0.674671, Validation loss: 0.677949
Epoch 2/50
Train loss: 0.657085, Validation loss: 0.668007
Epoch 3/50
Train loss: 0.639986, Validation loss: 0.656741
Epoch 4/50
Train loss: 0.622640, Validation loss: 0.645895
Epoch 5/50
Train loss: 0.605239, Validation loss: 0.636232
Epoch 6/50
Train loss: 0.589543, Validation loss: 0.628316
Epoch 7/50
Train loss: 0.575622, Validation loss: 0.622340
Epoch 8/50
Train loss: 0.564283, Validation loss: 0.618272
Epoch 9/50
Train loss: 0.555161, Validation loss: 0.615809
Epoch 10/50
Train loss: 0.547614, Validation loss: 0.614628
Epoch 11/50
Train loss: 0.541960, Validation loss: 0.614416
Epoch 12/50
Train loss: 0.537410, Validation loss: 0.614873
Epoch 13/50
Train loss: 0.533775, Validation loss: 0.615766
Epoch 14/50
Train loss: 0.531806, Validation loss: 0.616861
Epoch 15/50
Train loss: 0.529255, Validation loss: 0.618087
Epoch 16/50
Train loss: 0.527695, Validation loss: 0.619296
Early stopping triggered after 16 epochs
Training

# Evaluating model

In [8]:
# Evaluate the fitted recommender on the test split and keep the resulting metrics.
evaluator = Evaluation(
    test_data=df_test,
    recommender=model,
)
metrics = evaluator.evaluate()

Creating ground truth sets...
Generating predictions...
Processing 1 of 121682 users... (0.00%)
Memory usage: 0.22216796875
Increased user batch size from 128 to 141
Increased item batch size from 1024 to 1126
Processing 129 of 121682 users... (0.11%)
Memory usage: 0.22216796875
Increased user batch size from 141 to 155
Increased item batch size from 1126 to 1239
Processing 270 of 121682 users... (0.22%)
Memory usage: 0.24951171875
Increased user batch size from 155 to 170
Increased item batch size from 1239 to 1363
Processing 425 of 121682 users... (0.35%)
Memory usage: 0.282470703125
Increased user batch size from 170 to 187
Increased item batch size from 1363 to 1499
Processing 595 of 121682 users... (0.49%)
Memory usage: 0.322265625
Increased user batch size from 187 to 206
Increased item batch size from 1499 to 1649
Processing 782 of 121682 users... (0.64%)
Memory usage: 0.370361328125
Increased user batch size from 206 to 227
Increased item batch size from 1649 to 1814
Processing

In [9]:
# Show the metrics returned by evaluator.evaluate() as the cell output.
metrics

{'Hit Ratio@10': 0.033891619138409955,
 'NDCG@10': 0.014659096929684732,
 'Recall@10': 0.033891619138409955,
 'ARP@10': 1280034.6000029354,
 'Pop Ratio@10': 1.0}