# Load the Data

In [1]:
import pandas as pd
import numpy as np
from pathlib import Path

# Directory layout: the project folder is looked up under the user's home.
PATH_HOME = Path.home()
PATH_PROJ = PATH_HOME / 'pyGRU4REC'
PATH_DATA, PATH_MODEL = PATH_PROJ / 'data', PATH_PROJ / 'models'

# File names of the two tab-separated files and their full paths.
train, test = 'train.tsv', 'test.tsv'
PATH_TRAIN, PATH_TEST = PATH_DATA / train, PATH_DATA / test

# Read both files with the same explicit column names. The generator's loop
# variable is local to the expression, so no extra name is left behind in the
# notebook namespace.
df_train, df_test = (
    pd.read_csv(tsv_path, sep='\t', names=['SessionId', 'ItemId', 'TimeStamp'])
    for tsv_path in (PATH_TRAIN, PATH_TEST)
)

# Train

In [None]:
from modules.model import GRU4REC
import torch

# Column names of the interaction log, handed to the model's data setup.
session_key = 'SessionId'
item_key = 'ItemId'
time_key = 'TimeStamp'

# Network shape. Input and output sizes both equal the number of distinct
# items in the full training frame (not only in the subset selected below).
input_size = df_train[item_key].nunique()
hidden_size = 100
num_layers = 1
output_size = input_size
batch_size = 50
dropout_input = 0
dropout_hidden = .5

# Optimizer settings.
optimizer_type = 'Adagrad'
lr = .01
weight_decay = 0
momentum = 0
eps = 1e-6

loss_type = 'TOP1'

n_epochs = 10
time_sort = False

# Number of leading rows of df_train to train on; set to -1 to use every row.
# NOTE(review): the evaluation cells further down initialise their data from
# the full df_train -- confirm that a model trained on a subset is what they
# are meant to evaluate.
n_samples = 10000

# Use the GPU only when CUDA is actually available, so the cell does not
# hard-require a GPU.
use_cuda = torch.cuda.is_available()

# Fix the random seeds for the CPU and CUDA generators.
torch.manual_seed(7)
torch.cuda.manual_seed(7)

# Explicit positional slice of the first n_samples rows (or the whole frame).
train_data = df_train if n_samples == -1 else df_train.iloc[:n_samples]

# Assemble the recommender from the hyperparameters defined earlier in this cell.
model = GRU4REC(
    input_size, hidden_size, output_size,
    num_layers=num_layers,
    batch_size=batch_size,
    dropout_input=dropout_input,
    dropout_hidden=dropout_hidden,
    optimizer_type=optimizer_type,
    lr=lr,
    weight_decay=weight_decay,
    momentum=momentum,
    eps=eps,
    loss_type=loss_type,
    time_sort=time_sort,
    use_cuda=use_cuda,
)

# The three column names are all plain strings, so they are passed by keyword
# (the same form the evaluation cells use) instead of relying on positional
# order, where a swap would go unnoticed.
model.init_data(train_data, df_test,
                session_key=session_key, time_key=time_key, item_key=item_key)

# Train for n_epochs, handing the model name and the models directory to train().
model_name = 'GRU4REC'
model.train(n_epochs=n_epochs, model_name=model_name, save_dir=PATH_MODEL)

## Evaluate using the trained models

In [17]:
from pathlib import Path
import pandas as pd
import numpy as np
from modules.layer import GRU
from modules.model import GRU4REC
import torch

# Project paths, defined again for the evaluation section.
PATH_HOME = Path.home()
PATH_PROJ = PATH_HOME.joinpath('pyGRU4REC')
PATH_DATA = PATH_PROJ.joinpath('data')
PATH_MODEL = PATH_PROJ.joinpath('models')
train, test = 'train.tsv', 'test.tsv'
PATH_TRAIN = PATH_DATA.joinpath(train)
PATH_TEST = PATH_DATA.joinpath(test)

# Column keys used both to label the loaded frames and by the model's data setup.
session_key, time_key, item_key = 'SessionId', 'Time', 'ItemId'

# Columns are labelled in file order: session, item, time. Building the list
# from the key variables keeps the labels and the keys identical.
df_train = pd.read_csv(PATH_TRAIN, sep='\t', names=[session_key, item_key, time_key])
df_test = pd.read_csv(PATH_TEST, sep='\t', names=[session_key, item_key, time_key])

## 1. Load the Common Parameters

In [18]:
# Network dimensions shared by every evaluation below: input and output sizes
# both equal the number of distinct items in the training frame.
input_size = df_train[item_key].nunique()
output_size = input_size
hidden_size = 100
num_layers = 1

# Use the GPU only when CUDA is actually available, so the evaluation cells do
# not hard-require a GPU.
use_cuda = torch.cuda.is_available()
time_sort = False

optimizer_type = 'Adagrad'

## 2. Evaluation (TOP1 Loss)

In [19]:
# Checkpoint to evaluate, looked up inside the models directory.
model_name = 'GRU4REC_TOP1_Adagrad_0.01_epoch10'
model_file = PATH_MODEL/model_name

# Settings passed to the constructors for this checkpoint.
loss_type = 'TOP1'
lr = 0.01

dropout_hidden = .5
dropout_input = 0
batch_size = 50
momentum = 0

# Build the bare GRU network that will receive the saved weights.
gru = GRU(input_size, hidden_size, output_size,
          num_layers=num_layers,
          dropout_input=dropout_input,
          dropout_hidden=dropout_hidden,
          batch_size=batch_size,
          use_cuda=use_cuda)

# map_location='cpu' deserialises the stored tensors onto the CPU, so loading
# does not depend on the device the checkpoint was saved from;
# load_state_dict then copies the values into the network's own parameters.
gru.load_state_dict(torch.load(model_file, map_location='cpu'))

# Hand the restored network to GRU4REC through `pretrained`.
model = GRU4REC(input_size, hidden_size, output_size,
                num_layers=num_layers,
                dropout_input=dropout_input,
                dropout_hidden=dropout_hidden,
                batch_size=batch_size,
                use_cuda=use_cuda,
                loss_type=loss_type,
                optimizer_type=optimizer_type,
                lr=lr,
                momentum=momentum,
                time_sort=time_sort,
                pretrained=gru)

model.init_data(df_train, df_test, session_key=session_key, time_key=time_key, item_key=item_key)

# Compute and print Recall@k and MRR@k.
k = 20
recall, mrr = model.test(k=k, batch_size=batch_size)
result = f'Recall@{k}:{recall:.7f},MRR@{k}:{mrr:.7f}'
print(result)

Switching into Testing mode.
Switching into Training mode.
Recall@20:0.6171223,MRR@20:0.2565030


## 3. Evaluation (BPR Loss)

In [20]:
# Checkpoint to evaluate, looked up inside the models directory.
model_name = 'GRU4REC_BPR_Adagrad_0.05_epoch10'
model_file = PATH_MODEL/model_name

# Settings passed to the constructors for this checkpoint.
loss_type = 'BPR'
lr = 0.05

dropout_hidden = .2
dropout_input = 0
batch_size = 50
momentum = 0.2

# Build the bare GRU network that will receive the saved weights.
gru = GRU(input_size, hidden_size, output_size,
          num_layers=num_layers,
          dropout_input=dropout_input,
          dropout_hidden=dropout_hidden,
          batch_size=batch_size,
          use_cuda=use_cuda)

# map_location='cpu' deserialises the stored tensors onto the CPU, so loading
# does not depend on the device the checkpoint was saved from;
# load_state_dict then copies the values into the network's own parameters.
gru.load_state_dict(torch.load(model_file, map_location='cpu'))

# Hand the restored network to GRU4REC through `pretrained`.
model = GRU4REC(input_size, hidden_size, output_size,
                num_layers=num_layers,
                dropout_input=dropout_input,
                dropout_hidden=dropout_hidden,
                batch_size=batch_size,
                use_cuda=use_cuda,
                loss_type=loss_type,
                optimizer_type=optimizer_type,
                lr=lr,
                momentum=momentum,
                time_sort=time_sort,
                pretrained=gru)

model.init_data(df_train, df_test, session_key=session_key, time_key=time_key, item_key=item_key)

# Compute and print Recall@k and MRR@k.
k = 20
recall, mrr = model.test(k=k, batch_size=batch_size)
result = f'Recall@{k}:{recall:.7f},MRR@{k}:{mrr:.7f}'
print(result)

Switching into Testing mode.
Switching into Training mode.
Recall@20:0.5984532,MRR@20:0.2378255


## 4. Evaluation (CrossEntropy Loss)

In [21]:
# Checkpoint to evaluate, looked up inside the models directory.
model_name = 'GRU4REC_CrossEntropy_Adagrad_0.01_epoch10'
model_file = PATH_MODEL/model_name

# Settings passed to the constructors for this checkpoint.
loss_type = 'CrossEntropy'
lr = 0.01

dropout_hidden = 0
dropout_input = 0
batch_size = 500
momentum = 0

# Build the bare GRU network that will receive the saved weights.
gru = GRU(input_size, hidden_size, output_size,
          num_layers=num_layers,
          dropout_input=dropout_input,
          dropout_hidden=dropout_hidden,
          batch_size=batch_size,
          use_cuda=use_cuda)

# map_location='cpu' deserialises the stored tensors onto the CPU, so loading
# does not depend on the device the checkpoint was saved from;
# load_state_dict then copies the values into the network's own parameters.
gru.load_state_dict(torch.load(model_file, map_location='cpu'))

# Hand the restored network to GRU4REC through `pretrained`.
model = GRU4REC(input_size, hidden_size, output_size,
                num_layers=num_layers,
                dropout_input=dropout_input,
                dropout_hidden=dropout_hidden,
                batch_size=batch_size,
                use_cuda=use_cuda,
                loss_type=loss_type,
                optimizer_type=optimizer_type,
                lr=lr,
                momentum=momentum,
                time_sort=time_sort,
                pretrained=gru)

model.init_data(df_train, df_test, session_key=session_key, time_key=time_key, item_key=item_key)

# Compute and print Recall@k and MRR@k.
k = 20
recall, mrr = model.test(k=k, batch_size=batch_size)
result = f'Recall@{k}:{recall:.7f},MRR@{k}:{mrr:.7f}'
print(result)

Switching into Testing mode.
Switching into Training mode.
Recall@20:0.6271887,MRR@20:0.2570316
