In [1]:
import numpy as np 
import pandas as pd 
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from pathlib import Path
from tqdm import tqdm_notebook

In [2]:
# Make GPU index 5 the current CUDA device for the rest of the notebook.
# NOTE(review): the index is hard-coded; adjust it to the local hardware
# (the call is expected to fail on hosts that do not expose a device 5).
torch.cuda.set_device(5)

 # Dataset

In [3]:
# Names of the context features attached to every step of a sequence.
context = ['dow', 'hour', 'month', 'timediff', 'event']

# For each context feature, its 20 column names '<feature>_1' .. '<feature>_20'.
feat_context = {
    name: [name + '_' + str(step) for step in range(1, 21)]
    for name in context
}

# The 19 input item columns 'item_1' .. 'item_19'.
x_item = ['item_' + str(step) for step in range(1, 20)]

In [4]:
# Directory the train/valid/test CSV files are read from (relative to the working directory).
data_pth = Path('../data/CRNN')

In [5]:
# Read the training, validation and test splits from `data_pth`, in that order.
df_train, df_val, df_test = (
    pd.read_csv(data_pth / csv_name)
    for csv_name in ('train_tu.csv', 'valid_tu.csv', 'test_tu.csv')
)

In [6]:
# Distinct item ids occurring in the last two positions ('item_19', 'item_20')
# of the training rows, sorted by np.unique, with the value -1 removed.
item_uniq = np.unique(df_train[['item_19', 'item_20']].values.astype(int))
item_uniq = item_uniq[item_uniq != -1]

In [7]:
def _keep_known_targets(frame):
    """Return the rows of `frame` whose 'item_20' is in `item_uniq`, renumbered from 0."""
    is_known = frame['item_20'].isin(item_uniq)
    return frame.loc[is_known].reset_index(drop=True)


# Drop validation/test rows whose target item never appears in `item_uniq`.
df_val = _keep_known_targets(df_val)
df_test = _keep_known_targets(df_test)

In [8]:
# `items` is the index -> item list and `item2index` the inverse mapping.
# Slot 0 holds the value -1, slot 1 the 'UNK' token; the ids from
# `item_uniq` follow in their existing order.
items = [-1, 'UNK']
items.extend(item_uniq)
item2index = {entry: position for position, entry in enumerate(items)}

In [9]:
class recDataset(Dataset):
    """Map-style dataset that turns one DataFrame row into one training sample.

    Each sample is a triple ``(x, y, feats)``:
      * ``x``     -- the row's `x_item` columns mapped through `item2index`
                     (ids missing from the mapping fall back to the 'UNK' index);
      * ``y``     -- the row's 'item_20' value mapped the same way;
      * ``feats`` -- dict keyed by context feature name; each value is that
                     row's `feat_context[name]` columns shifted by +1 and
                     cast to int.

    The context columns are extracted, shifted and cast once in `__init__`
    and cached as NumPy arrays. `__getitem__` then indexes them by position,
    which avoids a pandas row lookup per feature per sample and does not
    depend on the DataFrame's index labels.
    """
    def __init__(self, df):
        self.n_feat = len(context)
        self.df = df
        self.x_item = df[x_item].values
        self.y = df['item_20'].values
        # One (n_rows, n_steps) int array per context feature.
        # The +1 shift presumably moves a -1 pad value onto index 0, the
        # padding index of the context embeddings -- TODO confirm.
        self.feats = {
            feat: (df[cols].values + 1).astype(int)
            for feat, cols in feat_context.items()
        }

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        x = np.array([item2index.get(k, item2index["UNK"]) for k in self.x_item[idx]])
        y = item2index.get(self.y[idx], item2index["UNK"])
        # Positional row slice of each cached context array.
        feats = {feat: values[idx] for feat, values in self.feats.items()}
        return x, y, feats

In [19]:
# Wrap each split in a recDataset; these are handed to DataLoader further down.
train_ds = recDataset(df_train)
val_ds = recDataset(df_val)
test_ds = recDataset(df_test)

# Model: GRU

In [11]:
class GRUModel(torch.nn.Module) :
    """GRU next-item predictor whose inputs and final state are modulated by context.

    Item indices are embedded, multiplied element-wise by a linear projection
    of the concatenated context embeddings (month, hour, day-of-week,
    time difference, event), concatenated with that context and fed to a GRU.
    The last hidden state is modulated the same way using the context of the
    final time step, concatenated with it, and projected to one score per
    vocabulary entry.

    Args:
        vocab_size: number of rows of the item embedding and number of output scores.
        embedding_dim: size of the item embedding.
        hidden_dim: size of the GRU hidden state. Must equal `embedding_dim`,
            because the single `context_transform` layer (output size
            `embedding_dim`) is also multiplied with the hidden state.

    Raises:
        ValueError: if `embedding_dim` and `hidden_dim` differ.
    """
    def __init__(self, vocab_size, embedding_dim, hidden_dim) :
        super(GRUModel,self).__init__()
        if embedding_dim != hidden_dim:
            # Fail early with a clear message instead of a shape error inside forward().
            raise ValueError(
                "GRUModel requires embedding_dim == hidden_dim "
                "(got %d and %d): context_transform is shared between the "
                "item embeddings and the GRU hidden state" % (embedding_dim, hidden_dim)
            )
        # Five context embeddings of width 2 each are concatenated.
        context_dim = 10

        self.hidden_dim = hidden_dim
        self.embeddings = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.month_embed = nn.Embedding(13, 2, padding_idx=0)
        self.hour_embed = nn.Embedding(25, 2, padding_idx=0)
        self.dow_embed = nn.Embedding(8, 2, padding_idx=0)
        self.timediff_embed = nn.Embedding(21, 2, padding_idx=0)
        self.event_embed = nn.Embedding(3, 2, padding_idx=0)

        self.context_transform = nn.Linear(context_dim, embedding_dim)
        self.gru = nn.GRU(embedding_dim+context_dim, hidden_dim, batch_first=True)
        self.linear = nn.Linear(hidden_dim+context_dim, vocab_size)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x, feats):
        """Score every vocabulary entry as the next item.

        Args:
            x: integer item indices. The concatenations below require shape
                (batch, T-1), where T is the second dimension of the tensors
                in `feats`.
            feats: dict with keys 'month', 'hour', 'dow', 'timediff' and
                'event'; each value is a (batch, T) tensor of indices and is
                cast to long here.

        Returns:
            Tensor of shape (batch, vocab_size) holding unnormalised scores.
        """
        x = self.embeddings(x)
        month = self.month_embed(feats['month'].long())
        hour = self.hour_embed(feats['hour'].long())
        dow = self.dow_embed(feats['dow'].long())
        timediff = self.timediff_embed(feats['timediff'].long())
        event = self.event_embed(feats['event'].long())

        # Context of all steps but the last lines up with the input items.
        context = torch.cat((month[:,:-1,:], hour[:,:-1,:], dow[:,:-1,:],
                             timediff[:,:-1,:], event[:,:-1,:]), 2)
        x = x*(self.context_transform(context))
        x = torch.cat((x, context), 2)
        x = self.dropout(x)

        # Only the final hidden state is used; the per-step outputs are discarded.
        _, ht = self.gru(x)
        final = ht[-1]

        # Context of the last step modulates the final hidden state.
        y_context = torch.cat((month[:,-1,:], hour[:,-1,:], dow[:,-1,:],
                               timediff[:,-1,:], event[:,-1,:]), 1)
        final = final*(self.context_transform(y_context))
        final = torch.cat((final, y_context), 1)
        return self.linear(final)

# Training

4.3.1 Hyper-parameters. We fixed the size of the item embedding vector and the size of the RNN hidden state to 100. To optimize the loss function in Equation 5, we use the Adam algorithm with square-root decay of the learning rate from 0.01 to 0.001. For all models, the batch size was set to 256 and the number of training iterations to 10,000.

In [12]:
# Directory that receives all checkpoints written by this notebook.
model_pth = Path('../model/CRNN')
# File train_epocs saves to whenever validation recall improves.
p = model_pth/'best_model.pth'
# Best validation recall seen so far; passed into and returned by train_epocs.
best_recall = 0

In [13]:
def save_model(m, p):
    """Write the state dict of module `m` to path `p`."""
    torch.save(m.state_dict(), p)


def load_model(m, p):
    """Load the state dict stored at `p` into module `m`, in place.

    The checkpoint is deserialised onto the CPU first, so a file saved from
    a particular GPU can be read on a host that lacks that device;
    `load_state_dict` then copies the values into `m`'s existing parameters
    on whatever device they live on.
    """
    # NOTE: torch.load unpickles the file -- only load checkpoints you trust.
    m.load_state_dict(torch.load(p, map_location='cpu'))

In [14]:
def update_optimizer(optimizer, lr):
    """Set the learning rate of every parameter group of `optimizer` to `lr`."""
    for group in optimizer.param_groups:
        group["lr"] = lr

In [15]:
def train_epocs(model, optimizer, train_dl, val_dl, best_recall, epochs=10):
    """Train `model` for `epochs` passes over `train_dl`, validating after each pass.

    After every epoch `val_metrics` is run on `val_dl`; when the validation
    recall exceeds `best_recall`, the weights are saved to the module-level
    path `p` and `best_recall` is updated. A progress line is printed only
    for epoch indices with ``index % 5 == 1`` (the 2nd, 7th, ... epoch), so a
    single-epoch call prints nothing.

    Returns:
        The best validation recall seen, including the value passed in.
    """
    for epoch in range(epochs):
        model.train()
        running_loss, n_seen = 0.0, 0

        for x, y, feats in tqdm_notebook(train_dl):
            # Move the whole batch to the current CUDA device.
            x, y = x.cuda(), y.cuda()
            feats = {name: tensor.cuda() for name, tensor in feats.items()}
            n_batch = y.shape[0]

            optimizer.zero_grad()
            logits = model(x, feats)
            loss = F.cross_entropy(logits, y)
            loss.backward()
            optimizer.step()

            # Weight the batch-mean loss by batch size to get a per-sample average.
            running_loss += loss.item()*n_batch
            n_seen += n_batch

        val_loss, val_recall = val_metrics(model, val_dl)
        if val_recall > best_recall:
            best_recall = val_recall
            save_model(model, p)
        if epoch % 5 == 1:
            print("train loss %.3f val loss %.3f and val recall@10 %.3f" % (running_loss/n_seen, val_loss, val_recall))
    return best_recall

In [16]:
def val_metrics(model, valid_dl):
    """Evaluate `model` on every batch of `valid_dl`.

    Puts the model in eval mode (it is not switched back here) and runs the
    whole pass without gradient tracking.

    Returns:
        Tuple ``(loss, recall)``: the per-sample mean cross-entropy and the
        fraction of samples whose target is among the 10 highest-scoring
        classes (recall@10).

    Raises:
        ZeroDivisionError: if `valid_dl` yields no samples.
    """
    model.eval()
    recall = 0
    total = 0
    sum_loss = 0.0
    # Inference only: skip autograd bookkeeping so no graph is kept per batch.
    with torch.no_grad():
        for x, y, feats in tqdm_notebook(valid_dl):
            x = x.cuda()
            y = y.cuda()
            feats = {key: value.cuda() for key, value in feats.items()}
            y_hat = model(x, feats)
            loss = F.cross_entropy(y_hat, y)
            total += y.shape[0]
            sum_loss += loss.item()*y.shape[0]
            _, indices = torch.topk(y_hat, k=10, dim=1)
            # A sample is a hit when its target equals one of its own top-10
            # indices; compare the whole batch at once instead of per sample.
            hits = (indices == y.unsqueeze(1)).any(dim=1)
            recall += hits.sum().item()
    return sum_loss/total, recall/total

In [20]:
# Samples per batch for all three loaders.
# NOTE(review): the hyper-parameter paragraph quoted in the Training section
# states a batch size of 256 -- confirm that 5000 is intended here.
batch_size = 5000
# Only the training loader shuffles; validation and test keep row order.
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
val_dl = DataLoader(val_ds, batch_size=batch_size)
test_dl = DataLoader(test_ds, batch_size=batch_size)

In [21]:
# Vocabulary size is the number of entries in item2index (including the -1 and
# 'UNK' slots); item embedding and GRU hidden state are both 100-dimensional.
model = GRUModel(len(item2index), 100, 100).cuda()
# Adam with initial learning rate 0.01 and weight decay 1e-3; the learning rate
# is lowered later in the notebook through update_optimizer.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=1e-3)

In [40]:
# Train for a single epoch, then save the current weights to 'model2.pth'.
# This snapshot is independent of the best-recall checkpoint written inside train_epocs.
best_recall = train_epocs(model, optimizer, train_dl, val_dl, best_recall, epochs=1)
save_model(model, model_pth/'model2.pth')

HBox(children=(IntProgress(value=0, max=47), HTML(value='')))

HBox(children=(IntProgress(value=0, max=2), HTML(value='')))

In [42]:
# Show the best validation recall@10 reached so far.
best_recall

0.2388238424693986

In [None]:
# Continue training for 20 epochs at the current learning rate, then save the
# weights as they are after the last epoch to 'model20.pth'.
best_recall = train_epocs(model, optimizer, train_dl, val_dl, best_recall, epochs=20)
save_model(model, model_pth/'model20.pth')

In [44]:
# Show the best validation recall@10 reached so far.
best_recall

0.42030335284725917

In [None]:
# Drop the learning rate to 0.001 for every parameter group, train 20 more
# epochs, then save the weights after the last epoch to 'model40.pth'.
update_optimizer(optimizer, lr=0.001)
best_recall = train_epocs(model, optimizer, train_dl, val_dl, best_recall, epochs=20)
save_model(model, model_pth/'model40.pth')

HBox(children=(IntProgress(value=0, max=47), HTML(value='')))

In [23]:
# Evaluate the last-epoch checkpoint on the test loader.
# val_metrics returns a (loss, recall) tuple, so recall_40 holds both values.
load_model(model, model_pth/'model40.pth')
recall_40 = val_metrics(model, test_dl)

HBox(children=(IntProgress(value=0, max=2), HTML(value='')))




In [24]:
# Evaluate the best-validation-recall checkpoint on the test loader.
# val_metrics returns a (loss, recall) tuple, so recall_best holds both values.
load_model(model, model_pth/'best_model.pth')
recall_best = val_metrics(model, test_dl)

HBox(children=(IntProgress(value=0, max=2), HTML(value='')))




In [26]:
# Element 1 of each result tuple is the test recall@10:
# last-epoch checkpoint first, best-validation checkpoint second.
print(f'{recall_40[1]:.4f}, {recall_best[1]:.4f}')

0.4048, 0.4231
