In [1]:
import argparse
from tqdm import tqdm
from pathlib import Path
import os
import pickle
import numpy as np
import time

import torch 
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, SequentialSampler

from srgnn import SRGNN
from collate import (collate_fn_factory, seq_to_session_graph)
import metric
from dataset import load_data,RecSysDataset
# from train import TrainRunner
import warnings 
warnings.filterwarnings('ignore')

  from .autonotebook import tqdm as notebook_tqdm
Using backend: pytorch


In [2]:
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument(
    '--dataset-dir', default='../YOOCHOOSE_data/yoochoose1_64/', help='the dataset directory'
)
parser.add_argument('--n_items', type=int, default=37484, help='number of unique items. 37484 for yoochoose')
parser.add_argument('--embedding-dim', type=int, default=256, help='the embedding size')
parser.add_argument('--num-layers', type=int, default=1, help='the number of layers')
parser.add_argument('--feat-drop', type=float, default=0.1, help='the dropout ratio for features')
parser.add_argument('--lr', type=float, default=1e-3, help='the learning rate')
parser.add_argument(
    '--batch-size', type=int, default=512, help='the batch size for training'
)
parser.add_argument(
    '--epochs', type=int, default=30, help='the number of training epochs'
)
parser.add_argument(
    '--weight-decay',
    type=float,
    default=1e-4,
    help='the parameter for L2 regularization',
)

parser.add_argument(
    '--valid-split',
    type=float,
    default=0.1,
    help='the fraction for the validation set',
)


parser.add_argument(
    '--topk', 
    type=int, 
    default=20, 
    help='number of top score items selected for calculating recall and mrr metrics',
)

parser.add_argument(
    '--log_aggr', 
    type=int, 
    default=1, 
    help='print the loss after this number of iterations',
)

parser.add_argument(
    '--num-workers',
    type=int,
    default=0,
    help='the number of processes to load the input graphs',
)
    
args,_ = parser.parse_known_args()

args.dataset_dir="../dataset/amex_explorepoi-poi_category/"
args.batch_size=64
args.n_items=556
args.epochs=3
print(args)


Namespace(batch_size=64, dataset_dir='../dataset/amex_explorepoi-poi_category/', embedding_dim=256, epochs=3, feat_drop=0.1, log_aggr=1, lr=0.001, n_items=556, num_layers=1, num_workers=0, topk=20, valid_split=0.1, weight_decay=0.0001)


In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [4]:
train, valid, test = load_data(args.dataset_dir, valid_portion=args.valid_split)

train_data = RecSysDataset(train)
valid_data = RecSysDataset(valid)
test_data = RecSysDataset(test)

--------------------------------------------------
Dataset info:
Number of sessions: 3182
--------------------------------------------------
--------------------------------------------------
Dataset info:
Number of sessions: 354
--------------------------------------------------
--------------------------------------------------
Dataset info:
Number of sessions: 305
--------------------------------------------------


In [19]:
collate_fn = collate_fn_factory(seq_to_session_graph)

train_loader = DataLoader(
    train_data,
    batch_size=args.batch_size,
    # shuffle=True,
    # drop_last=True,
    num_workers=args.num_workers,
    collate_fn=collate_fn,
    pin_memory=True,
    sampler=SequentialSampler(train_data)
)

valid_loader = DataLoader(
    valid_data,
    batch_size=args.batch_size,
    # shuffle=True,
    # drop_last=True,
    num_workers=args.num_workers,
    collate_fn=collate_fn,
    pin_memory=True,
    sampler=SequentialSampler(valid_data)
)

test_loader = DataLoader(
    test_data,
    batch_size=args.batch_size,
    # shuffle=True,
    num_workers=args.num_workers,
    collate_fn=collate_fn
)

print('{:<30}{:<10,} '.format("training mini-batch",len(train_loader)))
print('{:<30}{:<10,} '.format("validation mini-batch",len(valid_loader)))
print('{:<30}{:<10,} '.format("test mini-batch",len(test_loader)))

training mini-batch           50         
validation mini-batch         6          
test mini-batch               5          


In [6]:
model = SRGNN(args.n_items, args.embedding_dim, args.num_layers, feat_drop=args.feat_drop)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# device=torch.device("cpu")
model = model.to(device)
print("{:<30}{:<20,}".format("Number of parameters",np.sum([p.nelement() for p in model.parameters()])))

Number of parameters          1,127,424           


In [7]:
def prepare_batch(batch, device):
    inputs, labels = batch
    # inputs, labels = batch
    inputs_gpu  = [x.to(device) for x in inputs]
    labels_gpu  = labels.to(device)
   
    return inputs_gpu, labels_gpu 

def trainForEpoch(train_loader, model, optimizer, epoch, num_epochs, criterion,device,log_aggr=10):
    model.train()

    sum_epoch_loss = 0

    start = time.time()
    for i, batch in tqdm(enumerate(train_loader), total=len(train_loader),position=0,leave=True):
        inputs, labels = prepare_batch(batch, device)
        
        optimizer.zero_grad()
        logits = model(*inputs)
        # loss = criterion(logits, labels)
        loss = nn.functional.nll_loss(logits, labels)
        loss.backward()
        optimizer.step() 
        
        loss_val = loss.item()
        sum_epoch_loss += loss_val

        if i%(len(train_loader)//log_aggr) == 0 and not i==0:
            print('[TRAIN] epoch %d/%d batch loss: %.4f (avg %.4f) (%.2f im/s)'
                % (epoch + 1, num_epochs, loss_val, sum_epoch_loss / (i + 1),
                  len(inputs) / (time.time() - start)))

        start = time.time()


def validate(valid_loader, model,device):
    model.eval()
    recalls = []
    mrrs = []
    losses=[]
    with torch.no_grad():
        for step, batch in tqdm(enumerate(valid_loader), total=len(valid_loader),position=0,leave=True):
            inputs, labels = prepare_batch(batch, device)
            outputs = model(*inputs)
            # loss = criterion(outputs, labels)
            loss = nn.functional.nll_loss(outputs, labels)
            logits = F.softmax(outputs, dim = 1)
            recall, mrr = metric.evaluate(logits, labels, k = args.topk)
            recalls.append(recall)
            mrrs.append(mrr)
            losses.append(loss.item())
    
    mean_recall = np.mean(recalls)
    mean_mrr = np.mean(mrrs)
    mean_loss=np.mean(losses)
    
    return mean_recall, mean_mrr, mean_loss

In [16]:
def fix_weight_decay(model):
    decay = []
    no_decay = []
    for name, param in model.named_parameters():
        if not param.requires_grad:
            continue
        if any(map(lambda x: x in name, ['bias', 'batch_norm', 'activation'])):
            no_decay.append(param)
        else:
            decay.append(param)
    params = [{'params': decay}, {'params': no_decay, 'weight_decay': 0}]
    return params

if args.weight_decay > 0:
    params = fix_weight_decay(model)
else:
    params = model.parameters()
    
optimizer = optim.Adam(model.parameters(), args.lr, weight_decay=args.weight_decay)
# criterion = nn.CrossEntropyLoss()
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

best_metric = float('inf') ## if cross-entropy loss is selected

TRAIN_LOSS=[]
VALID_LOSS=[]
TEST_LOSS=[]

TRAIN_MRR=[]
VALID_MRR=[]
TEST_MRR=[]

TRAIN_RECALL=[]
VALID_RECALL=[]
TEST_RECALL=[]

for epoch in tqdm(range(args.epochs)): #before: no leave param, now , leave=False

    scheduler.step(epoch = epoch)
    # trainForEpoch(train_loader, model, optimizer, epoch, args.epochs, criterion, device, log_aggr = 1)
    model.train()
    sum_epoch_loss = 0
    start = time.time()
    for step, batch in tqdm(enumerate(train_loader), total=len(train_loader),position=0,leave=True):
        inputs, labels = prepare_batch(batch, device)
        
        optimizer.zero_grad()
        logits = model(*inputs)
        # loss = criterion(logits, labels)
        loss = nn.functional.nll_loss(logits, labels)
        loss.backward()
        optimizer.step()  
        loss_val = loss.item()
        sum_epoch_loss += loss_val
        
        if (step+1)%(len(train_loader)//args.log_aggr) == 0:
            print('Epoch {:05d} |  Loss {:.4f} | Speed (samples/sec) {:.2f}'
                  .format(epoch + 1, sum_epoch_loss / (step + 1), labels.shape[0] / (time.time() - start)))
            
        start = time.time()
        
    if not os.path.exists(os.path.join(os.getcwd(),"output_metrics")):
        os.makedirs("output_metrics")
    
    train_recall, train_mrr, train_loss = validate(train_loader, model, device)
    TRAIN_LOSS.append(train_loss)
    TRAIN_MRR.append(train_mrr)
    TRAIN_RECALL.append(train_recall)
    print()
    print('Epoch {} training--loss: {:.4f}, Recall@{}: {:.4f}, MRR@{}: {:.4f} \n'\
          .format(epoch, train_loss,args.topk, train_recall, args.topk, train_mrr))
    
    dataset_name = args.dataset_dir.split('/')[-2]
    with open(os.path.join(os.getcwd(),"output_metrics",dataset_name+"_train_metrics.txt"),'a') as f:
        f.write(f'{epoch+1},{train_loss},{train_recall},{train_mrr}\n')

    valid_recall, valid_mrr, valid_loss = validate(valid_loader, model, device)
    VALID_LOSS.append(valid_loss)
    VALID_MRR.append(valid_mrr)
    VALID_RECALL.append(valid_recall)
    print('Epoch {} validation--loss: {:.4f}, Recall@{}: {:.4f}, MRR@{}: {:.4f} \n'\
          .format(epoch, valid_loss,args.topk, valid_recall, args.topk, valid_mrr))
    
    with open(os.path.join(os.getcwd(),"output_metrics",dataset_name+"_valid_metrics.txt"),'a') as f:
        f.write(f'{epoch+1},{valid_loss},{valid_recall},{valid_mrr}\n')
        
    test_recall, test_mrr, test_loss = validate(test_loader, model, device)
    TEST_LOSS.append(test_loss)
    TEST_MRR.append(test_mrr)
    TEST_RECALL.append(test_recall)
    print('Epoch {} test--loss: {:.4f}, Recall@{}: {:.4f}, MRR@{}: {:.4f} \n'\
          .format(epoch, test_loss,args.topk, test_recall, args.topk, test_mrr))

    with open(os.path.join(os.getcwd(),"output_metrics",dataset_name+"_test_metrics.txt"),'a') as f:
        f.write(f'{epoch+1},{test_loss},{test_recall},{test_mrr}\n')
        
    # writer.add_scalar("Recall/train", test_recall, epoch)

    # store best loss and save a model checkpoint
    ckpt_dict = {
        'epoch': epoch + 1,
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict()
    }

    selected_metric=valid_loss
    if selected_metric<best_metric:
        best_metric=selected_metric
        dataset_name = args.dataset_dir.split('/')[-2] 
        torch.save(ckpt_dict, dataset_name + '_' + 'latest_checkpoint.pth')


100%|██████████| 50/50 [00:01<00:00, 39.18it/s]


Epoch 00001 |  Loss 1.6379 | Speed (samples/sec) 2069.88


100%|██████████| 50/50 [00:01<00:00, 44.14it/s]



Epoch 0 training--loss: 1.3648, Recall@20: 0.9863, MRR@20: 0.7373 



100%|██████████| 6/6 [00:00<00:00, 45.75it/s]


Epoch 0 validation--loss: 2.2234, Recall@20: 0.9153, MRR@20: 0.6358 



100%|██████████| 5/5 [00:00<00:00, 40.86it/s]
 33%|███▎      | 1/3 [00:02<00:05,  2.69s/it]

Epoch 0 test--loss: 3.6011, Recall@20: 0.7964, MRR@20: 0.4331 



100%|██████████| 50/50 [00:01<00:00, 37.50it/s]


Epoch 00002 |  Loss 1.3860 | Speed (samples/sec) 2057.22


100%|██████████| 50/50 [00:01<00:00, 43.29it/s]



Epoch 1 training--loss: 1.2291, Recall@20: 0.9930, MRR@20: 0.7520 



100%|██████████| 6/6 [00:00<00:00, 44.81it/s]


Epoch 1 validation--loss: 2.1232, Recall@20: 0.9257, MRR@20: 0.6500 



100%|██████████| 5/5 [00:00<00:00, 40.29it/s]
 67%|██████▋   | 2/3 [00:05<00:02,  2.74s/it]

Epoch 1 test--loss: 3.3978, Recall@20: 0.8205, MRR@20: 0.4603 



100%|██████████| 50/50 [00:01<00:00, 39.05it/s]


Epoch 00003 |  Loss 1.2922 | Speed (samples/sec) 2062.58


100%|██████████| 50/50 [00:01<00:00, 40.73it/s]



Epoch 2 training--loss: 1.1513, Recall@20: 0.9969, MRR@20: 0.7616 



100%|██████████| 6/6 [00:00<00:00, 45.52it/s]


Epoch 2 validation--loss: 2.0932, Recall@20: 0.9358, MRR@20: 0.6654 



100%|██████████| 5/5 [00:00<00:00, 40.58it/s]
100%|██████████| 3/3 [00:08<00:00,  2.75s/it]

Epoch 2 test--loss: 3.2142, Recall@20: 0.8318, MRR@20: 0.4706 






In [14]:
os.path.join(os.getcwd(),"output_metrics",dataset_name+"_train_metrics.txt")

'/home/ec2-user/SageMaker/sequence-based-recommendation/SRGNN/output_metrics/amex_explorepoi-poi_category_train_metrics.txt'

In [13]:
if not os.path.exists(os.path.join(os.getcwd(),"output_metrics")):
    os.makedirs("output_metrics")

In [None]:
1%651

In [None]:
runner = TrainRunner(
    args.dataset_dir,
    model,
    train_loader,
    test_loader,
    device=device,
    lr=args.lr,
    weight_decay=args.weight_decay,
    patience=args.patience,
)

In [None]:
print('start training')
mrr, hit = runner.train(args.epochs, args.log_interval)
print('MRR@20\tHR@20')
print(f'{mrr * 100:.3f}%\t{hit * 100:.3f}%')

### model desection

In [None]:
import math

import torch as th
import torch.nn as nn
import torch.nn.functional as F

import dgl
import dgl.ops as F
import dgl.function as fn

class SRGNNLayer(nn.Module):
    def __init__(self, input_dim, output_dim, batch_norm=False, feat_drop=0.0, activation=None):
        super().__init__()
        self.dropout    = nn.Dropout(feat_drop)
        self.gru        = nn.GRUCell(2 * input_dim, output_dim)
        self.W1         = nn.Linear(input_dim, output_dim, bias=False)
        self.W2         = nn.Linear(input_dim, output_dim, bias=False)
        self.activation = activation
        
    def messager(self, edges):
        
        return {'m': edges.src['ft'] * edges.data['w'].unsqueeze(-1), 'w': edges.data['w']}

    def reducer(self, nodes):
        
        m = nodes.mailbox['m']
        w = nodes.mailbox['w']
        hn = m.sum(dim=1) / w.sum(dim=1).unsqueeze(-1)
        
        return {'neigh': hn}
    
    def forward(self, mg, feat):
        with mg.local_scope():
            mg.ndata['ft'] = self.dropout(feat)
            if mg.number_of_edges() > 0:
                mg.update_all(self.messager, self.reducer)
                neigh1 = mg.ndata['neigh']
                mg1 = mg.reverse(copy_edata=True)
                mg1.update_all(self.messager, self.reducer)
                neigh2 = mg1.ndata['neigh']
                neigh1 = self.W1(neigh1)
                neigh2 = self.W2(neigh2)
                hn = torch.cat((neigh1, neigh2), dim=1)
                rst = self.gru(hn, feat) 
            else:
                rst = feat
        if self.activation is not None:
            rst = self.activation(rst)
        return rst
    
class AttnReadout(nn.Module):
    def __init__(
        self,
        input_dim,
        hidden_dim,
        output_dim,
        batch_norm=True,
        feat_drop=0.0,
        activation=None,
    ):
        super().__init__()
        self.batch_norm = nn.BatchNorm1d(input_dim) if batch_norm else None
        self.feat_drop = nn.Dropout(feat_drop)
        self.fc_u = nn.Linear(input_dim, hidden_dim, bias=False)
        self.fc_v = nn.Linear(input_dim, hidden_dim, bias=True)
        self.fc_e = nn.Linear(hidden_dim, 1, bias=False)
        self.fc_out = (
            nn.Linear(input_dim, output_dim, bias=False)
            if output_dim != input_dim
            else None
        )
        self.activation = activation

    def forward(self, g, feat, last_nodes):
        if self.batch_norm is not None:
            feat = self.batch_norm(feat)
        feat = self.feat_drop(feat)
        feat_u = self.fc_u(feat)
        feat_v = self.fc_v(feat[last_nodes])
        feat_v = dgl.broadcast_nodes(g, feat_v)
        e = self.fc_e(torch.sigmoid(feat_u + feat_v)) 
        alpha = F.segment.segment_softmax(g.batch_num_nodes(), e) 
        feat_norm = feat * alpha
        rst = F.segment.segment_reduce(g.batch_num_nodes(), feat_norm, 'sum')
        if self.fc_out is not None:
            rst = self.fc_out(rst)
        if self.activation is not None:
            rst = self.activation(rst)
        return rst
    
class NISER(nn.Module):
    
    def __init__(self, num_items, embedding_dim, num_layers, feat_drop=0.0, norm=True, scale=12):
        super().__init__()
        self.embedding = nn.Embedding(num_items, embedding_dim)
        self.register_buffer('indices', torch.arange(num_items, dtype=torch.long))
        self.embedding_dim = embedding_dim
        self.num_layers = num_layers
        self.layers = nn.ModuleList()
        self.norm = norm
        self.scale = scale
        input_dim = embedding_dim
        for i in range(num_layers):
            layer = SRGNNLayer(
                input_dim,
                embedding_dim,
                batch_norm=None,
                feat_drop=feat_drop
            )
            self.layers.append(layer)
        self.readout = AttnReadout(
            input_dim,
            embedding_dim,
            embedding_dim,
            batch_norm=None,
            feat_drop=feat_drop,
            activation=None,
        )

        self.feat_drop = nn.Dropout(feat_drop)
        self.fc_sr = nn.Linear(input_dim + embedding_dim, embedding_dim, bias=False)
        
        self.reset_parameters()
        
    def reset_parameters(self):
        stdv = 1.0 / math.sqrt(self.embedding_dim)
        for weight in self.parameters():
            weight.data.uniform_(-stdv, stdv)
        
    def forward(self, mg, sg=None):
        iid = mg.ndata['iid']
        
        feat = self.feat_drop(self.embedding(iid))
        if self.norm:
            feat = feat.div(torch.norm(feat, p=2, dim=-1, keepdim=True) + 1e-12)
        out = feat
        for i, layer in enumerate(self.layers):
            out = layer(mg, out)
            
        last_nodes = mg.filter_nodes(lambda nodes: nodes.data['last'] == 1)
        if self.norm:
            feat = feat.div(torch.norm(out, p=2, dim=-1, keepdim=True))
        sr_g = self.readout(mg, feat, last_nodes)
        sr_l = feat[last_nodes]
        sr = torch.cat([sr_l, sr_g], dim=1)
        sr = self.fc_sr(sr)
        if self.norm:
            sr = sr.div(torch.norm(sr, p=2, dim=-1, keepdim=True) + 1e-12)
        target = self.embedding(self.indices)
        if self.norm:
            target = target.div(torch.norm(target, p=2, dim=-1, keepdim=True) + 1e-12)
        logits = sr @ target.t()
        if self.scale:
            logits = torch.log(nn.functional.softmax(self.scale * logits, dim=-1))
        else:
            logits = torch.log(nn.functional.softmax(logits, dim=-1))
        return logits# , 0

In [None]:
# model = NISER(num_items, args.embedding_dim, args.num_layers, feat_drop=args.feat_drop)
num_items, args.embedding_dim, args.num_layers, args.feat_drop

In [None]:
model = NISER(num_items, args.embedding_dim, args.num_layers, feat_drop=args.feat_drop)
model.layers[0]

In [None]:
model.readout

In [None]:
model.fc_sr.weight.shape, model.embedding_dim

In [None]:
np.sum([p.nelement() for p in model.parameters()])

In [None]:
batch=next(iter(test_loader))
batch

In [None]:
def prepare_batch(batch, device):
    inputs, labels = batch
    # inputs, labels = batch
    inputs_gpu  = [x.to(device) for x in inputs]
    labels_gpu  = labels.to(device)
   
    return inputs_gpu, labels_gpu 
# inputs, labels = prepare_batch(batch, device)

In [None]:
inputs, labels = batch
mg=inputs[0]
mg

In [None]:
mg.batch_num_nodes()

In [None]:
iid=mg.ndata['iid']

In [None]:
feat = model.feat_drop(model.embedding(iid))
feat = feat.div(torch.norm(feat, p=2, dim=-1, keepdim=True) + 1e-12)
feat.shape

In [None]:
layer=model.layers[0]
layer

In [None]:
mg.ndata['ft']=layer.dropout(feat)
mg

In [None]:
mg.edges_src['ft'].shape, mg.edata['w'].shape

In [None]:
import torch.nn as nn
import torch

gru = nn.GRU(input_size = 8, hidden_size = 50, num_layers = 3, batch_first = True)
inp = torch.randn(1024, 112, 8)
out, hn = gru(inp)
out.shape, hn.shape

In [None]:
torch.equal(out[:,-1],hn[-1,:,:])