In [1]:
### Load preprocessed data

In [None]:
#!python ../src/download_ml20.py

In [19]:
import numpy as np

# Number of columns kept per row: 768 model positions plus one extra, because
# the training step feeds columns [:-1] as inputs and columns [1:] as targets.
max_seq_len = 768 + 1

# The archive stores an item-id matrix and a rating matrix for each of the
# train and test splits. np.load on an .npz returns a lazily-read NpzFile that
# keeps the underlying file open, so it is used as a context manager to release
# the handle as soon as the four arrays have been materialised.
with np.load('data/dataset_ml20_wide.npz') as fh:
    # Truncate first, then cast: only the retained columns are converted and
    # the full-width int64 copy is never allocated.
    train_items = fh['train_items'][:, :max_seq_len].astype(np.int64)
    train_ratng = fh['train_ratng'][:, :max_seq_len].astype(np.int64)
    test_items = fh['test_items'][:, :max_seq_len].astype(np.int64)
    test_ratng = fh['test_ratng'][:, :max_seq_len].astype(np.int64)

# Sizes derived from the training split.
n_user = train_items.shape[0]        # number of rows (one per user, going by the name)
n_rank = train_items.shape[1]        # number of columns kept after truncation
n_item = int(train_items.max() + 1)  # largest item id + 1, so raw ids can index a table
n_resp = int(train_ratng.max() + 1)  # largest rating code + 1

In [40]:
# Distinct rating codes found at positions whose item id is greater than 0.
np.unique(train_ratng[train_items > 0])

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [20]:
# Distinct item ids present anywhere in the training matrix.
np.unique(train_items)

array([     0,      2,      3, ..., 131170, 131237, 131262])

In [21]:
!pip install -q reformer_pytorch



In [22]:
import random
import torch
import numpy as np
import pandas as pd
from torch import nn
from torch import from_numpy
import pytorch_lightning as pl
from torch.nn import functional as F
from reformer_pytorch import Reformer
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger

In [23]:
from abstract_model import AbstractModel

In [41]:
class AR(AbstractModel):
    """Autoregressive rating model built on a causal Reformer.

    Every past interaction is embedded as the element-wise product of an item
    embedding and a rating ("response") embedding. The causal Reformer turns
    the sequence of interactions into one user-state vector per position, and
    ``likelihood`` scores the rating given to the *next* item from that state.

    Item id 0 is the padding id (``padding_idx`` of the item embedding) and
    rating code 0 is ignored by the loss.
    """

    def __init__(self, n_item, n_dim, n_resp, n_rank,
                 train_x, train_y, test_x, test_y,
                 heads=2, depth=2, batch_size=32):
        """Build the embeddings, the Reformer and the per-response projections.

        Args:
            n_item: Number of rows in the item embedding table.
            n_dim: Width of the embeddings and of the Reformer.
            n_resp: Number of rating classes (rows of the rating embedding and
                number of logits produced per position).
            n_rank: Passed to the Reformer as ``max_seq_len``.
            train_x, train_y, test_x, test_y: Forwarded unchanged to
                ``self.save_data`` (inherited; not defined in this class).
            heads: Number of attention heads for the Reformer.
            depth: Number of Reformer layers.
            batch_size: Stored on the instance as ``self.batch_size``.
        """
        super().__init__()
        self.n_dim = n_dim
        self.n_item = n_item
        self.n_resp = n_resp
        # padding_idx=0 means that item=0 will always yield the zero vector
        self.item = nn.Embedding(n_item, n_dim, padding_idx=0)
        self.resp = nn.Embedding(n_resp, n_dim)
        self.reformer = Reformer(dim=n_dim, depth=depth, heads=heads,
                                 causal=True, max_seq_len=n_rank)
        # Each projection expands one n_dim vector into n_resp vectors of
        # width n_dim (one per possible response).
        self.user_lin = nn.Linear(n_dim, n_dim * n_resp)
        self.item_lin = nn.Linear(n_dim, n_dim * n_resp)
        self.save_data(train_x, train_y, test_x, test_y)
        self.batch_size = batch_size

    def forward(self, items, ratng):
        """Encode an interaction history into per-position user vectors.

        Args:
            items: Integer tensor of item ids (0 = padding).
            ratng: Integer tensor of rating codes, same shape as ``items``.

        Returns:
            The Reformer output for the embedded interaction sequence.
        """
        item_vec = self.item(items)
        resp_vec = self.resp(ratng)
        # Padding items embed to the zero vector, so their product is zero too
        intx_vec = item_vec * resp_vec
        # True marks real interactions. Padding is encoded as the embedding's
        # padding index (0); comparing against -1 would mark every position as
        # valid and let attention read the padded slots.
        mask = items != self.item.padding_idx
        user_vec = self.reformer(intx_vec, input_mask=mask)
        return user_vec

    def likelihood(self, user_vec, items, ratg):
        """Cross-entropy of the observed ratings given user state and item.

        Args:
            user_vec: Tensor unpacked as (batchsize, window, n_dim).
            items: Ids of the items whose ratings are being predicted.
            ratg: Observed rating codes for ``items``; code 0 is ignored.

        Returns:
            Scalar loss averaged over the positions whose rating is not 0
            (0.0 when no such position exists).
        """
        batchsize, window, n_dim = user_vec.shape
        item_vec = self.item(items)
        # Linearly map one user & item vector to one user & item vector
        # per possible response type
        resp_shape = (batchsize, window, n_dim, self.n_resp)
        user_vec_resp = self.user_lin(user_vec).reshape(resp_shape)
        # The item side must be driven by the target item's embedding;
        # feeding user_vec here would make the logits independent of the item.
        item_vec_resp = self.item_lin(item_vec).reshape(resp_shape)
        # Sum interactions across n_dim back down to (batchsize, window, n_resp)
        intx = (user_vec_resp * item_vec_resp).sum(dim=2)
        intx_flat = intx.reshape((batchsize * window, self.n_resp))
        ratg_flat = ratg.reshape((batchsize * window))
        # Equivalent to reduction='mean' with ignore_index=0, except that a
        # batch with no valid target yields 0 instead of NaN (0 / 0).
        loss_sum = F.cross_entropy(intx_flat, ratg_flat, ignore_index=0, reduction='sum')
        n_valid = (ratg_flat != 0).sum().clamp(min=1)
        loss = loss_sum / n_valid
        return loss

    def configure_optimizers(self):
        """Return the Adam optimizer (lr=1e-4, weight_decay=0.01) over all parameters."""
        return torch.optim.Adam(self.parameters(), lr=1e-4, weight_decay=0.01)

    def training_step(self, batch, batch_nb):
        """Compute the next-interaction loss for one batch.

        Args:
            batch: Unpacked as ``(items,), ratng`` -- presumably the layout
                produced by the inherited data loaders; verify against
                ``AbstractModel``.
            batch_nb: Batch index (unused).

        Returns:
            Dict with the loss under ``'loss'`` and a ``'log'`` dict holding
            the same value under ``'train_loss'``.
        """
        (items,), ratng = batch
        # Pass in leading arrays, missing the last element
        # for every user that's to be predicted
        user_vec = self.forward(items[:, :-1], ratng[:, :-1])
        # Given previous token, predict the next interaction
        loss = self.likelihood(user_vec, items[:, 1:], ratng[:, 1:])
        tensorboard_logs = {'train_loss': loss}
        return {'loss': loss, 'log': tensorboard_logs}

    def test_step(self, batch, batch_nb):
        """Run the training-step computation and report it as ``'test_loss'``.

        NOTE(review): the nested ``'log'`` dict still carries the value under
        the key ``'train_loss'``; confirm whether that is intended.
        """
        dat = self.training_step(batch, batch_nb)
        dat['test_loss'] = dat.pop('loss')
        return dat

    
# Width shared by the item/rating embeddings and the Reformer
n_dim = 16
model = AR(
    n_item=n_item,
    n_dim=n_dim,
    n_resp=n_resp,
    n_rank=n_rank,
    train_x=train_items,
    train_y=train_ratng,
    test_x=test_items,
    test_y=test_ratng,
)

# TensorBoard logger rooted at "tb_logs" with run name "ar_model"
logger = TensorBoardLogger("tb_logs", name="ar_model")

# CPU run. A multi-GPU, half-precision run would instead look like:
# trainer = pl.Trainer(gpus=8, precision=16)
trainer = pl.Trainer(
    logger=logger,
    max_epochs=5,
    reload_dataloaders_every_epoch=True,
)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores


### Train model

In [42]:
# NOTE(review): this cell sits under the "Train model" heading but only runs
# the test loop; no trainer.fit(model) call is visible in this notebook --
# confirm whether training was meant to happen here first.
trainer.test(model)

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Testing', layout=Layout(flex='2'), max=…

KeyboardInterrupt: 

In [38]:
debug

> [0;32m/opt/conda/lib/python3.6/site-packages/torch/nn/functional.py[0m(1838)[0;36mnll_loss[0;34m()[0m
[0;32m   1836 [0;31m                         .format(input.size(0), target.size(0)))
[0m[0;32m   1837 [0;31m    [0;32mif[0m [0mdim[0m [0;34m==[0m [0;36m2[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m-> 1838 [0;31m        [0mret[0m [0;34m=[0m [0mtorch[0m[0;34m.[0m[0m_C[0m[0;34m.[0m[0m_nn[0m[0;34m.[0m[0mnll_loss[0m[0;34m([0m[0minput[0m[0;34m,[0m [0mtarget[0m[0;34m,[0m [0mweight[0m[0;34m,[0m [0m_Reduction[0m[0;34m.[0m[0mget_enum[0m[0;34m([0m[0mreduction[0m[0;34m)[0m[0;34m,[0m [0mignore_index[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   1839 [0;31m    [0;32melif[0m [0mdim[0m [0;34m==[0m [0;36m4[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   1840 [0;31m        [0mret[0m [0;34m=[0m [0mtorch[0m[0;34m.[0m[0m_C[0m[0;34m.[0m[0m_nn[0m[0;34m.[0m[0mnll_loss2d[0m[0;34m([0m[0mi

ipdb>  u


> [0;32m/opt/conda/lib/python3.6/site-packages/torch/nn/functional.py[0m(2021)[0;36mcross_entropy[0;34m()[0m
[0;32m   2019 [0;31m    [0;32mif[0m [0msize_average[0m [0;32mis[0m [0;32mnot[0m [0;32mNone[0m [0;32mor[0m [0mreduce[0m [0;32mis[0m [0;32mnot[0m [0;32mNone[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   2020 [0;31m        [0mreduction[0m [0;34m=[0m [0m_Reduction[0m[0;34m.[0m[0mlegacy_get_string[0m[0;34m([0m[0msize_average[0m[0;34m,[0m [0mreduce[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m-> 2021 [0;31m    [0;32mreturn[0m [0mnll_loss[0m[0;34m([0m[0mlog_softmax[0m[0;34m([0m[0minput[0m[0;34m,[0m [0;36m1[0m[0;34m)[0m[0;34m,[0m [0mtarget[0m[0;34m,[0m [0mweight[0m[0;34m,[0m [0;32mNone[0m[0;34m,[0m [0mignore_index[0m[0;34m,[0m [0;32mNone[0m[0;34m,[0m [0mreduction[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   2022 [0;31m[0;34m[0m[0m
[0m[0;32m   2023 [0;31m[0;34m[0m[

ipdb>  


> [0;32m<ipython-input-36-be74371ee154>[0m(39)[0;36mlikelihood[0;34m()[0m
[0;32m     37 [0;31m        [0mweig_flat[0m [0;34m=[0m [0mweig[0m[0;34m.[0m[0mreshape[0m[0;34m([0m[0;34m([0m[0mbatchsize[0m [0;34m*[0m [0mwindow[0m[0;34m)[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     38 [0;31m        [0mratg_flat[0m [0;34m=[0m [0mratg[0m[0;34m.[0m[0mreshape[0m[0;34m([0m[0;34m([0m[0mbatchsize[0m [0;34m*[0m [0mwindow[0m[0;34m)[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 39 [0;31m        [0mloss[0m [0;34m=[0m [0mF[0m[0;34m.[0m[0mcross_entropy[0m[0;34m([0m[0mintx_flat[0m[0;34m,[0m [0mratg_flat[0m[0;34m,[0m [0mweight[0m[0;34m=[0m[0mweig_flat[0m[0;34m,[0m [0mreduction[0m[0;34m=[0m[0;34m'mean'[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     40 [0;31m        [0;32mreturn[0m [0mloss[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     41 [0;31m[0;34m[0m[0m
[0m


ipdb>  p intx_flat.shape


torch.Size([24576, 11])


ipdb>  p ratg.shape


torch.Size([32, 768])


ipdb>  p  ratg_flat.shape


torch.Size([24576])


ipdb>  p  weig_flat.shape


torch.Size([24576])


ipdb>  p weig_flat


tensor([1., 1., 1.,  ..., 1., 1., 1.], dtype=torch.float64)


ipdb>  p intx_flat


tensor([[-0.3003, -0.1107,  0.0469,  ..., -0.0896,  0.0403,  0.1123],
        [-0.3001, -0.1108,  0.0468,  ..., -0.0895,  0.0403,  0.1123],
        [-0.3001, -0.1108,  0.0468,  ..., -0.0895,  0.0403,  0.1123],
        ...,
        [-0.3001, -0.1108,  0.0468,  ..., -0.0895,  0.0403,  0.1123],
        [-0.3001, -0.1108,  0.0468,  ..., -0.0895,  0.0403,  0.1123],
        [-0.3001, -0.1108,  0.0468,  ..., -0.0895,  0.0403,  0.1123]])


ipdb>  p F.cross_entropy(intx_flat, ratg_flat, weight=weig_flat, reduction='mean')


*** RuntimeError: weight tensor should be defined either for all 11 classes or no classes but got weight tensor of shape: [24576]


ipdb>  p F.cross_entropy(intx_flat, ratg_flat, weight=weig_flat[:, None], reduction='mean')


*** RuntimeError: weight tensor should be defined either for all 11 classes or no classes but got weight tensor of shape: [24576, 1]


ipdb>  p  ratg_flat


tensor([0, 0, 0,  ..., 0, 0, 0])


ipdb>  p items


tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]])


ipdb>  q
