In [1]:
### Load preprocessed data

In [2]:
# conda  uninstall tensorboard; pip uninstall -y tensorboard; conda install tensorboard; conda install pytorch-lightning -c conda-forge

In [3]:
#!python ../src/download_ml20.py

In [2]:
import numpy as np
from sklearn.model_selection import train_test_split
fh = np.load('data/dataset_ml20_wide.npz')
# The archive holds, for the train and test splits, a 2D array of item ids and
# a 2D array of ratings; every array is truncated to the same number of columns.
# One column more than 768 is kept because the training step feeds [:, :-1]
# as input and [:, 1:] as the next-step target, leaving 768 positions each.
max_seq_len = 768 + 1
train_items = fh['train_items'].astype(np.int64)[:, :max_seq_len]
# Note that ratings have been rescaled to the integer scale (1, 2, ... 10)
train_ratng = fh['train_ratng'].astype(np.int64)[:, :max_seq_len]
test_items = fh['test_items'].astype(np.int64)[:, :max_seq_len]
test_ratng = fh['test_ratng'].astype(np.int64)[:, :max_seq_len]

# Sizes are measured before the validation rows are split off below.
n_user = train_items.shape[0]
n_rank = train_items.shape[1]
# Size the vocabularies from both splits so that an id that only occurs in the
# test arrays cannot index past the end of an embedding table.
n_item = int(max(train_items.max(), test_items.max()) + 1)
n_resp = int(max(train_ratng.max(), test_ratng.max()) + 1)

# Fix the seed so the train/validation partition is identical on every run.
split_seed = 42
train_items, val_items, train_ratng, val_ratng = train_test_split(
    train_items, train_ratng, random_state=split_seed)

In [3]:
from torch import from_numpy
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset
from torch.utils.data import BatchSampler
from torch.utils.data import SequentialSampler

def dataloader(*arrs, batch_size=64):
    """Wrap parallel tensors in a DataLoader that yields batches in fixed order.

    Args:
        *arrs: Tensors handed to ``TensorDataset``; the length of the first
            one determines how many samples are iterated.
        batch_size: Number of samples per batch. The final batch is kept even
            when it holds fewer samples than this.

    Returns:
        A ``DataLoader`` that walks the samples sequentially, without shuffling.
    """
    dataset = TensorDataset(*arrs)
    # Batches of consecutive indices 0..len-1; the short tail batch is not dropped
    index_batches = BatchSampler(
        SequentialSampler(range(len(arrs[0]))),
        batch_size=batch_size,
        drop_last=False,
    )
    return DataLoader(dataset, shuffle=False, batch_sampler=index_batches)
 
# Build one sequential loader per split from its (items, ratings) array pair
train, val, test = (
    dataloader(from_numpy(split_items), from_numpy(split_ratng))
    for split_items, split_ratng in (
        (train_items, train_ratng),
        (val_items, val_ratng),
        (test_items, test_ratng),
    )
)

#### Data Structure

Notice that the inputs are now 2D. Each row in `train_items` is a 1D stream of items seen by a single user; different rows come from different users' streams. Each stream is padded with zeros so that every row has the same fixed length. `train_ratng` has the same structure, but holds the categorical rating (scaled from [0.0, 0.5, ..., 4.5, 5.0] to [0, 1, 2, ..., 10]) that the user gave to the item at the same position.

In [6]:
# Item-id streams: one row per user, padded with zeros
train_items

array([[ 150,  296,  380, ...,    0,    0,    0],
       [6377, 2470, 4896, ...,    0,    0,    0],
       [   3, 1198, 2779, ...,    0,    0,    0],
       ...,
       [ 592,  296,  380, ...,    0,    0,    0],
       [ 158,   22, 2502, ...,    0,    0,    0],
       [ 150,  296,  592, ...,    0,    0,    0]])

In [7]:
# Rating codes laid out in parallel with train_items
train_ratng

array([[ 6, 10,  6, ...,  0,  0,  0],
       [ 8,  6,  8, ...,  0,  0,  0],
       [ 4, 10,  8, ...,  0,  0,  0],
       ...,
       [ 6,  8, 10, ...,  0,  0,  0],
       [ 6,  6,  8, ...,  0,  0,  0],
       [ 6,  8,  6, ...,  0,  0,  0]])

In [8]:
# Distinct rating codes at non-padding positions (item id > 0)
np.unique(train_ratng[train_items > 0])

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [9]:
# Distinct item ids in the training streams; 0 is the padding id
np.unique(train_items)

array([     0,      2,      3, ..., 131158, 131237, 131262])

In [10]:
# (rows, sequence positions) of the train and test item arrays
train_items.shape, test_items.shape

((93483, 769), (13849, 769))

In [10]:
# Use the %pip magic so the package lands in the running kernel's environment
# rather than in whichever `pip` happens to be first on the shell PATH.
%pip install -q reformer_pytorch

You should consider upgrading via the '/opt/conda/bin/python -m pip install --upgrade pip' command.[0m


In [11]:
import random
import torch
import numpy as np
import pandas as pd
from torch import nn
from torch import from_numpy
import pytorch_lightning as pl
from torch.nn import functional as F
from reformer_pytorch import Reformer
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger

In [12]:
from abstract_model import AbstractModel

In [13]:
class AR(AbstractModel):
    """Autoregressive next-rating model over per-user interaction streams.

    Every (item, rating) step is embedded, the sequence of step embeddings is
    run through a causal Reformer, and the resulting per-step user vector is
    scored against the following item to give one logit per rating level.
    """

    def __init__(self, n_item, n_dim, n_resp, n_rank, 
                 heads=2, depth=2, batch_size=32, weight_decay=1e-6):
        """Build the embeddings, the Reformer and the two scoring projections.

        Args:
            n_item: Number of rows in the item embedding table.
            n_dim: Embedding width, also used as the Reformer dimension.
            n_resp: Number of rating levels (rows in the response embedding
                and number of output logits).
            n_rank: Passed to the Reformer as ``max_seq_len``.
            heads: Number of Reformer attention heads.
            depth: Number of Reformer layers.
            batch_size: Stored on the instance as ``self.batch_size``.
            weight_decay: Weight decay handed to the Adam optimizer.
        """
        super().__init__()
        self.n_dim = n_dim
        self.n_item = n_item
        self.n_resp = n_resp
        self.save_hyperparameters()
        self.batch_size = batch_size
        self.weight_decay = weight_decay

        # padding_idx=0 pins the embedding of item 0 to the zero vector
        self.item = nn.Embedding(num_embeddings=n_item, embedding_dim=n_dim,
                                 padding_idx=0)
        self.resp = nn.Embedding(num_embeddings=n_resp, embedding_dim=n_dim)
        self.reformer = Reformer(dim=n_dim, depth=depth, heads=heads,
                                 causal=True, max_seq_len=n_rank)
        # Each projection emits n_dim values for every one of the n_resp levels
        self.user_lin = nn.Linear(in_features=n_dim, out_features=n_dim * n_resp)
        self.item_lin = nn.Linear(in_features=n_dim, out_features=n_dim * n_resp)

    def forward(self, items, ratng):
        """Return one user vector per sequence position.

        Args:
            items: Integer tensor of item ids; 0 is treated as padding.
            ratng: Integer tensor of rating codes, laid out like ``items``.
        """
        # An interaction is the elementwise product of its item and rating embeddings
        interaction = self.item(items) * self.resp(ratng)
        # Positions holding the padding id are flagged False in the Reformer input mask
        return self.reformer(interaction, input_mask=items != 0)

    def loss(self, user_vec, items, ratg):
        """Cross-entropy of the predicted rating level for each target item.

        Args:
            user_vec: Tensor of shape (batch, window, n_dim) from ``forward``.
            items: Target item ids, one per (batch, window) position.
            ratg: Target rating codes; positions equal to 0 are ignored.

        Returns:
            ``(loss, log)`` where ``log`` maps ``"cross_entropy"`` to the loss.
        """
        n_batch, n_step, width = user_vec.shape
        per_resp_shape = (n_batch, n_step, width, self.n_resp)
        # Project each user / item vector to one vector per possible response
        user_proj = self.user_lin(user_vec).reshape(per_resp_shape)
        item_proj = self.item_lin(self.item(items)).reshape(per_resp_shape)
        # Slot 0 along the feature axis acts as a bias; the rest are the factors
        user_bias, user_fact = user_proj[:, :, 0, :], user_proj[:, :, 1:, :]
        item_bias, item_fact = item_proj[:, :, 0, :], item_proj[:, :, 1:, :]
        # Contract the factor axis, leaving (batch, window, n_resp) logits
        logits = user_bias + item_bias + torch.sum(user_fact * item_fact, dim=2)
        flat_logits = logits.reshape((n_batch * n_step, self.n_resp))
        flat_target = ratg.reshape((n_batch * n_step))
        xent = F.cross_entropy(flat_logits, flat_target,
                               ignore_index=0, reduction='mean')
        return xent, {"cross_entropy": xent}

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters with the stored weight decay."""
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-4,
                                     weight_decay=self.weight_decay)
        return optimizer

    def step(self, batch, batch_nb, prefix='train', add_reg=True):
        """Run one next-interaction prediction step on a batch.

        Args:
            batch: Pair ``(items, ratng)`` of 2D tensors.
            batch_nb: Batch index; not used here.
            prefix: Name prepended to ``_loss`` in the returned keys.
            add_reg: Not used here.

        Returns:
            Dict with ``'<prefix>_loss'``, ``'loss'`` and ``'log'`` entries.
        """
        items, ratng = batch
        # Inputs are every position but the last; targets are shifted one step ahead
        seen_items, seen_ratng = items[:, :-1], ratng[:, :-1]
        next_items, next_ratng = items[:, 1:], ratng[:, 1:]
        user_vec = self.forward(seen_items, seen_ratng)
        loss, log = self.loss(user_vec, next_items, next_ratng)
        loss_key = f'{prefix}_loss'
        log[loss_key] = loss
        return {loss_key: loss, 'loss': loss, 'log': log}

    def reg(self):
        """Return a zero penalty; regularization is left to optimizer weight decay."""
        return 0.0, {}

In [14]:
# Import from `pytorch_lightning.loggers`, the package this notebook already
# uses for TensorBoardLogger; `pytorch_lightning.logging` is a deprecated path.
from pytorch_lightning.loggers import WandbLogger

# Embedding width shared by the item / response embeddings and the Reformer
n_dim = 32
model = AR(n_item, n_dim, n_resp, n_rank, 
           heads=8, depth=6, batch_size=64)
# NOTE(review): save_data is defined on AbstractModel, which is not shown here.
# The validation arrays (val_items / val_ratng) are not passed in — confirm that
# this is intended.
model.save_data(train_items, train_ratng, test_items, test_ratng)
logger = WandbLogger(name="10_mf", project="simple_mf")

trainer = pl.Trainer(max_epochs=100, logger=logger,
                     early_stop_callback=True,
                     gpus=1, progress_bar_refresh_rate=1) 

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
CUDA_VISIBLE_DEVICES: [0]


In [None]:
# Train with the Trainer configured above (max_epochs=100, early stopping enabled)
trainer.fit(model)

In [None]:
# Run the test loop on the trained model
trainer.test(model)

In [None]:
# Run the test loop again, this time keeping its return value.
# NOTE(review): the string indexing below assumes trainer.test returns a mapping
# with an 'avg_test_loss' key — confirm against the installed pytorch-lightning
# version and AbstractModel's test hooks.
results = trainer.test(model)
results['avg_test_loss']