A Jupyter notebook is used to rapidly iterate on the development of data ingestion and pre-processing, the transformer model architecture, and hyperparameter tuning.

The MB2016 Data Module processes the raw 2016 Moonboard dataset into train, validation and test datasets ready as inputs for the transformer deep learning model.

Debug MB2016 prepare_data and setup.

In [24]:
from grade_predictor.data.mb2016 import MB2016
# Instantiate the MB2016 data module with its default arguments.
mb2016 = MB2016()
# Run the module's prepare_data step and then its setup step; the training
# cell further down reads mb2016.train_dataloader() from this same object.
mb2016.prepare_data()
mb2016.setup()

In [25]:
from IPython.display import display

In [78]:
from typing import Tuple
import pytorch_lightning as pl
import torch
import logging  # import some stdlib components to control what's display
import textwrap
import traceback
from grade_predictor.lit_models.base import BaseLitModel


class LinearRegression(pl.LightningModule):
    """Small transformer-encoder regressor that maps an index sequence to one scalar.

    Despite the name (kept so existing references keep working), the network is:
    embedding -> transformer encoder -> per-position linear -> flatten -> linear.

    Args:
        num_embeddings: Number of rows in the embedding table.
        d_model: Width of each embedding vector and of the encoder features.
        nhead: Number of attention heads in each encoder layer.
        num_layers: Number of stacked encoder layers.
        seq_len: Number of positions in every input sequence; the final linear
            layer is sized to it, so inputs must have exactly this length.
    """

    def __init__(
        self,
        num_embeddings: int = 199,
        d_model: int = 3,
        nhead: int = 1,
        num_layers: int = 1,
        seq_len: int = 15,
    ) -> None:
        super().__init__()  # required before attaching sub-modules, as in torch.nn.Module

        # max_norm=1.0 is the explicit form of the previous `max_norm=True`.
        self.embedding = torch.nn.Embedding(num_embeddings, d_model, max_norm=1.0)
        # batch_first=True: forward() hands the encoder a (batch, seq, d_model)
        # tensor. With the default batch_first=False the encoder would treat the
        # batch axis as the sequence axis and attend across different samples.
        encoder_layer = torch.nn.TransformerEncoderLayer(d_model, nhead, batch_first=True)
        self.transformer_encoder = torch.nn.TransformerEncoder(encoder_layer, num_layers)
        # NOTE: no positional embedding is applied to the inputs.
        self.linear = torch.nn.Linear(in_features=d_model, out_features=1)
        self.flatten = torch.nn.Flatten()
        self.linear2 = torch.nn.Linear(in_features=seq_len, out_features=1)

    def forward(self, xs: torch.Tensor) -> torch.Tensor:
        """Run the network on a batch of index sequences.

        Args:
            xs: Integer index tensor of shape ``(batch, seq_len)``.

        Returns:
            Tensor of shape ``(batch, 1)`` holding one prediction per sequence.
        """
        xs = self.embedding(xs)            # (batch, seq_len, d_model)
        xs = self.transformer_encoder(xs)  # (batch, seq_len, d_model)
        xs = self.linear(xs)               # (batch, seq_len, 1)
        xs = self.flatten(xs)              # (batch, seq_len)
        xs = self.linear2(xs)              # (batch, 1)
        return xs

def training_step(self: pl.LightningModule, batch: Tuple[torch.Tensor, torch.Tensor], batch_idx: int) -> torch.Tensor:
    """Return the mean-squared-error loss for one training batch.

    Args:
        self: The module being trained; it is called directly on the inputs.
        batch: Pair of ``(inputs, targets)`` tensors.
        batch_idx: Index of the batch (unused here).

    Returns:
        Scalar MSE loss between the module's outputs and the targets.
    """
    features, targets = batch
    # Keep only index 0 along dimension 1 of the inputs.
    first_slice = features[:, 0]
    # Add a trailing dimension so the targets line up with the model outputs.
    targets = targets.unsqueeze(1)
    predictions = self(first_slice)
    return torch.nn.functional.mse_loss(predictions, targets)


def configure_optimizers(self: LinearRegression) -> torch.optim.Optimizer:
    """Build the optimizer: Adam over all of the module's parameters, lr 3e-4."""
    # Learning-rate choice reference: https://fsdl.me/ol-reliable-img
    learning_rate = 3e-4
    return torch.optim.Adam(self.parameters(), lr=learning_rate)

In [79]:
from torchvision import models
from torchsummary import summary

# Build the model; this same instance is trained in the next cell.
model = LinearRegression()
# Summarise the layers for a single input of shape (15,) with int32 dtype.
summary(model, (15,), dtypes=[torch.int32])

Layer (type:depth-idx)                        Output Shape              Param #
├─Embedding: 1-1                              [-1, 15, 3]               597
├─TransformerEncoder: 1-2                     [-1, 15, 3]               --
|    └─ModuleList: 2                          []                        --
|    |    └─TransformerEncoderLayer: 3-1      [-1, 15, 3]               14,399
├─Linear: 1-3                                 [-1, 15, 1]               4
├─Flatten: 1-4                                [-1, 15]                  --
├─Linear: 1-5                                 [-1, 1]                   16
Total params: 15,016
Trainable params: 15,016
Non-trainable params: 0
Total mult-adds (M): 0.04
Input size (MB): 0.00
Forward/backward pass size (MB): 0.24
Params size (MB): 0.06
Estimated Total Size (MB): 0.29


Layer (type:depth-idx)                        Output Shape              Param #
├─Embedding: 1-1                              [-1, 15, 3]               597
├─TransformerEncoder: 1-2                     [-1, 15, 3]               --
|    └─ModuleList: 2                          []                        --
|    |    └─TransformerEncoderLayer: 3-1      [-1, 15, 3]               14,399
├─Linear: 1-3                                 [-1, 15, 1]               4
├─Flatten: 1-4                                [-1, 15]                  --
├─Linear: 1-5                                 [-1, 1]                   16
Total params: 15,016
Trainable params: 15,016
Non-trainable params: 0
Total mult-adds (M): 0.04
Input size (MB): 0.00
Forward/backward pass size (MB): 0.24
Params size (MB): 0.06
Estimated Total Size (MB): 0.29

In [80]:

# logging.getLogger("pytorch_lightning").setLevel(logging.ERROR)

# Attach the module-level functions to the class as methods; the class body
# itself does not define them.
LinearRegression.configure_optimizers = configure_optimizers
LinearRegression.training_step = training_step

# `gpus` evaluates to 1 when CUDA is available and 0 otherwise; training stops after at most 10 epochs.
# NOTE(review): `gpus=` may be deprecated in newer PyTorch Lightning releases — confirm against the installed version.
trainer = pl.Trainer(gpus=int(torch.cuda.is_available()), max_epochs=10)
# Only the training dataloader is supplied; no validation dataloader is passed to fit().
trainer.fit(model=model, train_dataloaders=mb2016.train_dataloader())

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name                | Type               | Params
-----------------------------------------------------------
0 | embedding           | Embedding          | 597   
1 | transformer_encoder | TransformerEncoder | 14.4 K
2 | linear              | Linear             | 4     
3 | flatten             | Flatten            | 0     
4 | linear2             | Linear             | 16    
-----------------------------------------------------------
15.0 K    Trainable params
0         Non-trainable params
15.0 K    Total params
0.060     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x117954310>
Traceback (most recent call last):
  File "/Users/henry/anaconda3/envs/moonboard-transformer-ml/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1478, in __del__
    self._shutdown_workers()
  File "/Users/henry/anaconda3/envs/moonboard-transformer-ml/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1442, in _shutdown_workers
    w.join(timeout=_utils.MP_STATUS_CHECK_INTERVAL)
  File "/Users/henry/anaconda3/envs/moonboard-transformer-ml/lib/python3.10/multiprocessing/process.py", line 149, in join
    res = self._popen.wait(timeout)
  File "/Users/henry/anaconda3/envs/moonboard-transformer-ml/lib/python3.10/multiprocessing/popen_fork.py", line 40, in wait
    if not wait([self.sentinel], timeout):
  File "/Users/henry/anaconda3/envs/moonboard-transformer-ml/lib/python3.10/multiprocessing/connection.py", line 931, in wait
    ready = selector.select(timeout)
  File 