A Jupyter notebook is used to rapidly iterate on the development of data ingestion and pre-processing, the transformer model architecture, and hyperparameter tuning.

The MB2016 Data Module processes the raw 2016 Moonboard dataset into train, validation and test datasets ready as inputs for the transformer deep learning model.

Debug MB2016 prepare_data and setup.

In [1]:
"""Sets up both local Jupyter and Google Colab notebooks in the same state."""
import os
from pathlib import Path
import shutil
import sys
import subprocess
from subprocess import PIPE, Popen

# Work out which repository this notebook belongs to.
# Inside a git checkout the repo name is the top-level directory name; otherwise
# it comes from MBML_REPO (with a hard-coded default).
try:  # check if we're in a git repo
    repo_dir = subprocess.run(
        ["git", "rev-parse", "--show-toplevel"], capture_output=True, check=True
    ).stdout.decode().strip()
    repo = Path(repo_dir).name
except (subprocess.CalledProcessError, FileNotFoundError):
    # CalledProcessError: not inside a git checkout.
    # FileNotFoundError: the git executable is not installed.
    # Always bind repo_dir so the later os.chdir(repo_dir) cannot hit a NameError;
    # the current directory makes that chdir a harmless no-op.
    repo_dir = os.getcwd()
    repo = os.environ.get("MBML_REPO", "moonboard-transformer-ml")

# Branch to clone and an optional GitHub token, both overridable via the environment.
branch = os.environ.get("MBML_BRANCH", "main")
token = os.environ.get("MBML_GHTOKEN")
# When a token is present it is embedded in the clone URL as "<token>@".
prefix = (token + "@") if token else ""

# True once the google.colab package has been imported into this interpreter.
in_colab = "google.colab" in sys.modules

def _go():
    """Put the process in the repository root, cloning it first on Colab.

    Reads the module-level ``in_colab``, ``repo``, ``branch``, ``prefix`` and
    ``repo_dir`` values.

    Outside Colab this only changes the working directory to ``repo_dir``.
    On Colab it deletes any existing ``/content/<repo>`` tree, clones the
    repository afresh next to it, changes into the clone and installs the
    Python dependencies.
    """
    if not in_colab:
        # local notebook: just move to the repo root
        os.chdir(repo_dir)
        return

    checkout = Path("/", "content", repo)

    # clone from the parent directory so the repo lands at `checkout`
    os.chdir(checkout.parent)
    shutil.rmtree(checkout, ignore_errors=True)  # discard any stale copy
    _clone_repo(repo, branch, prefix)

    os.chdir(checkout)
    _install_dependencies_colab()

def _clone_repo(repo, branch, prefix):
    """Quietly ``git clone`` one branch of ``hmaguire/<repo>`` from GitHub.

    Args:
        repo: Repository name, appended to ``github.com/hmaguire/``.
        branch: Branch passed to ``git clone --branch``.
        prefix: Text inserted between ``https://`` and ``github.com``; either
            empty or ``"<token>@"``.

    Raises:
        subprocess.CalledProcessError: If ``git clone`` exits non-zero.
    """
    remote = "https://{}github.com/hmaguire/{}".format(prefix, repo)
    clone_cmd = ["git", "clone", "--branch", branch, "-q", remote]
    subprocess.run(clone_cmd, check=True)

def _install_dependencies_colab():
    """Pip-install the prod and dev requirements, then refresh pkg_resources.

    Paths are relative, so the current working directory must contain the
    ``requirements/`` folder.

    Raises:
        subprocess.CalledProcessError: If either install command exits non-zero.
    """
    subprocess.run(  # directly pip install the prod requirements
        ["pip", "install", "--quiet", "-r", "requirements/prod.in"], check=True)

    # Install the dev requirements through a shell pipeline: drop the first line
    # of dev.in (sed 1d), drop every line containing '#', and pass what remains
    # to pip as arguments.
    # NOTE(review): the first line is presumably a reference to the prod
    # requirements -- confirm against requirements/dev.in.
    # With shell=True the command is given as one string; a list would have its
    # extra items passed to the shell itself rather than to the command.
    subprocess.run(
        "sed 1d requirements/dev.in | grep -v '#' | xargs pip install --quiet",
        shell=True, check=True)

    # reset pkg_resources list of requirements so gradio can infer its version correctly
    import pkg_resources

    pkg_resources._initialize_master_working_set()


if "bootstrap" not in locals() or bootstrap_run:
    # path management for Python
    pythonpath, = !echo $PYTHONPATH
    if "." not in pythonpath.split(":"):
        pythonpath = ".:" + pythonpath
        %env PYTHONPATH={pythonpath}
        !echo $PYTHONPATH

    # get both Colab and local notebooks into the same state
    _go()

    bootstrap = True
    # allow "hot-reloading" of modules
    %load_ext autoreload
    %autoreload 2

    bootstrap_run = False  # change to True re-run setup

!pwd
%ls


env: PYTHONPATH=.:/Users/henry/MoonBoard-Transformer-ML
.:/Users/henry/MoonBoard-Transformer-ML
/Users/henry/MoonBoard-Transformer-ML
LICENSE.txt      [34m__pycache__[m[m/     [34mgrade_predictor[m[m/ [34mrequirements[m[m/
Makefile         [34mdata[m[m/            [34mnotebooks[m[m/       [34mtasks[m[m/
README.md        environment.yml  pyproject.toml   [34mtraining[m[m/


In [None]:
# Smoke-test the training script on CPU with the MB2016 data module and the
# MB2016Transformer model; fast_dev_run limits each loop to a single batch.
!python training/run_experiment.py --max_epochs=10 --accelerator=cpu --model_class=MB2016Transformer --data_class=MB2016 --fast_dev_run=True

Missing logger folder: training/logs/lightning_logs
Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Running in fast_dev_run mode: will run a full train, val, test and prediction loop using 1 batch(es).
`Trainer(limit_train_batches=1)` was configured so 1 batch per epoch will be used.
`Trainer(limit_val_batches=1)` was configured so 1 batch will be used.
`Trainer(limit_test_batches=1)` was configured so 1 batch will be used.
`Trainer(limit_predict_batches=1)` was configured so 1 batch will be used.
`Trainer(val_check_interval=1.0)` was configured so validation will run at the end of the training epoch..

   | Name                      | Type               | Params
--------------------------------------