The following code sets up the environment used by FSDL for the labs used in their 2022 deep learning course.

In [1]:
"""Sets up both local Jupyter and Google Colab notebooks in the same state."""
import os
from pathlib import Path
import shutil
import sys
import subprocess
from subprocess import PIPE, Popen

# Work out which repository this notebook belongs to. Prefer the enclosing git
# checkout; otherwise fall back to the MBML_REPO environment variable (or the
# default repository name).
try:  # check if we're in a git repo
    repo_dir = subprocess.run(
        ["git", "rev-parse", "--show-toplevel"], capture_output=True, check=True
    ).stdout.decode().strip()
    repo = Path(repo_dir).name
except (subprocess.CalledProcessError, FileNotFoundError):
    # CalledProcessError: we are not inside a git checkout.
    # FileNotFoundError: the git executable is not installed.
    # Either way repo_dir must still be defined, because _go() changes into it
    # on local (non-Colab) runs; staying in the current directory is the no-op choice.
    repo_dir = os.getcwd()
    repo = os.environ.get("MBML_REPO", "moonboard-transformer-ml")

# branch to clone, and an optional GitHub token used to build the clone URL
branch = os.environ.get("MBML_BRANCH", "dev")
token = os.environ.get("MBML_GHTOKEN")
prefix = token + "@" if token else ""

# True when the google.colab module has been loaded into this interpreter
in_colab = "google.colab" in sys.modules

def _go():
    """Move into the repo root, cloning and installing dependencies first when on Colab."""
    if not in_colab:
        # local run: just move to the repo root
        os.chdir(repo_dir)
        return

    # Colab run: rebuild the checkout from scratch under /content
    checkout = Path("/", "content", repo)
    os.chdir(checkout.parent)

    shutil.rmtree(checkout, ignore_errors=True)  # drop any previous copy
    _clone_repo(repo, branch, prefix)

    os.chdir(checkout)
    _install_dependencies_colab()

def _clone_repo(repo, branch, prefix):
    """Quietly git-clone one branch of hmaguire/<repo> into the current directory.

    Args:
        repo: repository name under the hmaguire GitHub account.
        branch: branch to check out.
        prefix: text placed directly before ``github.com`` in the clone URL.

    Raises:
        subprocess.CalledProcessError: if git exits with a non-zero status.
    """
    remote = f"https://{prefix}github.com/hmaguire/{repo}"
    clone_cmd = ["git", "clone", "--branch", branch, "-q", remote]
    subprocess.run(clone_cmd, check=True)

def _install_dependencies_colab():
    """Install the prod and dev requirements into the running kernel's environment.

    The requirements files are addressed relative to the current working
    directory, so this must be called from the repo root.

    Raises:
        subprocess.CalledProcessError: if any install command exits non-zero.
    """
    import shlex  # local import: only needed to quote the interpreter path below

    # Invoke pip through the kernel's own interpreter so packages are installed
    # into the environment this notebook imports from, not whichever `pip`
    # happens to be first on PATH.
    pip = [sys.executable, "-m", "pip"]

    subprocess.run(  # directly pip install the prod requirements
        [*pip, "install", "--quiet", "-r", "requirements/prod.in"], check=True)

    # run a series of commands with pipes to pip install the dev requirements:
    # skip the first line, drop every line containing '#', install what is left.
    # With shell=True the command is passed as a single string; in a list, any
    # extra items would be handed to the shell itself rather than the command.
    dev_install = (
        "sed 1d requirements/dev.in | grep -v '#' | "
        f"xargs {shlex.quote(sys.executable)} -m pip install --quiet"
    )
    subprocess.run(dev_install, shell=True, check=True)

    # reset pkg_resources list of requirements so gradio can infer its version correctly
    import pkg_resources

    pkg_resources._initialize_master_working_set()


# One-time notebook bootstrap: put "." on PYTHONPATH, move to the repo root
# (cloning/installing on Colab via _go), and enable autoreload.
# NOTE(review): bootstrap_run is assigned True immediately before the guard
# below, so the guard is always true and setup re-runs on every execution of
# this cell; the `bootstrap_run = False` at the end never takes effect on the
# next run. Confirm whether that is intended.
bootstrap_run = True
if "bootstrap" not in locals() or bootstrap_run:
    # path management for Python
    pythonpath, = !echo $PYTHONPATH
    if "." not in pythonpath.split(":"):
        # prepend the current directory so repo-local packages are importable
        pythonpath = ".:" + pythonpath
        %env PYTHONPATH={pythonpath}
        !echo $PYTHONPATH

    # get both Colab and local notebooks into the same state
    _go()

    bootstrap = True
    # allow "hot-reloading" of modules
    %load_ext autoreload
    %autoreload 2

    bootstrap_run = False  # change to True re-run setup

# show the working directory and its contents after setup
!pwd
%ls

env: PYTHONPATH=.:/Users/henry/MoonBoard-Transformer-ML
.:/Users/henry/MoonBoard-Transformer-ML
/Users/henry/MoonBoard-Transformer-ML
LICENSE.txt      [34m__pycache__[m[m/     [34mgrade_predictor[m[m/ [34mrequirements[m[m/
Makefile         [34mdata[m[m/            [34mnotebooks[m[m/       [34mtasks[m[m/
README.md        environment.yml  pyproject.toml   [34mtraining[m[m/


Download the compilation of 2016 MoonBoard problem data from the GitHub project.

In [2]:
from IPython.display import display, HTML, IFrame

full_width = True
frame_height = 720  # adjust for your screen

# when enabled, inject CSS straight into the notebook page so that both the
# container and the cell outputs are allowed to use 100% of the width
if full_width:
    for style_tag in (
        "<style>.container { width:100% !important; }</style>",
        "<style>.output_result { max-width:100% !important; }</style>",
    ):
        display(HTML(style_tag))

Create a pandas DataFrame from the pickle file.

In [None]:
%%time
import torch


# 1 when torch reports an available CUDA device, otherwise 0; the value is
# interpolated into the --gpus flag of the %run command below
gpus = int(torch.cuda.is_available())

# run the training script inside this kernel with the flags given below
%run training/run_experiment.py --model_class MB2016Transformer --data_class MB2016 \
  --batch_size 32 --gpus {gpus} --max_epochs 2 \
  --limit_train_batches 0.1 --limit_val_batches 0.1 --limit_test_batches 0.1 --log_every_n_steps 10 --fast_dev_run=True

In [9]:
%%time
PYTORCH_ENABLE_MPS_FALLBACK=1

%run training/run_experiment.py --model_class MB2016Transformer --lr 0.05 --data_class MB2016 \
  --batch_size 32 --max_epochs 2 \
  --limit_train_batches 0.1 --limit_val_batches 0.1 --limit_test_batches 0.1 --log_every_n_steps 10 --fast_dev_run=True --accelerator="mps"

usage: run_experiment.py [--logger [LOGGER]]
                         [--checkpoint_callback [CHECKPOINT_CALLBACK]]
                         [--enable_checkpointing [ENABLE_CHECKPOINTING]]
                         [--default_root_dir DEFAULT_ROOT_DIR]
                         [--gradient_clip_val GRADIENT_CLIP_VAL]
                         [--gradient_clip_algorithm GRADIENT_CLIP_ALGORITHM]
                         [--process_position PROCESS_POSITION]
                         [--num_nodes NUM_NODES]
                         [--num_processes NUM_PROCESSES] [--devices DEVICES]
                         [--gpus GPUS] [--auto_select_gpus [AUTO_SELECT_GPUS]]
                         [--tpu_cores TPU_CORES] [--ipus IPUS]
                         [--log_gpu_memory LOG_GPU_MEMORY]
                         [--progress_bar_refresh_rate PROGRESS_BAR_REFRESH_RATE]
                         [--enable_progress_bar [ENABLE_PROGRESS_BAR]]
                         [--overfit_batches OVERFIT_BATCHES]
   

SystemExit: 2

CPU times: user 4.01 ms, sys: 5.72 ms, total: 9.73 ms
Wall time: 16.4 ms


In [None]:
# from pathlib import Path
#
# DATA_DIRNAME = Path(__file__).resolve().parents[3] / "data"
# DOWNLOADED_DATA_DIRNAME = DATA_DIRNAME / "downloaded"


In [None]:

from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import StratifiedShuffleSplit
from torch.utils.data import random_split
import itertools
from itertools import permutations

# --- filtering thresholds ---
MINIMUM_REPEATS = 2
MINIMUM_GRADE_COUNT = 50
MIN_MID_HOLDS = 2
MAX_START_HOLDS = 2
MAX_MID_HOLDS = 11
MAX_END_HOLDS = 2

# longest hold sequence that can remain once every per-section cap is applied
MAX_SEQUENCE = sum((MAX_START_HOLDS, MAX_MID_HOLDS, MAX_END_HOLDS))

# keep only climbs that have been repeated often enough
enough_repeats = data['repeats'] >= MINIMUM_REPEATS
cleaned_data = data.loc[enough_repeats]

# keep only grades that still hold enough climbs after the repeats filter
grade_count = cleaned_data['grade'].value_counts()
grades = grade_count[grade_count >= MINIMUM_GRADE_COUNT].index.tolist()
cleaned_data = cleaned_data.loc[cleaned_data['grade'].isin(grades)]

# drop climbs with too many, then too few, mid holds
mid_lengths = cleaned_data['mid'].map(len)
cleaned_data = cleaned_data.loc[mid_lengths <= MAX_MID_HOLDS]
mid_lengths = cleaned_data['mid'].map(len)
cleaned_data = cleaned_data.loc[mid_lengths >= MIN_MID_HOLDS]

# drop climbs with too many start holds, then too many end holds
start_lengths = cleaned_data['start'].map(len)
cleaned_data = cleaned_data.loc[start_lengths <= MAX_START_HOLDS]
end_lengths = cleaned_data['end'].map(len)
cleaned_data = cleaned_data.loc[end_lengths <= MAX_END_HOLDS]

# Build the lookup from a coordinate pair to its integer token id.
# Ids are handed out in itertools.product order starting at 1, so 0 is never assigned.
rows = range(11)
columns = range(18)
positions = itertools.product(rows, columns)
token_dict = {coordinate: token_id for token_id, coordinate in enumerate(positions, start=1)}

def df_row_to_input(row, max_sequence):
  """Encode one climb as a (4, max_sequence) integer tensor.

  Holds are written left to right in the order start, mid, end. For each hold:
    channel 0 -- its token id, looked up in token_dict
    channel 1 -- which section it came from (1 = start, 2 = mid, 3 = end)
    channel 2 -- the first coordinate of the hold
    channel 3 -- the second coordinate of the hold
  Positions past the last hold stay 0.
  """
  encoded = torch.zeros((4, max_sequence), dtype=torch.int)
  sections = ((1, row['start']), (2, row['mid']), (3, row['end']))
  cursor = 0  # next free position along the sequence axis
  for section_id, holds in sections:
    for hold in holds:
      coordinate = tuple(hold)
      encoded[0, cursor] = token_dict[coordinate]
      encoded[1, cursor] = section_id
      encoded[2, cursor] = coordinate[0]
      encoded[3, cursor] = coordinate[1]
      cursor += 1
  return encoded

def df_to_input_array(df):
  """Encode every climb in ``df`` with df_row_to_input.

  Args:
    df: DataFrame providing the 'start', 'mid' and 'end' columns that
      df_row_to_input reads from each row.

  Returns:
    The result of ``df.apply(..., axis=1)``: one encoded (4, MAX_SEQUENCE)
    tensor per row of ``df``. NOTE(review): this is the pandas result, not a
    single stacked tensor -- stack it downstream if a batch tensor is needed.
  """
  # The previous version pre-allocated a zeros tensor here and overwrote it on
  # the very next line; that dead allocation has been removed.
  return df.apply(lambda climb: df_row_to_input(climb, MAX_SEQUENCE), axis=1)

def positional_2d_space_encoding():
  """Placeholder for a 2D positional encoding; not implemented yet, returns None."""
  return None

def positional_1d_encoding():
  """Placeholder for a 1D positional encoding; not implemented yet, returns None."""
  return None


# Grade stratified test and train split
# split = StratifiedShuffleSplit(n_splits=1, test_size = 0.2, random_state=42)
# for train_index, test_index in split.split(cleaned_data,cleaned_data['grade']):
#   strat_train_set = cleaned_data.iloc[train_index]
#   strat_test_set = cleaned_data.iloc[test_index]



In [None]:
# X Inputs: encode the start / mid / end hold lists of every cleaned climb
hold_columns = ['start', 'mid', 'end']
X = df_to_input_array(cleaned_data[hold_columns])

In [None]:
# Y labels
# Take the grades from cleaned_data rather than the unfiltered data: X is
# encoded from cleaned_data, so using `data` here would give Y a different
# number of rows than X.
lb = LabelBinarizer()
Y = cleaned_data.loc[:,'grade']
# one-hot encode the grade labels and convert to a float tensor
Y = torch.FloatTensor(lb.fit_transform(Y))

In [None]:
# pip install einops
# pip install positional-encodings[pytorch]

In [None]:
import torch
import torch.nn as nn
from einops import rearrange

# borrowed from lucidrains
#https://github.com/lucidrains/bottleneck-transformer-pytorch/blob/main/bottleneck_transformer_pytorch/bottleneck_transformer_pytorch.py#L21
def relative_to_absolute(q):
    """
    Re-index the last axis from relative distances (length 2*tokens-1)
    to absolute positions (length tokens) using zero-padding and a reshape.
      Input: [bs, heads, length, 2*length - 1]
      Output: [bs, heads, length, length]
    """
    batch, heads, length, _ = q.shape
    like_q = dict(device=q.device, dtype=q.dtype)

    # append one zero column so the last axis grows from 2l-1 to 2l
    extra_column = torch.zeros((batch, heads, length, 1), **like_q)
    widened = torch.cat((q, extra_column), dim=3)

    # flatten the (length, 2l) grid into one axis of l * 2l entries
    flattened = rearrange(widened, 'b h l c -> b h (l c)')

    # add l-1 trailing zeros: l*2l + (l-1) == (l+1) * (2l-1), which makes the reshape below valid
    tail = torch.zeros((batch, heads, length - 1), **like_q)
    flattened = torch.cat((flattened, tail), dim=2)

    # view as (l+1) rows of 2l-1 entries, then keep the first l rows and last l columns
    regrouped = flattened.reshape(batch, heads, length + 1, 2 * length - 1)
    return regrouped[:, :, :length, (length - 1):]


def rel_pos_emb_1d(q, rel_emb, shared_heads):
    """
    Functional counterpart of RelPosEmb1D.

    Args:
        q: a 4d tensor of shape [batch, heads, tokens, dim]
        rel_emb: a 2D tensor of shape [2*tokens-1, dim] when shared_heads is
            truthy, otherwise a 3D tensor of shape [heads, 2*tokens-1, dim]
        shared_heads: selects which of the two rel_emb layouts is contracted with q
    """
    # contract q with the embedding table over the feature axis d
    equation = 'b h t d, r d -> b h t r' if shared_heads else 'b h t d, h r d -> b h t r'
    relative_scores = torch.einsum(equation, q, rel_emb)
    return relative_to_absolute(relative_scores)

class RelPosEmb1D(nn.Module):
    """Learned 1D relative positional embedding for the Moonboard hold map."""

    def __init__(self, feat_map_size, dim_head, heads=None):
        """
        Args:
            feat_map_size: currently unused -- the hold map size is fixed at
                18 x 11 below; the parameter is kept so the signature is unchanged.
            dim_head: size of the last dimension of q.
            heads: None to share one embedding table across all heads,
                otherwise the number of heads (one table per head).
        """
        super().__init__()
        self.h, self.w = (18, 11)  # height, width of Moonboard hold map
        self.total_tokens = self.h * self.w

        # Share the table only when no head count is given. The previous
        # expression (`heads if heads is not None else True`) was truthy for
        # any non-zero head count, so the per-head branch below was unreachable.
        self.shared_heads = heads is None

        # The previous version also called rel_pos_emb_1d(self.h, dim_head, heads)
        # here, passing ints where tensors are required; that raised at
        # construction time, so those calls were removed.

        # NOTE(review): the original referenced an undefined name `tokens`.
        # total_tokens (h * w) is assumed to be the intended sequence length -- confirm.
        tokens = self.total_tokens
        scale = dim_head ** -0.5
        if self.shared_heads:
            self.rel_pos_emb = nn.Parameter(torch.randn(2 * tokens - 1, dim_head) * scale)
        else:
            self.rel_pos_emb = nn.Parameter(torch.randn(heads, 2 * tokens - 1, dim_head) * scale)

    def forward(self, q, position_info):
        """Return rel_pos_emb_1d applied to q; position_info is accepted but not used yet."""
        return rel_pos_emb_1d(q, self.rel_pos_emb, self.shared_heads)

Data input
X: Token id list [0,543,432,1,23,24,56,676,3,4,4,4,4]
X 2D position
X 1D position

[S, [4, 7], [1, 10],SM,[4, 12], [6, 12], ME, [6, 16], E, P,P,P]
-> token id list [0,45,54,1,63,74,2,82,3,4,4,4] [tokens, dim]
-> 1D [0,1,1,2,3,3,4,5,6,6,6] >
[tokens, dim]
pos_emb1D = torch.nn.Parameter(torch.randn(max_seq_tokens, dim))

-> 2D -> yD and xD combined -> [tokens, dim] with exception for S,SM,ME,E,P.
x > 11 horizontal > -10 to 10
y > 17 vertical > - 16 to 16
    
Y: Label binarize [0,0,0,1,0,0,0]



# initialization
pos_emb1D = torch.nn.Parameter(torch.randn(max_seq_tokens, dim))
# during forward pass
input_to_transformer_mhsa = input_embedding + pos_emb1D[:current_seq_tokens, :]


encoding makes this easier

[4,7] > token id > token embedding of size dim

batchsize, x, channel
50 tokens, 1d positions, embedding size
batchsize, x,y, channel


input:
single data example for d/model size 4
max sequence length: 4
one word: [0.1,0.5,0.9,0.1,0.2] size 5 token
[0] series position -> [0.1,0.4,0.7,0.8,0.9] sin encoding for 0
[7,8] x,y abs position -> [0.1,0.4,0.7,0.8,0.9] encoding in 2d space for [7,8]


embed:
position
x
y


3d positional encoding dimensions:
batchsize, tokens, max_sequence, max_width, max_height, channels


reduce to appropriate



In [None]:
import argparse
import pickle
import pandas as pd
import numpy as np
import torch
from sklearn.preprocessing import LabelBinarizer
from torch.utils.data import random_split
import itertools
from itertools import permutations

from text_recognizer.data.base_data_module import BaseDataModule, load_and_print_info

from sklearn.preprocessing import LabelBinarizer

# DIMS = (1, 28, 28)
# OUTPUT_DIMS = (1,)
# MAPPING = list(range(10))
# TRAIN_SIZE = 55000
# VAL_SIZE = 5000


""" Metada """

# DL_DATA_DIRNAME = "/content/drive/MyDrive/Datasets/"
# EXTRACTED_DATASET_DIRNAME = DL_DATA_DIRNAME / "moonGen_scrape_2016_final.pkl"

MINIMUM_REPEATS = 2  # climbs with fewer 'repeats' than this are dropped in prepare_data
MINIMUM_GRADE_COUNT = 50  # grades with fewer climbs than this are dropped in prepare_data
# hold-count limits; NOTE(review): not referenced by the MB2016 class below yet
MAX_START_HOLDS = 2
MAX_MID_HOLDS = 11
MIN_MID_HOLDS = 2
MAX_END_HOLDS = 2

class MB2016(BaseDataModule):
    """MB2016 DataModule."""

    def __init__(self, args=None) -> None:
        super().__init__(args)
        # self.data_dir = metadata.DOWNLOADED_DATA_DIRNAME
        # self.transform = MNISTStem()
        # self.input_dims = metadata.DIMS
        # self.output_dims = OUTPUT_DIMS
        # self.mapping = metadata.MAPPING

    def _load_frames(self):
        """Read the pickled scrape and return ``(data, cleaned_data)``.

        ``data`` is the full DataFrame built from the pickle; ``cleaned_data``
        is ``data`` with low-repeat climbs and sparsely populated grades removed.
        Shared by prepare_data and setup so neither depends on the other's locals.
        """
        # NOTE(review): pickle.load can execute arbitrary code from the file;
        # only load a dataset file you trust.
        # NOTE(review): the path is hard-coded to a mounted Colab Drive location.
        with open('/content/drive/MyDrive/Datasets/moonGen_scrape_2016_final.pkl', 'rb') as pkl_file:
            pickle_data = pickle.load(pkl_file)
        data = pd.DataFrame.from_dict(pickle_data).T

        # remove climbs with low or no amounts of repeats
        cleaned_data = data[data['repeats'] >= MINIMUM_REPEATS]

        # remove climbs with grades that contain less than a minimum number of climbs
        grade_count = cleaned_data['grade'].value_counts()
        grades = [key for key, value in grade_count.items() if value >= MINIMUM_GRADE_COUNT]
        cleaned_data = cleaned_data[cleaned_data['grade'].isin(grades)]

        return data, cleaned_data

    def prepare_data(self) -> None:
        """Load the dataset and run the cleaning filters (the result is not stored)."""
        # extract dataset to pandas dataframe and clean it
        self._load_frames()

        # max length >>> TODO Clean remove climbs with crazy number of holds
        # add <S> <SM> <ME> <P> <P> <P> for x input
        # mapping for [1,2] etc required > transform to tensor with mapped values

    def setup(self, stage=None) -> None:
        """Build the train/test split and the (work-in-progress) X / Y inputs.

        Args:
            stage: optional stage argument; accepted with a default so the method
                can be called both with and without it. Currently unused.

        NOTE(review): every result below is still a local variable and is
        discarded on return -- nothing is stored on the module yet.
        """
        # local imports: neither name is imported in this cell of the notebook
        from IPython.display import display
        from sklearn.model_selection import StratifiedShuffleSplit

        # prepare_data keeps no state, so load and clean the frames again here
        data, cleaned_data = self._load_frames()

        # Cross validation TODO

        # Grade stratified test and train split
        split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
        for train_index, test_index in split.split(cleaned_data, cleaned_data['grade']):
            strat_train_set = cleaned_data.iloc[train_index]
            strat_test_set = cleaned_data.iloc[test_index]

        # pipeline for X input
        # - weights
        # - list of lists > list of tokens (use tuples?)
        # - token has an embedding mapped to it

        # Token dictionary
        # drop repeats with only 1 repeat, and v12 +

        # copy so the column assignments below modify X and not a slice of data
        X = data.loc[:, ['start', 'mid', 'end']].copy()

        X['start'] = X['start'].map(lambda x: [str(x1) for x1 in x])

        # X['start'] = X['start'].map(lambda x: np.asarray(x))
        # X['mid'] = X['mid'].map(lambda x: np.asarray(x))
        # X['end'] = X['end'].map(lambda x: np.asarray(x))

        # debugging output: first start entry (by position), the frame, and its dtypes
        display(X['start'].iloc[0])
        display(X)

        X.info(verbose=True)

        X['mid'] = X['mid'].apply(lambda x: np.ravel(x))
        X['end'] = X['end'].apply(lambda x: np.ravel(x))

        # one-hot encode the grade labels
        Y = data.loc[:, 'grade']
        lb = LabelBinarizer()
        Y = torch.Tensor(lb.fit_transform(Y))


# instantiate the data module with its default args and run its prepare_data step
data_holds = MB2016()

data_holds.prepare_data()

In [None]:
import pytorch_lightning as pl
from torch import nn

class GradePredictorModel(nn.Module):
  """Work-in-progress grade predictor: embedding, positional encoder and transformer encoder stack.

  NOTE(review): Positional2DEncoding, tf_nhead, tf_fc_dim, tf_dropout and
  tf_layers are not defined in any visible cell of this notebook, so
  constructing this class is expected to raise NameError until they are
  supplied. No forward method is defined here.
  """
  def __init__(self) -> None:
    super().__init__()
    # placeholder sizes (marked "?" by the author)
    self.input_dims = 5#?
    self.num_classes = 7#?
    self.dim = 2 #?


    # NOTE(review): the embedding table is sized by num_classes -- confirm this
    # is meant to be the number of distinct input tokens rather than output classes
    self.embedding = nn.Embedding(self.num_classes, self.dim)
    self.enc_pos_encoder = Positional2DEncoding()
    # despite the attribute name, this is an nn.TransformerEncoder stack
    self.transformer_decoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=self.dim, nhead=tf_nhead, dim_feedforward=tf_fc_dim, dropout=tf_dropout),
            num_layers=tf_layers,
        )


class GradePredictor(pl.LightningModule):
    """LightningModule that wraps a single 1-in / 1-out linear layer."""

    def __init__(self):
        # the parent class __init__ has to run first, just as with torch.nn.Module
        super().__init__()

        # sub-modules are attached as top-level attributes during init;
        # the whole model lives in this one torch.nn.Module
        self.model = torch.nn.Linear(1, 1)

    # optional forward method
    def forward(self, xs):
        """Delegate to the wrapped model's forward pass."""
        predictions = self.model(xs)
        return predictions