In [None]:
from pathlib import Path
from dummy_problems.dataloaders import LettersDataModule
from dummy_problems.models import MODEL_TYPES
import lightning as L

# Support Vector Machine

In [None]:
# Train SVM
def train_svm(dataset_size=None):
    """Fit the SVM letter classifier on the training split.

    Builds a ``LettersDataModule`` and the ``MODEL_TYPES["SVM"]`` model from
    one settings dict, prepares the data with ``setup("train")`` and calls
    ``model.fit`` on ``data.train_dataset``.

    Args:
        dataset_size: Optional tag for the checkpoint file name. ``None``
            (the default) keeps the original ``weights/svm.pkl`` path. Any
            other value selects ``weights/svm_<dataset_size>.pkl``, which is
            the file name ``test_svm(dataset_size=...)`` points at. The tag
            only changes the checkpoint path; the data always comes from
            ``dataset_dir``.
    """
    weights_dir = "/home/ubuntu/dummy-problems/weights"
    if dataset_size is None:
        checkpoint = f"{weights_dir}/svm.pkl"
    else:
        checkpoint = f"{weights_dir}/svm_{dataset_size}.pkl"

    settings = {
        "num_classes": 26,
        "dataset_dir": Path("/home/ubuntu/data/letters_dataset"),
        "num_workers": 2,

        "model_type": "SVM",
        "model_name": "SVM",
        "stage": "train",
        # NOTE(review): presumably the model persists itself to this path
        # during fit() -- confirm against the SVM implementation.
        "checkpoint": checkpoint,
    }

    data = LettersDataModule(settings)
    model = MODEL_TYPES[settings["model_type"]](settings)
    data.setup("train")
    model.fit(data.train_dataset)

train_svm()
# NOTE: this call uses the checkpoint path weights/svm.pkl, whereas test_svm()
# expects weights/svm_<dataset_size>.pkl. When training with multiple dataset
# sizes, rename the saved checkpoint after each run
# (e.g. svm_10.pkl, svm_100.pkl, svm_1000.pkl).

In [2]:
# Test SVM
def test_svm(dataset_size=10):
    """Run the SVM model against the test split.

    Args:
        dataset_size: Tag that selects the checkpoint file
            ``weights/svm_<dataset_size>.pkl`` passed to the model settings.
    """
    checkpoint_path = f"/home/ubuntu/dummy-problems/weights/svm_{dataset_size}.pkl"
    config = {
        "num_classes": 26,
        "dataset_dir": Path("/home/ubuntu/data/letters_dataset"),
        "num_workers": 2,

        "model_type": "SVM",
        "model_name": "SVM",
        # NOTE(review): "train" even though this is the evaluation path, while
        # the deep-learning test helpers use "test" -- confirm this is what
        # the SVM wrapper expects.
        "stage": "train",
        "checkpoint": checkpoint_path,
    }

    datamodule = LettersDataModule(config)
    svm_cls = MODEL_TYPES[config["model_type"]]
    classifier = svm_cls(config)

    # Only the test split is prepared; the classifier is evaluated on it directly.
    datamodule.setup("test")
    classifier.test(datamodule.test_dataset)

In [None]:
# Evaluate the SVM once per training-set size. Each call points the model at
# weights/svm_<dataset_size>.pkl, so that file must already exist.

# samples per letter = 10 (train + validate)
test_svm(dataset_size=10)

# samples per letter = 100 (train + validate)
test_svm(dataset_size=100)

# ConvNet

In [None]:
# Train ConvNet
def train_convnet():
    """Fit the ConvNet letter classifier with a Lightning trainer.

    Training is capped at 10 epochs and uses an ``EarlyStopping`` callback
    that monitors ``val_loss`` in ``min`` mode.
    """
    config = {
        "dataset_dir": Path("/home/ubuntu/data/letters_dataset"),
        "num_workers": 15,

        "model_type": "DL",
        "model_name": "ConvNet",
        "num_channels": 1,
        "num_classes": 26,
        "stage": "fit",
    }
    early_stopping = L.pytorch.callbacks.EarlyStopping(monitor="val_loss", mode="min")

    datamodule = LettersDataModule(config)
    network_cls = MODEL_TYPES[config["model_type"]]
    network = network_cls(config)

    trainer = L.Trainer(
        max_epochs=10,
        callbacks=[early_stopping],
        log_every_n_steps=5,
    )
    trainer.fit(network, datamodule)

# NOTE(review): train_convnet() configures no checkpoint path, while
# test_convnet() expects weights/convnet_<dataset_size>.ckpt -- presumably the
# checkpoint written by the trainer must be copied/renamed by hand; confirm.
train_convnet()

In [12]:
# Test ConvNet
def test_convnet(dataset_size=10):
    """Evaluate a saved ConvNet checkpoint with ``Trainer.test``.

    Args:
        dataset_size: Tag that selects the checkpoint file
            ``weights/convnet_<dataset_size>.ckpt`` to restore the model from.
    """
    checkpoint_path = f"/home/ubuntu/dummy-problems/weights/convnet_{dataset_size}.ckpt"
    config = {
        "dataset_dir": Path("/home/ubuntu/data/letters_dataset"),
        "num_workers": 15,

        "model_type": "DL",
        "model_name": "ConvNet",
        "num_channels": 1,
        "num_classes": 26,
        "labels": ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"],
        "stage": "test",
        "checkpoint": checkpoint_path,
    }

    datamodule = LettersDataModule(config)
    network_cls = MODEL_TYPES[config["model_type"]]
    # The settings dict is handed to load_from_checkpoint alongside the path.
    network = network_cls.load_from_checkpoint(config["checkpoint"], settings=config)

    evaluator = L.Trainer()
    evaluator.test(network, datamodule)

In [None]:
# Evaluate the ConvNet once per training-set size. Each call restores the model
# from weights/convnet_<dataset_size>.ckpt, so that file must already exist.

# samples per letter = 10 (train + validate)
test_convnet(dataset_size=10)

# samples per letter = 100 (train + validate)
test_convnet(dataset_size=100)

# samples per letter = 1000 (train + validate)
test_convnet(dataset_size=1000)

# Transformer - TinyViT

In [None]:
# Train TinyViT
def train_tinyvit():
    """Fit the TinyViT letter classifier with a Lightning trainer.

    Uses the model name ``tiny_vit_21m_224.dist_in22k_ft_in1k``. Training is
    capped at 10 epochs and uses an ``EarlyStopping`` callback that monitors
    ``val_loss`` in ``min`` mode.
    """
    config = {
        "dataset_dir": Path("/home/ubuntu/data/letters_dataset"),
        "num_workers": 15,

        "model_type": "DL",
        "model_name": "tiny_vit_21m_224.dist_in22k_ft_in1k",
        "num_channels": 1,
        "num_classes": 26,
        "stage": "fit",
    }
    early_stopping = L.pytorch.callbacks.EarlyStopping(monitor="val_loss", mode="min")

    datamodule = LettersDataModule(config)
    network_cls = MODEL_TYPES[config["model_type"]]
    network = network_cls(config)

    trainer = L.Trainer(
        max_epochs=10,
        callbacks=[early_stopping],
        log_every_n_steps=5,
    )
    trainer.fit(network, datamodule)

# NOTE(review): train_tinyvit() configures no checkpoint path, while
# test_tinyvit() expects weights/tinyvit_<dataset_size>.ckpt -- presumably the
# checkpoint written by the trainer must be copied/renamed by hand; confirm.
train_tinyvit()

In [2]:
# Test TinyViT
def test_tinyvit(dataset_size=10):
    """Evaluate a saved TinyViT checkpoint with ``Trainer.test``.

    Args:
        dataset_size: Tag that selects the checkpoint file
            ``weights/tinyvit_<dataset_size>.ckpt`` to restore the model from.
    """
    checkpoint_path = f"/home/ubuntu/dummy-problems/weights/tinyvit_{dataset_size}.ckpt"
    config = {
        "dataset_dir": Path("/home/ubuntu/data/letters_dataset"),
        "num_workers": 15,

        "model_type": "DL",
        "model_name": "tiny_vit_21m_224.dist_in22k_ft_in1k",
        "num_channels": 1,
        "num_classes": 26,
        "labels": ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"],
        "stage": "test",
        "checkpoint": checkpoint_path,
    }

    datamodule = LettersDataModule(config)
    network_cls = MODEL_TYPES[config["model_type"]]
    # The settings dict is handed to load_from_checkpoint alongside the path.
    network = network_cls.load_from_checkpoint(config["checkpoint"], settings=config)

    evaluator = L.Trainer()
    evaluator.test(network, datamodule)

In [None]:
# Evaluate TinyViT once per training-set size. Each call restores the model
# from weights/tinyvit_<dataset_size>.ckpt, so that file must already exist.

# samples per letter = 10 (train + validate)
test_tinyvit(dataset_size=10)

# samples per letter = 100 (train + validate)
test_tinyvit(dataset_size=100)

# samples per letter = 1000 (train + validate)
test_tinyvit(dataset_size=1000)