In [None]:
!git clone https://github.com/bwa2116/forexperiment.git

In [None]:
cd /content/forexperiment

In [None]:
from ViT.model import ViTForClassfication

import torch
from torch import nn, optim

import os

In [None]:
import matplotlib.pyplot as plt

In [None]:
from ViT.config import data_config
from datasets.loader import load_data
from trainer import Trainer

In [None]:
# Configure the CUDA caching allocator *before* touching any torch.cuda API, so
# the setting is already in the environment when the allocator starts up.
# NOTE(review): if CUDA has already been initialised in this kernel the new
# value is presumably ignored until the runtime is restarted - confirm.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

# Release any cached GPU blocks left over from a previous run in this kernel.
torch.cuda.empty_cache()

# Train on the GPU when one is visible, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
# --- Sweep configuration -----------------------------------------------------
# Everything a reader might want to tune lives here instead of inside the loops.
DATASETS = ['MNIST', 'Places365', 'CIFAR10', 'ImageNet200']
RANDOM_FEATURE_COUNTS = [16, 32, 64, 128]  # values of `m` tried for Performer-Softmax
BATCH_SIZE = 32
EPOCHS = 20
LEARNING_RATE = 1e-4
WEIGHT_DECAY = 1e-2
# Forwarded unchanged to Trainer.train; presumably 0 disables intermediate
# checkpoints - confirm against the Trainer implementation.
SAVE_MODEL_EVERY_N_EPOCHS = 0
SEED = 42  # re-applied before every run so all variants start from the same RNG state


def attention_variants():
    """Return ``(name_suffix, model_kwargs)`` pairs in the order they are trained.

    The order is: Transformer, Performer-ReLU, then Performer-Softmax once per
    entry of ``RANDOM_FEATURE_COUNTS``. ``name_suffix`` is appended to
    ``"<dataset>_"`` to form the experiment name; ``model_kwargs`` are the
    keyword arguments passed to ``ViTForClassfication`` after ``config``.
    """
    variants = [
        # m=1 is a dummy value: these two variants do not use random features.
        ("Transformer", {"perfrelu": False, "randomfeatures": False, "m": 1}),
        ("Performer-ReLU", {"perfrelu": True, "randomfeatures": False, "m": 1}),
    ]
    # Performer-Softmax deliberately leaves `perfrelu` at the model's default.
    variants += [
        (
            f"Performer-Softmax_with_{m}_random features",
            {"randomfeatures": True, "m": m},
        )
        for m in RANDOM_FEATURE_COUNTS
    ]
    return variants


def run_experiment(exp_name, config, model_kwargs, trainloader, testloader):
    """Build one ViT variant and train it for ``EPOCHS`` epochs.

    Args:
        exp_name: Experiment name handed to ``Trainer`` (and printed).
        config: The dataset's entry from ``data_config``.
        model_kwargs: Extra keyword arguments for ``ViTForClassfication``.
        trainloader: Training data loader returned by ``load_data``.
        testloader: Evaluation data loader returned by ``load_data``.

    The model, optimizer and trainer are locals, so they stop being referenced
    as soon as this function returns. The previous inline version kept the old
    model and trainer alive while the next model was being constructed.
    """
    print(f"This is experiment {exp_name}")

    # Same seed for every run, so differences between variants are not just
    # different initialisations or shuffles.
    torch.manual_seed(SEED)

    model = ViTForClassfication(config, **model_kwargs)
    optimizer = optim.AdamW(
        model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY
    )
    trainer = Trainer(
        model=model,
        config=config,
        optimizer=optimizer,
        loss_fn=nn.CrossEntropyLoss(),
        exp_name=exp_name,
        device=device,
    )
    trainer.train(
        trainloader,
        testloader,
        EPOCHS,
        save_model_every_n_epochs=SAVE_MODEL_EVERY_N_EPOCHS,
    )


for dataset_name in DATASETS:
    config = data_config[dataset_name]

    # These are not hard constraints, but are used to prevent misconfigurations
    assert config["hidden_size"] % config["num_attention_heads"] == 0, (
        f"{dataset_name}: hidden_size must be divisible by num_attention_heads"
    )
    assert config["intermediate_size"] == 4 * config["hidden_size"], (
        f"{dataset_name}: intermediate_size is expected to be 4 * hidden_size"
    )
    assert config["image_size"] % config["patch_size"] == 0, (
        f"{dataset_name}: image_size must be divisible by patch_size"
    )

    # Square inputs; the loaders are built once per dataset and reused by
    # every attention variant.
    img_size = (config["image_size"], config["image_size"])
    trainloader, testloader, _ = load_data(
        name=config["name"], img_size=img_size, batch_size=BATCH_SIZE
    )

    for name_suffix, model_kwargs in attention_variants():
        run_experiment(
            f"{dataset_name}_{name_suffix}",
            config,
            model_kwargs,
            trainloader,
            testloader,
        )
        # The finished run's objects are out of scope now; return cached GPU
        # blocks before the next variant allocates.
        torch.cuda.empty_cache()

# Output: for each dataset, the experiments run in this order:
# 1. Transformer
# 2. Performer-ReLU
# 3. Performer-Softmax with 16 random features
# 4. Performer-Softmax with 32 random features
# 5. Performer-Softmax with 64 random features
# 6. Performer-Softmax with 128 random features

In [None]:
# Colab-only helper module; `files.download` is used by the following cell.
from google.colab import files
# Return to the repository root so the relative `experiments/` path used by the
# following cell is resolved from there.
%cd /content/forexperiment

In [None]:
# Recursively pack the experiments/ directory into experiments.zip in the
# current working directory.
!zip -r experiments.zip experiments/
# Hand the archive to the Colab `files` helper for download.
# Requires `files` from `google.colab` to be imported already.
files.download('experiments.zip')

# Inspect

In [None]:
from ViT.utils import visualize_images, visualize_attention, load_experiment

In [None]:
# Sanity check on the input data: call the project helper for the MNIST dataset.
visualize_images('MNIST') # Show some training images

In [None]:
# Restore a finished run: its config, model and metric histories.
config, model, train_losses, test_losses, accuracies = load_experiment(
    "MNIST_Performer-ReLU"
)

# Side-by-side panels: loss curves on the left, accuracy on the right.
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(15, 5))

for history, curve_label in (
    (train_losses, "Train loss"),
    (test_losses, "Test loss"),
):
    loss_ax.plot(history, label=curve_label)
loss_ax.legend()

acc_ax.plot(accuracies)

# Both panels share the epoch axis; only the y label differs.
for axis, y_label in ((loss_ax, "Loss"), (acc_ax, "Accuracy")):
    axis.set_xlabel("Epoch")
    axis.set_ylabel(y_label)

# Save before show(), while the figure is still the current one.
plt.savefig("metrics.png")
plt.show()

In [None]:
# Call the project's attention visualiser on `model` for the MNIST dataset.
# `model` is the one returned by `load_experiment` in the cell above, so run
# that cell first. The third argument looks like an output image path -
# confirm in ViT.utils.
visualize_attention(model,'MNIST', "attention.png")