## Imports

In [1]:
import sparseml
import sparsezoo
import torch
import torchvision

from torchvision import transforms

assert torch.__version__ >= "1.7"

In [2]:
from sparseml.pytorch.models import ModelRegistry
from sparsezoo import Model

## Config

In [3]:
class Config:
    RAW_DATA_FILE = "../data/raw/archive.zip"
    EXTRACTED_DATA_DIR = "../data/extracted"
    BS=1
    device="cuda"

## Load Model

In [4]:
zoo_stub_path = (
    "zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/pruned_quant-moderate"
    "?recipe=transfer_learn"
)
model = ModelRegistry.create(
    key="resnet50",
    pretrained_path=zoo_stub_path,
    pretrained_dataset="imagenette",
    num_classes=4,
    ignore_error_tensors=["classifier.fc.weight", "classifier.fc.bias"],
)
input_shape = ModelRegistry.input_shape("resnet50")
input_size = input_shape[-1]

In [6]:
model.to(Config.device)
zoo_model = Model(zoo_stub_path)
recipe_path = zoo_model.recipes.default.path

## Load datasets

In [7]:
!unzip {Config.RAW_DATA_FILE} -d {Config.EXTRACTED_DATA_DIR}

Archive:  ../data/raw/archive.zip
replace ../data/extracted/image_data/none/0.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: ^C


In [7]:
dataset = torchvision.datasets.ImageFolder(
    Config.EXTRACTED_DATA_DIR + "/image_data", transform=transforms.Compose(
    [
        transforms.Resize(size=input_shape[-2:]),
        transforms.ToTensor()
    ]
))


In [8]:
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

In [9]:
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=Config.BS, shuffle=True)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=Config.BS, shuffle=False)

In [10]:
from tqdm.auto import tqdm
import math

In [11]:
def run_model_one_epoch(model, data_loader, criterion, device, train=False, optimizer=None):
    if train:
        model.train()
    else:
        model.eval()

    running_loss = 0.0
    total_correct = 0
    total_predictions = 0

    for step, (inputs, labels) in tqdm(enumerate(data_loader), total=len(data_loader)):
        inputs = inputs.to(device)
        labels = labels.to(device)

        if train:
            optimizer.zero_grad()

        outputs, _ = model(inputs)  # model returns logits and softmax as a tuple
        loss = criterion(outputs, labels)

        if train:
            loss.backward()
            optimizer.step()

        running_loss += loss.item()

        predictions = outputs.argmax(dim=1)
        total_correct += torch.sum(predictions == labels).item()
        total_predictions += inputs.size(0)

    loss = running_loss / (step + 1.0)
    accuracy = total_correct / total_predictions
    return loss, accuracy

In [12]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=8e-3)

In [13]:
from sparseml.pytorch.optim import ScheduledModifierManager

# create ScheduledModifierManager and Optimizer wrapper
manager = ScheduledModifierManager.from_yaml(recipe_path)
optimizer = manager.modify(model, optimizer, steps_per_epoch=len(train_dataloader))

## Train

In [14]:
epoch = manager.min_epochs
for epoch in range(manager.max_epochs):
    # run training loop
    epoch_name = f"{epoch + 1}/{manager.max_epochs}"
    print(f"Running Training Epoch {epoch_name}")
    train_loss, train_acc = run_model_one_epoch(
        model, train_dataloader, criterion, Config.device, train=True, optimizer=optimizer
    )
    print(
        f"Training Epoch: {epoch_name}\nTraining Loss: {train_loss}\nTop 1 Acc: {train_acc}\n"
    )

    # run validation loop
    print(f"Running Validation Epoch {epoch_name}")
    val_loss, val_acc = run_model_one_epoch(model, test_dataloader, criterion, Config.device)
    print(
        f"Validation Epoch: {epoch_name}\nVal Loss: {val_loss}\nTop 1 Acc: {val_acc}\n"
    )

manager.finalize(model)

Running Training Epoch 1/10


  0%|          | 0/640 [00:00<?, ?it/s]

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


Training Epoch: 1/10
Training Loss: 0.7925851132189223
Top 1 Acc: 0.6625

Running Validation Epoch 1/10


  0%|          | 0/160 [00:00<?, ?it/s]

Validation Epoch: 1/10
Val Loss: 83.17463632673017
Top 1 Acc: 0.4875

Running Training Epoch 2/10


  0%|          | 0/640 [00:00<?, ?it/s]

Training Epoch: 2/10
Training Loss: 0.05458418198976176
Top 1 Acc: 0.984375

Running Validation Epoch 2/10


  0%|          | 0/160 [00:00<?, ?it/s]

Validation Epoch: 2/10
Val Loss: 13.381135426255785
Top 1 Acc: 0.7

Running Training Epoch 3/10


  0%|          | 0/640 [00:00<?, ?it/s]

Training Epoch: 3/10
Training Loss: 0.1970205758323999
Top 1 Acc: 0.9296875

Running Validation Epoch 3/10


  0%|          | 0/160 [00:00<?, ?it/s]

Validation Epoch: 3/10
Val Loss: 50.90606673955917
Top 1 Acc: 0.0

Running Training Epoch 4/10


  0%|          | 0/640 [00:00<?, ?it/s]

Training Epoch: 4/10
Training Loss: 0.6504197680067592
Top 1 Acc: 0.7328125

Running Validation Epoch 4/10


  0%|          | 0/160 [00:00<?, ?it/s]

Validation Epoch: 4/10
Val Loss: 2.535683552222326
Top 1 Acc: 0.19375

Running Training Epoch 5/10


  0%|          | 0/640 [00:00<?, ?it/s]

Training Epoch: 5/10
Training Loss: 0.021315516951358405
Top 1 Acc: 0.996875

Running Validation Epoch 5/10


  0%|          | 0/160 [00:00<?, ?it/s]

RuntimeError: CUDA out of memory. Tried to allocate 2.00 MiB (GPU 0; 1.96 GiB total capacity; 1006.97 MiB already allocated; 1.06 MiB free; 1.02 GiB reserved in total by PyTorch)