In [0]:
%pip install -r requirements.txt

In [0]:
%restart_python

In [0]:
%run ../setup/00_setup

In [0]:
import os

# Export the dataset cache location through the environment so later cells
# can read it back. `cifar_cache` is not defined in this notebook; presumably
# it comes from the setup notebook run above — confirm.
os.environ.update({'HF_DATASETS_CACHE': cifar_cache})

In [0]:
%sh nvidia-smi

## Data Splits

#### Total Rows: 60,000


| Split       | # of examples |
|-------------|---------------|
| Train       | 50,000  |
| Test        | 10,000  |

In [0]:
from utils import hf_dataset_utilities as hf_util

# Fetch the CIFAR-10 dataset via the project helper, using the cache location
# exported in the environment earlier in the notebook.
# NOTE(review): trust_remote_code=True presumably allows code shipped with the
# dataset repository to run during loading — confirm it is actually required.
cifar_dataset = hf_util.hfds_download_volume(
    hf_cache=os.environ['HF_DATASETS_CACHE'],
    dataset_path='uoft-cs/cifar10',
    trust_remote_code=True,
    disable_progress=False,
)

In [0]:
# Ask the project helper for a dataset class keyed on the "img" and "label"
# fields; the result is instantiated once per split further down.
CIFARDataset = hf_util.create_torch_image_dataset(image_key="img", label_key="label")

In [0]:
import torchvision.transforms as transforms

# Per-channel mean / std handed to Normalize in both pipelines.
NORM_MEAN = (0.4914, 0.4822, 0.4465)
NORM_STD = (0.2023, 0.1994, 0.2010)

# Training pipeline: padded random crop and random horizontal flip for
# augmentation, then tensor conversion and normalisation.
train_steps = [
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
]
ds_train_transforms = transforms.Compose(train_steps)

# Evaluation pipeline: no augmentation, only tensor conversion and the same
# normalisation as training.
test_steps = [
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
]
ds_test_transforms = transforms.Compose(test_steps)

In [0]:
# Wrap each split in the dataset class created earlier. The 'train' split gets
# the augmenting transform pipeline.
train_dataset = CIFARDataset(cifar_dataset['train'], transform=ds_train_transforms)
# The 'test' split gets the non-augmenting pipeline (tensor conversion + normalisation).
test_dataset = CIFARDataset(cifar_dataset['test'], transform=ds_test_transforms)

In [0]:
# `torch` is imported here because this is the first cell that uses it; the
# notebook otherwise only imports it much further down.
import torch

# Training batches are drawn in a fresh random order on every pass.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True)

# Evaluation keeps the dataset order: shuffling adds nothing to evaluation and
# a fixed order makes "the first test batch" the same on every run.
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=128, shuffle=False)

In [0]:
import logging
import torchvision.models as models
from composer.models import ComposerClassifier

# Training configuration.
# NOTE(review): batch_size is not referenced by the DataLoader cell, which hard-codes 128.
batch_size = 128
# Passed to Trainer(max_duration=...) in the training cell; Composer duration string for two epochs.
num_epochs = "2ep"
# Class count as reported by the training dataset wrapper.
num_class = train_dataset.num_classes
# NOTE(review): num_nodes is not referenced anywhere else in the visible notebook.
num_nodes = 1

# Configure the root logger to emit INFO-level records.
logging.basicConfig(level=logging.INFO)

### Create a custom model class that subclasses `ComposerModel`

In [0]:
import torchvision
import torch.nn.functional as F
from composer.models import ComposerModel

class ResNet50(ComposerModel):
    """torchvision ResNet-50 exposed through the Composer model interface.

    Batches are expected to be ``(inputs, targets)`` pairs, which is how both
    ``forward`` and ``loss`` unpack them.

    Args:
        num_classes: Width of the final classification layer. Defaults to
            1000 (torchvision's own default), so ``ResNet50()`` builds the
            same network as before. Pass the dataset's class count, e.g. 10
            for CIFAR-10, to size the head to the task.
    """

    def __init__(self, num_classes=1000):
        super().__init__()
        # No weights are requested, so the network starts from random initialisation.
        self.model = torchvision.models.resnet50(num_classes=num_classes)

    def forward(self, batch):
        """Run the network on the inputs of a dataloader batch.

        Args:
            batch: ``(inputs, targets)`` pair; the targets are ignored here.

        Returns:
            The raw outputs of the wrapped ResNet-50.
        """
        inputs, _ = batch
        return self.model(inputs)

    def loss(self, outputs, batch):
        """Compute cross-entropy between ``forward`` outputs and batch targets.

        Args:
            outputs: Value returned by ``forward`` for this batch.
            batch: ``(inputs, targets)`` pair; only the targets are used.

        Returns:
            The cross-entropy loss tensor.
        """
        _, targets = batch
        return F.cross_entropy(outputs, targets)

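### Alternative: wrap a torchvision ResNet in `ComposerClassifier`

Note: the training cell below uses the custom `ResNet50` class; `resnet_composer` is created here for reference only.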

In [0]:
# Build an untrained ResNet-18 whose output layer matches the dataset's class
# count, so it agrees with the `num_classes` given to ComposerClassifier.
# The deprecated `pretrained=False` flag is dropped: not requesting weights
# already yields a randomly initialised network.
resnet = models.resnet18(num_classes=num_class)
resnet_composer = ComposerClassifier(resnet, num_classes=num_class)

In [0]:
from composer import Trainer
from composer.algorithms import LabelSmoothing, CutMix, ChannelsLast
import torch.optim as optim
from composer.loggers import MLFlowLogger
import mlflow
import mlflow.pytorch

# Log the training run to the Databricks-hosted MLflow tracking server.
# `experiment_path` and `catalog` are not defined in this notebook; presumably
# they come from the setup notebook — confirm.
mlflow_logger = MLFlowLogger(
    experiment_name=experiment_path,
    tracking_uri='databricks',
    model_registry_uri='databricks-uc',
    model_registry_prefix=f"{catalog}.models",
)

model = ResNet50()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

trainer = Trainer(
    model=model,
    optimizers=optimizer,
    train_dataloader=train_loader,
    eval_dataloader=test_loader,
    max_duration=num_epochs,
    algorithms=[
        LabelSmoothing(smoothing=0.1),
        CutMix(alpha=1.0),
        ChannelsLast(),
    ],
    loggers=[mlflow_logger],
)

# Bound up front so later cells always find the name, even when no run was
# active after training.
run_id = None

# Assume failure until training and model logging have both completed; the
# `finally` clause then closes the MLflow run with the matching status even
# when `fit()` raises or is interrupted.
run_status = "FAILED"
try:
    trainer.fit()

    active_run = mlflow.active_run()

    if active_run:
        run_id = active_run.info.run_id
        # Store the trained model as an artifact of the active run.
        mlflow.pytorch.log_model(model, "model")
        display(run_id)
    else:
        display("No active run")

    run_status = "FINISHED"
finally:
    mlflow.end_run(status=run_status)

In [0]:
import mlflow
import mlflow.pytorch

# Use 'databricks-uc' as the MLflow model registry for this session.
mlflow.set_registry_uri("databricks-uc")

# Address the model artifact logged under the training run and load it back.
model_uri = "runs:/{}/model".format(run_id)
model = mlflow.pytorch.load_model(model_uri)

# Show the layer structure of the reloaded model.
print(model)

In [0]:
import torch
import matplotlib.pyplot as plt

# Take the first image (and its label) from the first test batch.
test_iter = iter(test_loader)
images, labels = next(test_iter)
image = images[0].unsqueeze(0)  # add a batch dimension
# Plain int so the plot title shows "3" rather than "tensor(3)".
actual_label = int(labels[0])

# Move the image to the same device as the model.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
image = image.to(device)
model = model.to(device)

# Run inference without tracking gradients.
model.eval()
with torch.no_grad():
    # The model's forward unpacks an (inputs, targets) pair; the second
    # element is a placeholder that is ignored.
    output = model((image, 1))
    _, predicted = torch.max(output, 1)

# The test transform normalised the image, so its values fall outside [0, 1]
# and imshow would clip them. Undo the normalisation (x * std + mean) with the
# same constants used in the transform cell before plotting.
norm_mean = torch.tensor((0.4914, 0.4822, 0.4465)).view(3, 1, 1)
norm_std = torch.tensor((0.2023, 0.1994, 0.2010)).view(3, 1, 1)
display_image = (image.cpu().squeeze(0) * norm_std + norm_mean).clamp(0, 1)

# Display the image and the prediction (channels-last layout for matplotlib).
plt.imshow(display_image.permute(1, 2, 0).numpy())
plt.title(f"Predicted: {predicted.item()}, Actual: {actual_label}")
plt.axis('off')
plt.show()

### Clear GPU Memory

In [0]:
%restart_python