In [0]:
%pip install -r requirements.txt

In [0]:
%restart_python

In [0]:
%run ../setup/00_setup

## Data Splits

#### Total Rows: 328,000


| Split       | # of examples |
|-------------|---------------|
| Train       | 100,000  |
| Validation  | 10,000  |

In [0]:
import kagglehub

# Pull the most recent published copy of the dataset through kagglehub and
# keep the location it reports for the downloaded files.
coco_handle = "awsaf49/coco-2017-dataset"
path = kagglehub.dataset_download(handle=coco_handle)

# Show where the files ended up.
print("Path to dataset files:", path)

In [0]:
from pycocotools.coco import COCO

# Pick the COCO split to work with and derive the path of its
# instance-annotation JSON underneath `coco_cache`.
# NOTE(review): `coco_cache` is not assigned in the visible cells of this
# notebook - presumably it comes from the `%run ../setup/00_setup` cell; confirm.
dataType = 'val2017'
_ann_template = '{}/annotations/instances_{}.json'
annFile = _ann_template.format(coco_cache, dataType)

In [0]:
import torch
from torchvision import datasets, transforms

# Image preprocessing pipeline: resize to 256, center-crop to 224, convert to
# a tensor, then normalize each channel with the mean / std listed below.
_channel_mean = [0.485, 0.456, 0.406]
_channel_std = [0.229, 0.224, 0.225]
_preprocess_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=_channel_mean, std=_channel_std),
]
transform = transforms.Compose(_preprocess_steps)

# COCO training split read from the local `data/train` directory.
train_dataset = datasets.CocoDetection(
    root='data/train',
    annFile='data/train/annotations/instances_train2017.json',
    transform=transform,
)

# Shuffled mini-batches of 128 samples.
# NOTE(review): no collate_fn is supplied; default collation may not cope with
# per-image annotation lists of differing length - confirm before iterating.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=128,
    shuffle=True,
)

In [0]:

# Load validation dataset
# val_dataset = datasets.CocoDetection(root='/Volumes/will_smith/datasets/ms_coco/validation',
#                                      annFile='/Volumes/will_smith/datasets/ms_coco/train/annotations/instances_val2017.json',
#                                      transform=transform)
# val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=128, shuffle=False)

In [0]:
import logging
import torchvision.models as models
from composer.models import ComposerClassifier

# Training hyperparameters shared by the cells below.
batch_size = 128
num_epochs = "2ep"  # duration string passed to the Trainer as `max_duration`

# Number of distinct labels found in the training split.
# NOTE(review): assumes `train_dataset` is a mapping of splits whose "train"
# entry exposes a "label" column - confirm against the cell that builds it.
num_class = len(set(train_dataset["train"]["label"]))

logging.basicConfig(level=logging.INFO)

# Build a randomly initialized ResNet-18 whose output layer has `num_class`
# units, so the network agrees with the `num_classes` given to
# ComposerClassifier (the torchvision default head has 1000 outputs).
# `pretrained=False` is omitted: random initialization is already the default
# and the `pretrained` flag is deprecated in recent torchvision releases.
resnet = models.resnet18(num_classes=num_class)
resnet_composer = ComposerClassifier(resnet, num_classes=num_class)

In [0]:
import torch 
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from datasets import Dataset

# Look up both splits on the original mapping *before* `train_dataset` is
# rebound below; querying 'valid' after the rebinding would search the train
# split instead of the full set of splits.
dataset_splits = train_dataset
train_split = dataset_splits.get('train', None)
valid_split = dataset_splits.get('valid', None)
if train_split is None or valid_split is None:
    # Fail here with a clear message rather than on an opaque subscript error.
    raise KeyError("expected both 'train' and 'valid' splits in train_dataset")

# Rebuild each split as a `Dataset` holding only the image / label columns,
# formatted as torch tensors placed on the CUDA device.
train_dataset = Dataset.from_dict(
    {"image": train_split['image'], "label": train_split["label"]}
).with_format("torch", device="cuda")
test_dataset = Dataset.from_dict(
    {"image": valid_split['image'], "label": valid_split["label"]}
).with_format("torch", device="cuda")

# Training batches are shuffled; evaluation batches keep their stored order.
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size)

In [0]:
import torchvision
import torch.nn.functional as F
from composer.models import ComposerModel

class ResNet50(ComposerModel):
    """ComposerModel wrapper around a default-constructed torchvision ResNet-50."""

    def __init__(self):
        super().__init__()
        # The wrapped network; created with torchvision's default arguments.
        self.model = torchvision.models.resnet50()

    def forward(self, batch):
        """Run the wrapped network on the first element of ``batch``.

        ``batch`` is what the dataloader yields; it is unpacked as a two-item
        pair and only the first item is fed to the model.
        """
        images, _unused_targets = batch
        logits = self.model(images)
        return logits

    def loss(self, outputs, batch):
        """Return the cross-entropy between ``outputs`` and the second element of ``batch``."""
        _unused_inputs, labels = batch
        return F.cross_entropy(outputs, labels)

In [0]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Minimal preprocessing: convert each image to a tensor.
# NOTE(review): no resize/crop is applied here; batching presumably needs
# equally sized images - confirm before iterating this loader.
transform = transforms.Compose([
    transforms.ToTensor()
])

# torchvision's ImageNet dataset selects its subset through `split=`; it has no
# `train=` / `download=` arguments (those belong to datasets such as MNIST), so
# passing them makes the constructor fail. The ImageNet archives have to be
# present under the "data" root already.
dataset = datasets.ImageNet("data", split="train", transform=transform)

# NOTE(review): the variable name is kept for compatibility even though the
# loader wraps ImageNet rather than MNIST.
mnist_dataloader = DataLoader(dataset, batch_size=128)

In [0]:

from composer import Trainer
from composer.algorithms import LabelSmoothing, CutMix, ChannelsLast
import torch.optim as optim

# Instantiate the ComposerModel defined in this notebook. The class is named
# `ResNet50`; no `ResNet18` class is defined in the visible cells, so calling
# it would raise a NameError.
model = ResNet50()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Assemble the Composer trainer: the model and optimizer above, the training
# dataloader, the run length from `num_epochs`, and three training algorithms.
trainer = Trainer(
    model=model,
    optimizers=optimizer,
    train_dataloader=train_dataloader,
    # Evaluation is currently disabled:
    # eval_dataloader=test_dataloader,
    max_duration=num_epochs,
    algorithms=[
        LabelSmoothing(smoothing=0.1),
        CutMix(alpha=1.0),
        ChannelsLast(),
    ],
)

# Start training.
trainer.fit()