In [1]:
import torch
import numpy as np

from tqdm import tqdm
from torch import nn, optim

from torch.utils.data import DataLoader

from torchvision.datasets import ImageFolder
from torchvision import transforms, models

In [4]:
!git clone https://github.com/alexeygrigorev/clothing-dataset-small.git

Cloning into 'clothing-dataset-small'...
remote: Enumerating objects: 3839, done.[K
remote: Counting objects: 100% (400/400), done.[K
remote: Compressing objects: 100% (400/400), done.[K
remote: Total 3839 (delta 9), reused 385 (delta 0), pack-reused 3439 (from 1)[K
Receiving objects: 100% (3839/3839), 100.58 MiB | 14.84 MiB/s, done.
Resolving deltas: 100% (10/10), done.


### Data Loading

In [5]:
input_size = 224

train_transforms = transforms.Compose([
    transforms.Resize((input_size, input_size)),

    # image transformations
    transforms.RandomRotation(10), # Equivalent to shear_range
    transforms.RandomResizedCrop(input_size, scale=(0.9, 1.0)), # Equivalent to zoom_range
    transforms.RandomHorizontalFlip(), # Equivalent to horizontal_flip

    transforms.ToTensor(),
    transforms.Normalize(
        mean = [0.485, 0.456, 0.406],
        std = [0.229, 0.224, 0.225]
    )
])

val_transforms = transforms.Compose([
    transforms.Resize((input_size, input_size)),

    transforms.ToTensor(),
    transforms.Normalize(
        mean = [0.485, 0.456, 0.406],
        std = [0.229, 0.224, 0.225]
    )
])

train_ds = ImageFolder(
    root="./clothing-dataset-small/train",
    transform=train_transforms,
    target_transform=None
)

val_ds = ImageFolder(
    root="./clothing-dataset-small/validation",
    transform=val_transforms
)

train_loader = DataLoader(
    dataset=train_ds,
    batch_size=32,
    shuffle=True
)

val_loader = DataLoader(
    dataset=val_ds,
    batch_size=32,
    shuffle=False
)

print(f"Train batches: {len(train_loader)}\nVal batches: {len(val_loader)}")

Train batches: 96
Val batches: 11


### Neural Network

In [7]:
class ClothingClassifier(nn.Module):
  def __init__(self, size_inner=100, droprate=0.2, num_classes=10):
    super().__init__()
    self.base_model = models.mobilenet_v2(weights="IMAGENET1K_V1")

    self.base_model.classifier = nn.Identity() # remove the mobilenet classifier

    self.pool = nn.AdaptiveAvgPool2d((1, 1))

    self.classification = nn.Sequential(
        nn.Flatten(),
        nn.Linear(1280, size_inner),
        nn.ReLU(),
        nn.Dropout(droprate),
        nn.Linear(size_inner, num_classes)
    )

  def forward(self, x):
    x = self.base_model.features(x) # access features
    x = self.pool(x)
    x = self.classification(x)
    return x

In [9]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

size = 32
droprate = 0.2

model = ClothingClassifier(
    size_inner=size,
    droprate=droprate,
    num_classes=len(train_ds.classes)
)

model.to(device)

print(f"model is on {device}")

Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth


100%|██████████| 13.6M/13.6M [00:00<00:00, 194MB/s]


model is on cuda


In [None]:
learning_rate = 0.0001
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

In [None]:
def train_one_epoch(train_loader):
    running_loss, total_predictions, correct_predictions = 0., 0, 0

    for i, (inputs, labels) in tqdm(enumerate(train_loader), desc="Training"):
        # Every data instance is an input + label pair
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero your gradients for every batch!
        optimizer.zero_grad()

        # Make predictions for this batch
        outputs = model(inputs)

        # Compute the loss and its gradients
        loss = criterion(outputs, labels)
        loss.backward()

        # Adjust learning weights
        optimizer.step()

        # Gather data and report
        running_loss += loss.item()
        total_predictions += labels.size(0)
        correct_predictions += outputs.argmax(1).eq(labels).sum().item()

    epoch_loss = running_loss / len(train_loader)
    epoch_accuracy = correct_predictions / total_predictions

    return epoch_loss, epoch_accuracy

In [None]:
def validate_one_epoch(val_loader):
  vrunning_loss, vtotal_predictions, vcorrect_predictions = 0., 0, 0

  with torch.no_grad():
    for i, (vinputs, vlabels) in enumerate(val_loader):
      vinputs, vlabels = vinputs.to(device), vlabels.to(device)
      voutputs = model(vinputs)
      vloss = criterion(voutputs, vlabels)

      vrunning_loss += vloss.item()
      vtotal_predictions += vlabels.size(0)
      vcorrect_predictions += voutputs.argmax(1).eq(vlabels).sum().item()

  vepoch_loss = vrunning_loss / len(train_loader)
  vepoch_accuracy = vcorrect_predictions / vtotal_predictions

  return vepoch_loss, vepoch_accuracy


### Training the Model

In [None]:
epochs = 10
best_val_accuracy = 0.
checkpoint_path = 'mobilenet_v2_v1_{epoch:02d}_{val_accuracy:.3f}.pth'

for epoch in range(epochs):
  print(f"Epoch: {epoch + 1}")

  # training
  model.train()
  train_result = train_one_epoch(train_loader=train_loader)
  print(f"Train Loss: {train_result[0]} Train Accuracy: {train_result[1]}")

  # validating
  model.eval()
  val_result = validate_one_epoch(val_loader=val_loader)
  print(f"Val Loss: {val_result[0]} Val Accuracy: {val_result[1]}")

  # checkpointing
  if val_result[1] > best_val_accuracy:
    best_val_accuracy = val_result[1]
    checkpoint = checkpoint_path.format(epoch=epoch + 1,
                                       val_accuracy=val_result[1])
    torch.save(model.state_dict(), checkpoint)
    print(f"Checkpoint saved to {checkpoint}")



Epoch: 1


Training: 96it [00:30,  3.10it/s]


Train Loss: 0.5396366917217771 Train Accuracy: 0.840612777053455
Val Loss: 0.03889287660907333 Val Accuracy: 0.8914956011730205
Checkpoint saved to mobilenet_v2_v1_01_0.891.pth
Epoch: 2


Training: 96it [00:31,  3.09it/s]


Train Loss: 0.33076863960983854 Train Accuracy: 0.9002607561929595
Val Loss: 0.03204190063600739 Val Accuracy: 0.8973607038123167
Checkpoint saved to mobilenet_v2_v1_02_0.897.pth
Epoch: 3


Training: 96it [00:30,  3.14it/s]


Train Loss: 0.24424195215882114 Train Accuracy: 0.93122555410691
Val Loss: 0.029134521590700995 Val Accuracy: 0.9178885630498533
Checkpoint saved to mobilenet_v2_v1_03_0.918.pth
Epoch: 4


Training: 96it [00:31,  3.08it/s]


Train Loss: 0.1814966912691792 Train Accuracy: 0.9524119947848761
Val Loss: 0.032093258089541145 Val Accuracy: 0.9178885630498533
Epoch: 5


Training: 96it [00:31,  3.01it/s]


Train Loss: 0.13622834091074765 Train Accuracy: 0.9638200782268579
Val Loss: 0.03427098707955641 Val Accuracy: 0.8944281524926686
Epoch: 6


Training: 96it [00:30,  3.15it/s]


Train Loss: 0.11270405936132495 Train Accuracy: 0.969361147327249
Val Loss: 0.025823801557028975 Val Accuracy: 0.9149560117302052
Epoch: 7


Training: 96it [00:31,  3.09it/s]


Train Loss: 0.09809441672405228 Train Accuracy: 0.9729465449804433
Val Loss: 0.03013465239200741 Val Accuracy: 0.9120234604105572
Epoch: 8


Training: 96it [00:30,  3.15it/s]


Train Loss: 0.08418927516322583 Train Accuracy: 0.9755541069100391
Val Loss: 0.030503496126281487 Val Accuracy: 0.906158357771261
Epoch: 9


Training: 96it [00:30,  3.15it/s]


Train Loss: 0.057053528541776664 Train Accuracy: 0.9850065189048239
Val Loss: 0.028968871411052532 Val Accuracy: 0.9032258064516129
Epoch: 10


Training: 96it [00:30,  3.11it/s]


Train Loss: 0.0641610156162642 Train Accuracy: 0.984354628422425
Val Loss: 0.02946635974512901 Val Accuracy: 0.9208211143695014
Checkpoint saved to mobilenet_v2_v1_10_0.921.pth


### Using the Model

In [10]:
weights_file = "mobilenet_v2_v1_03_0.918.pth"

test_ds = ImageFolder(
    root="./clothing-dataset-small/test",
    transform=val_transforms
)

test_loader = DataLoader(
    dataset=test_ds,
    batch_size=32,
    shuffle=True
)

model = ClothingClassifier(
    size_inner=size, droprate=droprate, num_classes=len(train_ds.classes)
)
model.load_state_dict(torch.load(weights_file))
model.to(device)
model.eval()

for inputs, labels in test_loader:
  inputs, labels = inputs.to(device), labels.to(device)
  break

outputs = model(inputs)
total_predictions = labels.size(0)
correct_predictions = outputs.argmax(1).eq(labels).sum().item()
correct_predictions / total_predictions

0.8125

In [None]:
def pytorch_preprocessing(X):
  X = X / 255.

  mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3, 1, 1)
  std = np.array([0.229, 0.224, 0.225]).reshape(1, 3, 1, 1)

  # batch, height, width, channels => batch, channels, height, width
  X = X.transpose(0, 3, 1, 2)
  X = (X - mean) / std

  return X.astype(np.float32)

In [None]:
from io import BytesIO
from urllib import request
from PIL import Image

def download_image(url):
    with request.urlopen(url) as resp:
        buffer = resp.read()
    stream = BytesIO(buffer)
    img = Image.open(stream)
    return img

In [None]:
def preprocess(img):
    if img.mode != 'RGB':
        img = img.convert('RGB')
    small = img.resize((224, 224), Image.NEAREST) # type: ignore
    x = np.array(small, dtype='float32')
    batch = np.expand_dims(x, axis=0)
    return pytorch_preprocessing(batch)

In [None]:
url = 'http://bit.ly/mlbookcamp-pants'
classes = [
    "dress",
    "hat",
    "longsleeve",
    "outwear",
    "pants",
    "shirt",
    "shoes",
    "shorts",
    "skirt",
    "t-shirt",
]

img = download_image(url)
X = preprocess(img)
X = torch.Tensor(X).to(device)

pred = np.array(model(X).data[0].cpu())
dict(zip(classes, pred.tolist()))

{'dress': -1.9799416065216064,
 'hat': -1.5919891595840454,
 'longsleeve': 0.10016340017318726,
 'outwear': 0.6754636168479919,
 'pants': 6.922043800354004,
 'shirt': -1.2410041093826294,
 'shoes': 0.10473060607910156,
 'shorts': 0.8319987654685974,
 'skirt': 0.5130680203437805,
 't-shirt': 0.2831695079803467}

### Exporting the Model to ONNX Format

In [12]:
!pip install onnxscript

Collecting onnxscript
  Downloading onnxscript-0.5.6-py3-none-any.whl.metadata (13 kB)
Collecting onnx_ir<2,>=0.1.12 (from onnxscript)
  Downloading onnx_ir-0.1.12-py3-none-any.whl.metadata (3.2 kB)
Collecting onnx>=1.16 (from onnxscript)
  Downloading onnx-1.20.0-cp312-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (8.4 kB)
Downloading onnxscript-0.5.6-py3-none-any.whl (683 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m683.0/683.0 kB[0m [31m43.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading onnx-1.20.0-cp312-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (18.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.1/18.1 MB[0m [31m119.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading onnx_ir-0.1.12-py3-none-any.whl (129 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.3/129.3 kB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: onnx, onnx_ir, onnxscript
Successfully install

In [15]:
dummy_input = torch.randn(1, 3, input_size, input_size).to(device)
onnx_path = "clothing_classification.onnx"

torch.onnx.export(
    model,
    dummy_input,
    onnx_path,
    verbose=True,
    input_names=["input"],
    output_names=["output"],
    dynamic_axes={
        "input": {0: "batch_size"},
        "output": {0: "batch_size"}
    },
)

print(f"Model saved to {onnx_path}")

  torch.onnx.export(


[torch.onnx] Obtain model graph for `ClothingClassifier([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `ClothingClassifier([...]` with `torch.export.export(..., strict=False)`... ✅
[torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
Applied 104 of general pattern rewrite rules.
Model saved to clothing_classification.onnx


In [16]:
from onnx import load_model, save_model

# load model with external data directory
m = load_model("clothing_classification.onnx", load_external_data=True)

# force everything into a single file
save_model(
    m,
    "clothing_classification_single.onnx",
    save_as_external_data=False
)