In [6]:
import torch
from torch.utils.data import Dataset
from torchvision.io import decode_image
import os
import pandas as pd
import torchvision.transforms.v2 as T

In [7]:
class CustomImageDataset(Dataset):
    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, f"{self.img_labels.iloc[idx, 0]}.jpg")
        image = decode_image(img_path)
        label = self.img_labels.iloc[idx, 1]
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label

In [8]:
def transform_target(label):
    brand_to_idx = {
        "adidas": 0,
        "converse": 1,
        "nike": 2,
    }
    return brand_to_idx[label]

transform = T.Compose([
    T.Resize((240, 240)),
    T.ToDtype(torch.float32, scale=True),
])

In [10]:
from torch.utils.data import DataLoader

training_data = CustomImageDataset(annotations_file="data/train/annotations.csv", img_dir="data/train/images", transform=transform,target_transform=transform_target)

testing_data = CustomImageDataset(annotations_file="data/test/annotations.csv", img_dir="data/test/images", transform=transform, target_transform=transform_target)

In [11]:
train_dataloader = DataLoader(training_data, batch_size=64, shuffle=True)
test_dataloader = DataLoader(testing_data, batch_size=64, shuffle=False)

In [12]:
import torch
model = torch.hub.load('pytorch/vision:v0.10.0', 'inception_v3', pretrained=True)

Downloading: "https://github.com/pytorch/vision/zipball/v0.10.0" to /root/.cache/torch/hub/v0.10.0.zip




Downloading: "https://download.pytorch.org/models/inception_v3_google-0cc3c7bd.pth" to /root/.cache/torch/hub/checkpoints/inception_v3_google-0cc3c7bd.pth


100%|██████████| 104M/104M [00:00<00:00, 180MB/s] 


In [31]:
from torch import nn
model.AuxLogits.fc = nn.Linear(in_features=768, out_features=3, bias=True)

In [37]:
model.Conv2d_1a_3x3.requires_grad_(False)
model.Conv2d_2a_3x3.requires_grad_(False)
model.Conv2d_2b_3x3.requires_grad_(False)
model.Conv2d_3b_1x1.requires_grad_(False)
model.Conv2d_4a_3x3.requires_grad_(False)
model.Mixed_5b.requires_grad_(False)
model.Mixed_5c.requires_grad_(False)

InceptionA(
  (branch1x1): BasicConv2d(
    (conv): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (branch5x5_1): BasicConv2d(
    (conv): Conv2d(256, 48, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(48, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (branch5x5_2): BasicConv2d(
    (conv): Conv2d(48, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (branch3x3dbl_1): BasicConv2d(
    (conv): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (branch3x3dbl_2): BasicConv2d(
    (conv): Conv2d(64, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(96, eps=0.001, momentum=0.1, af

In [41]:
if torch.cuda.is_available():
  model.to("cuda")
else:
  model.to("cpu")

In [40]:
torch.cuda.is_available()

False

In [47]:
epochs = 100
preprocess = T.Compose([
    T.Resize(299),
    T.CenterCrop(299),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
optim = torch.optim.Adam(model.parameters())
loss_fn = nn.CrossEntropyLoss()



In [49]:
def train_one_epoch():
    running_loss = 0.
    last_loss = 0.

    # Here, we use enumerate(training_loader) instead of
    # iter(training_loader) so that we can track the batch
    # index and do some intra-epoch reporting
    for i, data in enumerate(train_dataloader):
        # Every data instance is an input + label pair
        inputs, labels = data

        # Zero your gradients for every batch!
        optim.zero_grad()

        # Make predictions for this batch
        outputs = model(inputs)

        # Compute the loss and its gradients
        loss = loss_fn(outputs, labels)
        loss.backward()

        # Adjust learning weights
        optim.step()

        # Gather data and report
        running_loss += loss.item()
        if i % 1000 == 999:
            last_loss = running_loss / 1000 # loss per batch
            print('  batch {} loss: {}'.format(i + 1, last_loss))
            running_loss = 0.

    return last_loss

In [54]:
for epoch in range(epochs):
  print('EPOCH {}:'.format(epoch + 1))

  # Make sure gradient tracking is on, and do a pass over the data
  model.train(True)
  avg_loss = train_one_epoch()


  running_vloss = 0.0
  # Set the model to evaluation mode, disabling dropout and using population
  # statistics for batch normalization.
  model.eval()

  # Disable gradient computation and reduce memory consumption.
  with torch.no_grad():
      for i, vdata in enumerate(test_dataloader):
          vinputs, vlabels = vdata
          voutputs = model(vinputs)
          vloss = loss_fn(voutputs, vlabels)
          running_vloss += vloss

  avg_vloss = running_vloss / (i + 1)
  print('LOSS train {} valid {}'.format(avg_loss, avg_vloss))

EPOCH 1:


RuntimeError: Calculated padded input size per channel: (3 x 3). Kernel size: (5 x 5). Kernel size can't be greater than actual input size