In [1]:
%matplotlib inline

In [2]:
import torch
import numpy as np
from torch import nn
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, models, transforms
from torchvision.transforms import ToTensor, Resize, Compose

## Training components

<img src="./components.png" width="80%">

## Prepare the dataset

In [3]:
import os
from PIL import Image

class CustomImageDataset(Dataset):
    def __init__(self, img_dir, transform=None, target_transform=None):  
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform
        # load the files 
        filenames = []
        labels = []
        for filename in os.listdir(img_dir):
            filenames.append(filename)
            if filename.startswith("bird"):
                labels.append(0)
            elif filename.startswith("cat"):
                labels.append(1)
            elif filename.startswith("dog"):
                labels.append(2)
        self.filenames = filenames
        self.labels = labels

    def __len__(self):
        return len(self.filenames)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.filenames[idx])
        image = Image.open(img_path)
        label = self.labels[idx]
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label

In [4]:
pet_dataset = CustomImageDataset("./data/pets",transform = Compose([ToTensor(), Resize((28,28))]))
train_size = int(0.8 * len(pet_dataset))
test_size = len(pet_dataset) - train_size

train_dataset, test_dataset = torch.utils.data.random_split(pet_dataset, [train_size, test_size])

train_dataloader = DataLoader(train_dataset, batch_size=6, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=6, shuffle=True)

## Creating Models
Here we try two different neural networks: 
- Simple model with some convolutional layers;
- A pretrained classical convolutional neural network architecture: ResNet18.

In [5]:

# Get cpu or gpu device for training.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

    
# Define model
class ConvNN(nn.Module):
    def __init__(self):
        super(ConvNN, self).__init__()
        self.conv_stack = nn.Sequential(
            nn.Conv2d(3,6,5),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(6,12,5),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.ReLU(),
            nn.Linear(192, 512),
            nn.ReLU(),
            nn.Linear(512, 3)
        )

    def forward(self, x):
        logits = self.conv_stack(x)
        return logits
    

conv_model = ConvNN().to(device)
print(conv_model)

Using cuda device
ConvNN(
  (conv_stack): Sequential(
    (0): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(6, 12, kernel_size=(5, 5), stride=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Flatten(start_dim=1, end_dim=-1)
    (7): ReLU()
    (8): Linear(in_features=192, out_features=512, bias=True)
    (9): ReLU()
    (10): Linear(in_features=512, out_features=3, bias=True)
  )
)


All pre-trained models expect input images normalized in the same way, i.e. mini-batches of 3-channel RGB images of shape (3 x H x W), where H and W are expected to be at least 224. The images have to be loaded in to a range of [0, 1] and then normalized using mean = [0.485, 0.456, 0.406] and std = [0.229, 0.224, 0.225].

In [6]:
# Define model, dataset and dataloader for vgg model

resnet_model = models.resnet18(pretrained=True).to(device)
resnet_model

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to C:\Users\23133/.cache\torch\hub\checkpoints\resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:02<00:00, 16.6MB/s]


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [7]:
resnet_transform = Compose([ToTensor(), Resize((224,224)), transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])])

pet_dataset_resnet = CustomImageDataset("./data/pets",transform = resnet_transform)
train_size = int(0.8 * len(pet_dataset_resnet))
test_size = len(pet_dataset_resnet) - train_size

train_dataset_resnet, test_dataset_resnet = torch.utils.data.random_split(pet_dataset_resnet, [train_size, test_size])

train_dataloader_resnet = DataLoader(train_dataset_resnet, batch_size=6, shuffle=True)
test_dataloader_resnet = DataLoader(test_dataset_resnet, batch_size=6, shuffle=True)

In [8]:
num_ftrs = resnet_model.fc.in_features
print(num_ftrs)
resnet_model.fc = nn.Linear(num_ftrs, 3)
resnet_model = resnet_model.to(device)

512


## Optimizing the Model Parameters

To train a model, we need a `loss function` and an `optimizer`.

In [9]:

model = resnet_model
trainloader = train_dataloader_resnet
testloader = test_dataloader_resnet
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.RMSprop(model.parameters(), lr=1e-4)

In a single training loop, the model makes predictions on the training dataset (fed to it in batches), and
backpropagates the prediction error to adjust the model's parameters.



In [10]:
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.train()
    correct, train_loss = 0, 0 
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)
        train_loss += loss.item()
        correct += (pred.argmax(1) == y).type(torch.float).sum().item()

        # Backpropagation
        optimizer.zero_grad()  
        loss.backward()
        optimizer.step() 
    correct /= size
    train_loss /= num_batches
    return train_loss, correct

We also check the model's performance against the test dataset to ensure it is learning.



In [11]:
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval() 
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    return test_loss, correct



The training process is conducted over several iterations (*epochs*). During each epoch, the model learns
parameters to make better predictions. We print the model's accuracy and loss at each epoch; we'd like to see the
accuracy increase and the loss decrease with every epoch.



In [12]:
epochs = 500
for t in range(epochs):
    train_loss, train_acc = train(trainloader, model, loss_fn, optimizer)
    test_loss, test_acc = test(testloader, model, loss_fn)
    if t%1 == 0:
        print(f"Epoch {t}:")
        print(f"Train Error. Accuracy: {(100*train_acc):>0.1f}%, Avg loss: {train_loss:>8f}")
        print(f"Test Error. Accuracy: {(100*test_acc):>0.1f}%, Avg loss: {test_loss:>8f}")
print("Done!")

Epoch 0:
Train Error. Accuracy: 70.8%, Avg loss: 0.681271
Test Error. Accuracy: 95.8%, Avg loss: 0.197678
Epoch 1:
Train Error. Accuracy: 87.5%, Avg loss: 0.367318
Test Error. Accuracy: 79.2%, Avg loss: 0.386519
Epoch 2:
Train Error. Accuracy: 90.6%, Avg loss: 0.212756
Test Error. Accuracy: 95.8%, Avg loss: 0.128781
Epoch 3:
Train Error. Accuracy: 96.9%, Avg loss: 0.092067
Test Error. Accuracy: 91.7%, Avg loss: 0.234279
Epoch 4:
Train Error. Accuracy: 97.9%, Avg loss: 0.082397
Test Error. Accuracy: 91.7%, Avg loss: 0.244073
Epoch 5:
Train Error. Accuracy: 100.0%, Avg loss: 0.045263
Test Error. Accuracy: 91.7%, Avg loss: 0.139338
Epoch 6:
Train Error. Accuracy: 99.0%, Avg loss: 0.047985
Test Error. Accuracy: 100.0%, Avg loss: 0.065401
Epoch 7:
Train Error. Accuracy: 95.8%, Avg loss: 0.106709
Test Error. Accuracy: 87.5%, Avg loss: 0.252171
Epoch 8:
Train Error. Accuracy: 97.9%, Avg loss: 0.070981
Test Error. Accuracy: 91.7%, Avg loss: 0.182017
Epoch 9:
Train Error. Accuracy: 100.0%, Avg 

This model can now be used to make predictions.



In [13]:
import matplotlib.pyplot as plt 
from torchvision.transforms import ToPILImage


classes = ["bird", "cat", "dog"]
model.eval()
x, labels = next(iter(testloader))

figure = plt.figure(figsize=(8, 6))
cols, rows = 3, 2
with torch.no_grad():
    preds = model(x)
correct = 0
for i in range(len(preds)):
    predicted, actual = classes[preds[i].argmax()], classes[labels[i]]
    if predicted == actual:
        correct += 1
    img = x[i]
    """
    mean = torch.tensor([0.485, 0.456, 0.406]).reshape((3,1,1))
    std = torch.tensor([0.229, 0.224, 0.225]).reshape((3,1,1))
    img = std * img + mean"""
    
    figure.add_subplot(rows, cols, i + 1)
    plt.title(f"Actual: {actual}, Pred: {predicted}")
    plt.axis("off")
    plt.imshow(ToPILImage()(img))
print(f"accuracy: %i/%i."% ( correct,  len(preds)))

RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same or input should be a MKLDNN tensor and weight is a dense tensor

<Figure size 800x600 with 0 Axes>

Saving Models
-------------
A common way to save a model is to serialize the internal state dictionary (containing the model parameters).



In [None]:
torch.save(model.state_dict(), "model.pth")
print("Saved PyTorch Model State to model.pth")

Loading Models
----------------------------

The process for loading a model includes re-creating the model structure and loading
the state dictionary into it.



In [None]:
model.load_state_dict(torch.load("model.pth"))