In [1]:
from torchvision.datasets import ImageFolder
from torchvision.transforms import ToTensor
import torch
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
import torch.nn as nn
import torch.optim as optim
import numpy as np
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import random
from sklearn.model_selection import train_test_split
import torch.optim.lr_scheduler as lr_scheduler
import time
import copy

When importing the data in we need to transform it so that the size is similar. This article has more information about it.

https://towardsdatascience.com/a-beginners-tutorial-on-building-an-ai-image-classifier-using-pytorch-6f85cb69cba7

In [2]:
transformations = transforms.Compose([
    transforms.Resize(255),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
])

In [3]:
data = ImageFolder('train/',transform=transformations)

In [4]:
r = []
b = []
g = []

for el in data:
    r.append(el[0][0])
    b.append(el[0][1])
    g.append(el[0][2])

In [5]:
mean_r = float(torch.mean(torch.stack(r)))
mean_b = float(torch.mean(torch.stack(b)))
mean_g = float(torch.mean(torch.stack(g)))
std_r = float(torch.std(torch.stack(r)))
std_b = float(torch.std(torch.stack(b)))
std_g = float(torch.std(torch.stack(g)))

In [6]:
transformations = transforms.Compose([
    transforms.Resize(255),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[mean_r,mean_b,mean_g], std=[std_r,std_b,std_g])
])

In [7]:
data = ImageFolder('train/',transform=transformations)

Split data into train and dev sets

In [8]:
q = list(range(0,len(data)))

In [9]:
len(data)

875

In [10]:
random.shuffle(q)

In [11]:
train_set = torch.utils.data.Subset(data, q[:100]) 
dev_set = torch.utils.data.Subset(data, q[100:])

In [12]:
train_loader = torch.utils.data.DataLoader(train_set,shuffle = True, num_workers = 4)
dev_loader = torch.utils.data.DataLoader(dev_set,shuffle = True, num_workers = 4)

In [13]:
dataloaders = {'train':train_loader,'val':dev_loader}

In [14]:
dataset_sizes = {'train':775,'val':100}

Load the model

In [27]:
model_ft = models.resnet18(pretrained=True)
num_ftrs = model_ft.fc.in_features

In [28]:
num_ftrs

512

In [29]:
for param in model_ft.parameters():
    param.requires_grad = False

In [30]:
model_ft.fc = nn.Linear(num_ftrs, 3)

In [35]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model_ft = model_ft.to(device)
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.Adam(model_ft.parameters(), lr=0.001)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

In [36]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=5):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [37]:
train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler, num_epochs=4)

Epoch 0/3
----------
train Loss: 0.1533 Acc: 0.0477
val Loss: 8.3571 Acc: 3.2300

Epoch 1/3
----------
train Loss: 0.1529 Acc: 0.0452
val Loss: 8.2022 Acc: 3.2000

Epoch 2/3
----------
train Loss: 0.1536 Acc: 0.0490
val Loss: 9.4077 Acc: 3.2000

Epoch 3/3
----------
train Loss: 0.1563 Acc: 0.0542
val Loss: 9.1403 Acc: 3.1000

Training complete in 3m 5s
Best val Acc: 3.230000


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  