In [9]:
import os
import shutil

import math
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

import seaborn as sns
from sklearn.metrics import confusion_matrix

from torch.utils.data import Dataset
from PIL import Image
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision import models

In [10]:
# function to load, transform, normalise, and split dataset
def load_dataset(PATH, train_frac=0.7, val_frac=0.15, train_batch_size=64,
                 eval_batch_size=32, seed=None):
    """Load an image-folder dataset and split it into train/val/test loaders.

    Every image under ``PATH`` is converted to a tensor and normalised with
    mean 0.5 / std 0.5 per channel, then the dataset is randomly partitioned.

    Args:
        PATH: root directory passed to ``torchvision.datasets.ImageFolder``.
        train_frac: fraction of samples assigned to the training split.
        val_frac: fraction of samples assigned to the validation split; the
            remainder becomes the test split.
        train_batch_size: batch size of the training loader.
        eval_batch_size: batch size of the validation and test loaders.
        seed: optional integer seed for the split. With ``None`` (default) the
            global RNG is used, so the partition differs between runs; pass a
            fixed seed whenever a saved checkpoint is evaluated in a later
            session, otherwise its test split may contain images it was
            trained on.

    Returns:
        ``(train_loader, val_loader, test_loader)``.

    Raises:
        ValueError: if the fractions are negative or sum to more than 1.
    """
    if train_frac < 0 or val_frac < 0 or train_frac + val_frac > 1:
        raise ValueError('train_frac and val_frac must be non-negative and sum to at most 1')

    # transform images to normalised tensors
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    ])
    # load and transform all images
    dataset = torchvision.datasets.ImageFolder(
        root=PATH,
        transform=transform
    )

    # split sizes: the test split takes whatever is left after rounding down
    total = len(dataset)
    train_size = int(train_frac * total)
    val_size = int(val_frac * total)
    test_size = total - train_size - val_size

    # only pass a generator when a seed was requested, so the default call
    # keeps using the global RNG exactly as before
    split_kwargs = {}
    if seed is not None:
        split_kwargs['generator'] = torch.Generator().manual_seed(seed)
    train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(
        dataset, [train_size, val_size, test_size], **split_kwargs)

    # only the training loader needs shuffling; evaluation metrics do not
    # depend on sample order
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=train_batch_size, num_workers=0, shuffle=True)
    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=eval_batch_size, num_workers=0, shuffle=False)
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=eval_batch_size, num_workers=0, shuffle=False)

    return train_loader, val_loader, test_loader




def display(img):
    """Turn a normalised image tensor into an array suitable for ``imshow``.

    The tensor is mapped back with ``img / 2 + 0.5`` (the inverse of a
    mean 0.5 / std 0.5 normalisation) and its first axis is moved to the end.

    Args:
        img: CPU tensor with three axes (presumably channels-first, as
            produced by ``ToTensor`` — confirm against the caller).

    Returns:
        NumPy array with axes reordered as (1, 2, 0).
    """
    unnormalised = img / 2 + 0.5
    return np.transpose(unnormalised.numpy(), (1, 2, 0))




# function to train model
def train(model, device, train_loader, optimizer, epoch):
    """Run one training epoch and report its mean batch loss and accuracy.

    The loss function is read from the module-level ``criterion``, which must
    be defined before this function is called.

    Args:
        model: network to optimise; it is switched to training mode.
        device: device the batches are moved to.
        train_loader: loader yielding ``(image, label)`` batches.
        optimizer: optimiser stepped once per batch.
        epoch: epoch number, used only for the progress printout.

    Returns:
        ``(avg_loss, accuracy)``: the mean of the per-batch losses and the
        percentage of correctly classified samples over
        ``len(train_loader.dataset)``.
    """
    model.train()
    running_loss = 0
    num_correct = 0
    print(f'Epoch {epoch}:')

    # count batches from 1 so the counter doubles as the number of batches seen
    for step, (image, label) in enumerate(train_loader, start=1):
        image = image.to(device)
        label = label.to(device)

        # clear gradients left over from the previous batch
        optimizer.zero_grad()

        # forward pass and loss bookkeeping
        outputs = model(image)
        loss = criterion(outputs, label)
        batch_loss = loss.item()
        running_loss += batch_loss

        # backward pass and weight update
        loss.backward()
        optimizer.step()

        # progress line on every 10th batch
        if step % 10 == 0:
            print('\tTraining batch {} Loss: {:.6f}'.format(step, batch_loss))

        # predicted class = index of the largest logit
        pred = torch.max(outputs, dim=1)[1]
        num_correct += torch.sum(label == pred).item()

    # epoch summary
    avg_loss = running_loss / step
    num_samples = len(train_loader.dataset)
    accuracy = 100.0 * num_correct / num_samples
    print('Training set: Average loss: {:.6f}, Accuracy: {}/{} ({:.3f}%)\n'
          .format(avg_loss, num_correct, num_samples, accuracy))
    return avg_loss, accuracy




# function to evaluate model on validation dataset
def validate(model, device, val_loader):
    """Evaluate ``model`` on a loader and report mean batch loss and accuracy.

    The loss function is read from the module-level ``criterion``, which must
    be defined before this function is called. No gradients are computed.

    Args:
        model: network to evaluate; it is switched to evaluation mode.
        device: device the batches are moved to.
        val_loader: loader yielding ``(image, label)`` batches.

    Returns:
        ``(avg_loss, accuracy)``: the mean of the per-batch losses and the
        percentage of correctly classified samples over
        ``len(val_loader.dataset)``.

    Raises:
        ZeroDivisionError: if ``val_loader`` yields no batches.
    """
    model.eval()  # evaluation mode (affects layers such as dropout / batch norm)
    total_loss = 0.0
    correct = 0
    batch_count = 0
    with torch.no_grad():
        for image, label in val_loader:
            batch_count += 1
            image, label = image.to(device), label.to(device)  # uses GPU if possible
            val_outputs = model(image)  # class scores

            # accumulate the batch loss; assigning here instead of adding
            # would make the "average" just the last batch loss / batch count
            total_loss += criterion(val_outputs, label).item()

            # predicted class = index of the largest score
            _, pred = torch.max(val_outputs, dim=1)
            correct += torch.sum(label == pred).item()

    # average loss per batch and total accuracy for the epoch
    avg_loss = total_loss / batch_count
    accuracy = 100.0 * correct / len(val_loader.dataset)

    print('Validation set: Average loss: {:.6f}, Accuracy: {}/{} ({:.3f}%)\n'
          .format(avg_loss, correct, len(val_loader.dataset), accuracy))

    return avg_loss, accuracy

In [11]:
# Load dataset and set batch sizes
DATA_PATH = '../input/roobansappani/HandGesture/images'

# Class names are the sorted sub-directories of DATA_PATH. Loose files (for
# example hidden metadata files) are skipped: ImageFolder only treats
# directories as classes, so counting files would shift the label -> name
# mapping and inflate the size of the output layer.
classes = sorted(
    entry for entry in os.listdir(DATA_PATH)
    if os.path.isdir(os.path.join(DATA_PATH, entry))
)
print("classes available: ", classes)

train_loader, val_loader, test_loader = load_dataset(DATA_PATH)
batch_size = train_loader.batch_size
print("batch size: ", batch_size)

In [12]:
# load one random batch of training images
# (built-in next() is used because the iterator's .next() method is not
# available in newer PyTorch releases)
dataiter = iter(train_loader)
images, labels = next(dataiter)

# view random image samples used for training
fig, axis = plt.subplots(3, 5, figsize=(15, 10))

# remove ticks from every panel, including any left empty below
for ax in axis.flat:
    ax.set_xticks([])
    ax.set_yticks([])

# zip stops at the shorter sequence, so a batch smaller than the 3x5 grid
# leaves the remaining panels blank instead of raising IndexError
for ax, image, label in zip(axis.flat, images, labels):
    ax.imshow(display(image))  # add image
    ax.set(title = f"{classes[label.item()]}")  # add label

In [13]:
# defining and initialising all cnn models used for implementation:
class BaseCNN(nn.Module):
    """Baseline classifier: two conv/pool stages followed by two linear layers.

    Each stage is a 3x3 convolution (no padding), ReLU and 2x2 max-pooling.
    ``fc1`` takes 43616 flattened features, so the spatial size of the input
    is fixed by that number (43616 = 16 * 47 * 58, which matches e.g.
    3x195x240 images — confirm against the dataset).

    Args:
        num_classes: number of output scores produced by ``fc2``.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(43616, 120)
        self.fc2 = nn.Linear(120, num_classes)

    def forward(self, x):
        """Return unnormalised class scores for a batch of images."""
        # feature extractor: conv -> ReLU -> pool, twice
        for conv in (self.conv1, self.conv2):
            x = self.pool(F.relu(conv(x)))
        # classifier: flatten everything except the batch axis, then two FC layers
        flat = x.flatten(start_dim=1)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)
    
    
class CNN1(nn.Module):
    """Four-convolution classifier with dropout before the output layer.

    Two stages of (conv, ReLU, conv, ReLU, 2x2 max-pool) with unpadded 3x3
    kernels feed a 120-unit hidden layer. ``fc1`` takes 41040 flattened
    features, so the input spatial size is fixed by that number.

    Args:
        num_classes: number of output scores produced by ``fc2``.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=6, kernel_size=3)
        self.conv3 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3)
        self.conv4 = nn.Conv2d(in_channels=16, out_channels=16, kernel_size=3)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(41040, 120)
        self.fc2 = nn.Linear(120, num_classes)
        self.dropout = nn.Dropout(p=0.25)

    def forward(self, x):
        """Return unnormalised class scores for a batch of images."""
        # feature extractor: each stage is conv -> ReLU -> conv -> ReLU -> pool
        stages = ((self.conv1, self.conv2), (self.conv3, self.conv4))
        for first, second in stages:
            x = F.relu(first(x))
            x = self.pool(F.relu(second(x)))
        # classifier: flatten, hidden layer, dropout, output layer
        flat = x.flatten(start_dim=1)
        hidden = self.dropout(F.relu(self.fc1(flat)))
        return self.fc2(hidden)
    
    
class CNN2(nn.Module):
    """Wider variant of the four-convolution classifier (16/16/32/32 channels).

    Two stages of (conv, ReLU, conv, ReLU, 2x2 max-pool) with unpadded 3x3
    kernels feed a 120-unit hidden layer. ``fc1`` takes 82080 flattened
    features, so the input spatial size is fixed by that number.

    A ``dropout`` layer is created but ``forward`` does not apply it.

    Args:
        num_classes: number of output scores produced by ``fc2``.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=16, kernel_size=3)
        self.conv3 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3)
        self.conv4 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(82080, 120)
        self.fc2 = nn.Linear(120, num_classes)
        self.dropout = nn.Dropout(p=0.25)

    def forward(self, x):
        """Return unnormalised class scores for a batch of images."""
        # feature extractor: each stage is conv -> ReLU -> conv -> ReLU -> pool
        stages = ((self.conv1, self.conv2), (self.conv3, self.conv4))
        for first, second in stages:
            x = F.relu(first(x))
            x = self.pool(F.relu(second(x)))
        # classifier: flatten, hidden layer, output layer (dropout is not used here)
        flat = x.flatten(start_dim=1)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)


class CNN4(nn.Module):
    """Four-stage classifier with batch normalisation after every layer.

    Each stage is a padded 3x3 convolution, batch norm, ReLU and 2x2
    max-pooling (16/32/64/128 channels). The flattened features go through a
    120-unit linear layer with batch norm and ReLU, then the output layer.
    ``fc1`` takes 23040 flattened features, so the input spatial size is
    fixed by that number.

    A ``dropout`` layer is created but ``forward`` does not apply it.

    Args:
        num_classes: number of output scores produced by ``fc2``.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(23040, 120)
        self.fc2 = nn.Linear(120, num_classes)
        self.dropout = nn.Dropout(p=0.25)
        self.conv1_bn = nn.BatchNorm2d(num_features=16)
        self.conv2_bn = nn.BatchNorm2d(num_features=32)
        self.conv3_bn = nn.BatchNorm2d(num_features=64)
        self.conv4_bn = nn.BatchNorm2d(num_features=128)
        self.fc_bn = nn.BatchNorm1d(num_features=120)

    def forward(self, x):
        """Return unnormalised class scores for a batch of images."""
        # feature extractor: conv -> batch norm -> ReLU -> pool, four times
        stages = (
            (self.conv1, self.conv1_bn),
            (self.conv2, self.conv2_bn),
            (self.conv3, self.conv3_bn),
            (self.conv4, self.conv4_bn),
        )
        for conv, norm in stages:
            x = self.pool(F.relu(norm(conv(x))))
        # classifier: flatten, normalised hidden layer, output layer
        # (dropout is not used here)
        flat = x.flatten(start_dim=1)
        hidden = F.relu(self.fc_bn(self.fc1(flat)))
        return self.fc2(hidden)
    

# initialise model class
#model = CNN4(num_classes=len(classes))
model = models.vgg16(pretrained=True)

# Freeze the pretrained weights for transfer learning. The attribute has to be
# spelled `requires_grad`: assigning to any other name just creates an unused
# attribute on the tensor and leaves every weight trainable.
for param in model.parameters():
    param.requires_grad = False

# Replace the last classifier layer with one sized for this dataset. It is
# created after the freeze loop, so its parameters remain trainable.
num_ftrs = model.classifier[6].in_features
model.classifier[6] = nn.Linear(num_ftrs, len(classes))

In [None]:
# allocate model to device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # use GPU over CPU if available for faster training
print('device: ', device)

# enable data parallelism if >1 GPU available
# (report the real count: it can be 0 on a CPU-only machine, not just 1)
gpu_count = torch.cuda.device_count()
print('GPUs available: ', gpu_count)
if gpu_count > 1:
    # NOTE: DataParallel wraps the network, so state_dict keys gain a
    # 'module.' prefix; checkpoints saved from the wrapped model only match a
    # wrapped model when loaded.
    model = nn.DataParallel(model)

model.to(device) # allocate model to device

In [None]:
# training and validation section:
# cross-entropy loss (applies log-softmax internally) and plain SGD over
# every parameter of the model
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001)

# per-epoch metric history, consumed by the plotting cell
track_epoch, training_loss, validation_loss = [], [], []
training_accuracy, validation_accuracy = [], []

# running the model: one training pass and one validation pass per epoch
num_epochs = 10
for epoch in range(1, num_epochs + 1):
    train_loss, train_acc = train(model, device, train_loader, optimizer, epoch)
    val_loss, val_acc = validate(model, device, val_loader)

    # record this epoch's numbers in their respective histories
    for history, value in (
        (track_epoch, epoch),
        (training_loss, train_loss),
        (validation_loss, val_loss),
        (training_accuracy, train_acc),
        (validation_accuracy, val_acc),
    ):
        history.append(value)

# persist the trained weights
torch.save(model.state_dict(), 'vgg16.pth')

In [None]:
# create the epoch-loss and epoch-accuracy graphs; both share one layout and
# differ only in the plotted series, the y-axis label and the title
epoch_int = range(math.floor(min(track_epoch)), math.ceil(max(track_epoch)) + 1)

for train_series, val_series, y_label, plot_title in (
    (training_loss, validation_loss, 'Loss', 'VGG16 Loss'),
    (training_accuracy, validation_accuracy, 'Accuracy', 'VGG16 Accuracy'),
):
    plt.figure(figsize=(15, 15))
    plt.plot(track_epoch, train_series, marker='o')
    plt.plot(track_epoch, val_series, marker='o')
    plt.xticks(epoch_int, fontsize=20)  # one tick per epoch
    plt.yticks(fontsize=32)
    plt.xlabel('Epoch', fontsize=32)
    plt.ylabel(y_label, fontsize=32)
    plt.legend(['Training', 'Validation'], loc='upper right', fontsize=32)
    plt.title(plot_title, fontsize=40)
    plt.show()

In [14]:
# evaluate final model on test set
# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only machine
state_dict = torch.load('../input/final/vgg16 (1).pth', map_location=device)
# strict=False skips keys that do not match (for example a 'module.' prefix
# left by DataParallel), which would silently leave weights unloaded, so any
# mismatch is reported instead of being ignored
load_result = model.load_state_dict(state_dict, strict=False) # load pretrained final model
if load_result.missing_keys or load_result.unexpected_keys:
    print('Warning: checkpoint does not fully match the model')
    print('\tmissing keys: ', load_result.missing_keys)
    print('\tunexpected keys: ', load_result.unexpected_keys)

# defining labels and predictions on test dataset for the confusion matrix
truelabels = []
predictions = []
model.eval()
correct = 0
batch_count = 0

print("Getting predictions from test set...")
with torch.no_grad():  # inference only: no autograd graph needed
    for image, label in test_loader:
        batch_count += 1
        image, label = image.to(device), label.to(device) # uses GPU if possible
        test_output = model(image)  # class scores

        # to print accuracy
        _, pred = torch.max(test_output, dim=1)
        correct += torch.sum(label==pred).item()

        # track results for the graph; reuse `pred` rather than running the
        # model a second time on the same batch
        truelabels.extend(label.cpu().numpy())
        predictions.extend(pred.cpu().numpy())

# calculate total accuracy
accuracy = 100.0 * correct / len(test_loader.dataset)
print('Test set accuracy: {}/{} ({:.3f}%)\n'.format(correct, len(test_loader.dataset), accuracy))


# plot a confusion matrix
# passing every class index keeps the matrix len(classes) x len(classes) even
# when a class is missing from the test split, so it always fits the
# class-name index/columns below
tick_marks = np.arange(len(classes))
cm = confusion_matrix(truelabels, predictions, labels=tick_marks)
df_cm = pd.DataFrame(cm, index = classes, columns = classes)
plt.figure(figsize = (15,15))
sns.heatmap(df_cm, annot=True, cmap=plt.cm.Blues, fmt='g')
plt.xticks(fontsize=10)
plt.yticks(fontsize=10)
plt.xlabel("Predicted Label", fontsize = 32)
plt.ylabel("True Label", fontsize = 32)
plt.title('Bad Model Confusion Matrix', fontsize=40)
plt.show()