In [69]:
### let's get some of the necessary libraries in here

import sys
import os
import torch
from torch import nn
import torch.nn.functional as F
import torchvision.transforms as transforms  
import torchvision
import torch.nn as nn
import os
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import time

import cv2
import pandas as pd
import torchvision.transforms as transform 
from torchvision.transforms import ToTensor, Normalize, Resize
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from torch.autograd import Variable
from torchvision.datasets import ImageFolder
from torch.utils.data import random_split

In [4]:
dataTrain = "/Users/John/Documents/allProjects/intelData/seg_train/seg_train" 
dataTest = "/Users/John/Documents/allProjects/intelData/seg_test/seg_test"
dataPred = "/Users/John/Documents/allProjects/intelData/seg_pred/seg_pred"

## the folders in the training data give us an easy list of labels
labels = os.listdir(dataTrain)

In [5]:
labels

['forest', 'buildings', 'glacier', 'street', 'mountain', 'sea']

### Normalizing image tensors
This process is important because it helps minimize the impact of image brightness and contrast amongst all images. Unless the images are taken school yearbook style - same location, lighting, camera, etc. - then there will inevitably be some differences among the images.

In [6]:
trainTemp = ImageFolder(dataTrain, transform = transforms.Compose([
    transforms.Resize(64),
    transforms.RandomCrop(64),
    transforms.ToTensor(),
]))
trainDL = DataLoader(trainTemp, 64, shuffle=True)

In [7]:
## do this to extract a single image
for (image, label) in list(enumerate(trainDL))[:1]:
    print(image)

0
0


Our dimensions are [64, 3, 64, 64] so we only need the means and sds for positions 0, 2, 3. We can discared 1 via the dim argument

In [8]:
label[0].size()

torch.Size([64, 3, 64, 64])

In [9]:
def calculateMeanSD(DL):
    """
    This function will calculate the mean and sum of squared mean for the data in a
    DataLoader. I adjusted it specifically to this dataset via the dim argument, skipping
    index 1 because that was not the data required. This function may require other adjustments
    for other datasets.
    """

    ## initialize the three variables as zero
    runningSum, sumSquared, batches = 0,0,0
    
    ## extract the data from the DataLoader and calculate the sums and sum squared
    for data, label in DL:
        runningSum += torch.mean(data, dim = ([0,2,3]))
        sumSquared += torch.mean(data**2, dim = ([0,2,3]))
        batches += 1

    ## simple calcs of     
    mean = runningSum/batches
    std = (sumSquared/batches - mean**2)**0.5
    return mean,std

In [10]:
mean, sd  = calculateMeanSD(trainDL)

Our means and standard deviations are below. This will inform the normalize step of any transformations within the data.

### Augmenting training data
The key here is to add in some randomness so that the CNN detects changes in the image. CNNs are shift invariant, meaning they will detect key details regardless of the position, yet they would struggle if I flipped, cropped, stretched, etc. the images. As such, we'll add in some of that randomness to help the CNN perform better on images with unique traits.

To do this I, resize, add a random crop, a random color jitter, and a random horizontal flip. This is quite a bit of randomness, which should introduce the CNN to a lot of different varieties. I'll likely want to crank the epochs to make sure the CNN "sees" everything.

In [11]:
## we'll do some augmentation on the training data
trainTransform = transform.Compose([
    transform.Resize((64,64)),
    transform.RandomCrop((64,64)),
    transforms.ColorJitter(0.3,0.4,0.4,0.2),
    transform.RandomHorizontalFlip(), ## default is p = 0.5
    transform.ToTensor(),
    transform.Normalize((mean[0],mean[1],mean[2]), (sd[0], sd[1], sd[2]))
])

We do not augment test data because we are evaluating the model's ability to correctly identify the images as opposed to preparing it to identify key attributes anywhere.

In [12]:
testTransform = transform.Compose([
    transform.Resize((64,64)),
    transform.ToTensor(),
    transform.Normalize((mean[0],mean[1],mean[2]), (sd[0], sd[1], sd[2]))
])

In [13]:
train = ImageFolder(dataTrain, transform = trainTransform)
test = ImageFolder(dataTest, transform = testTransform)

## Modeling

In [14]:
seedA = 33
torch.manual_seed(seedA);

In [15]:
## set the sizes and train test split the data
train_size = int(0.8*len(train))
val_size = int(len(train) - train_size)
print(train_size, val_size)

trainData, valData = random_split(train, [train_size, val_size])

11227 2807
11227 2807


Set parameters and the device. We'd like to use my gpu if it is available. If it is not, we can use the CPU.

In [26]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
batch_size = 64
classes = len(labels)
learning_rate = 0.001
epochs = 50

In [21]:
## define the dataloaders
train_dl = DataLoader(trainData, batch_size, shuffle=True, num_workers=2, pin_memory=True)
valid_dl = DataLoader(valData, batch_size*2, num_workers=2, pin_memory=True)
test_dl = DataLoader(test, batch_size*2, num_workers=2, pin_memory=True)

### Definine the Neural Network

I'm going to do one bespoke CNN and one pretrained model for comparison purposes. To build a bespoke NN with pytorch we need to define the class with nn.Module. 

In [82]:
class BespokeCNN(nn.Module):
    """
    This is an extremely basic CNN, I doubt it will perform as well as the
    pretrained models.
    """
	## Decide upon some layers within the network
    def __init__(self, classes):
        super(BespokeCNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3)
        self.max_pool1 = nn.MaxPool2d(kernel_size = 2, stride = 2)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
        self.max_pool2 = nn.MaxPool2d(kernel_size = 2, stride = 2)

        ## fully connected layers with Softmax as the activation function
        ## Using softmax because this is a multi-classification problem
        self.fc1 = nn.Linear(12544, 128)
        self.softmax = nn.Softmax(dim = 1)
        self.fc2 = nn.Linear(128, classes)
    
    ## defines how the data moves across the layers
    def forward(self,x):
        forw = self.conv1(x)
        forw = self.max_pool1(forw)
        
        forw = self.conv2(forw)
        forw = self.max_pool2(forw)
                
        forw = forw.reshape(forw.size(0), -1)
        
        forw = self.fc1(forw)
        forw = self.softmax(forw)
        forw = self.fc2(forw)

        return forw

In [83]:
## define the model from the class we built
model = BespokeCNN(classes)

In [84]:
def trainModel(model, epochs, train_dl, valid_dl, learning_rate):
    """
    This function will train the provided model printing the accuracy for the validation data.
    It takes the model, number of epochs, and two data loaders and then runs the model through the 
    training process. If we want to change the loss function or optimizer, we can in the code below
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay = 0.001, momentum = 0.9)  
    #total_step = len(train_dl)


    for epoch in range(epochs):
        start = time.time()
        #Load in the data in batches using the train_loader object
        for i, (images, labels) in enumerate(train_dl):  
            # Move tensors to the configured device
            images = images.to(device)
            labels = labels.to(device)
            
            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)
            
            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        ## let's monitor the prediction accuracy on the validation data for each epoch
        ## this may indicate a good stopping point for the particular nertal network
        with torch.no_grad():
            correct = 0
            total = 0
            for images, labels in valid_dl:
                images = images.to(device)
                labels = labels.to(device)
                outputs = model2(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
            print('Accuracy on {} validation images: {:.4f} %'.format(val_size, 100 * correct / total))

        end = time.time()
        print('Epoch [{}/{}], Loss: {:.4f}, Time in Seconds: {:.4f}'.format(epoch+1, num_epochs, loss.item(), end - start))

    return model

In [85]:
model = trainModel(model, epochs, train_dl, valid_dl, learning_rate) 

Accuracy on 2807 validation images: 17.9195 %
Epoch [1/50], Loss: 1.8011, Time in Seconds: 23.7436
Accuracy on 2807 validation images: 17.9195 %
Epoch [1/50], Loss: 1.8011, Time in Seconds: 23.7436


Accuracy on 2807 validation images: 18.5607 %
Epoch [2/50], Loss: 1.7996, Time in Seconds: 23.3876
Accuracy on 2807 validation images: 18.5607 %
Epoch [2/50], Loss: 1.7996, Time in Seconds: 23.3876


Accuracy on 2807 validation images: 18.2401 %
Epoch [3/50], Loss: 1.7967, Time in Seconds: 23.1383
Accuracy on 2807 validation images: 18.2401 %
Epoch [3/50], Loss: 1.7967, Time in Seconds: 23.1383


Accuracy on 2807 validation images: 18.0264 %
Epoch [4/50], Loss: 1.7942, Time in Seconds: 23.6105
Accuracy on 2807 validation images: 18.0264 %
Epoch [4/50], Loss: 1.7942, Time in Seconds: 23.6105


Accuracy on 2807 validation images: 17.8839 %
Epoch [5/50], Loss: 1.7932, Time in Seconds: 23.2763
Accuracy on 2807 validation images: 17.8839 %
Epoch [5/50], Loss: 1.7932, Time in Seconds: 23.2763


Accuracy on 2807 validation images: 18.3826 %
Epoch [6/50], Loss: 1.7780, Time in Seconds: 23.8703
Accuracy on 2807 validation images: 18.3826 %
Epoch [6/50], Loss: 1.7780, Time in Seconds: 23.8703


Accuracy on 2807 validation images: 18.3114 %
Epoch [7/50], Loss: 1.7829, Time in Seconds: 23.7921
Accuracy on 2807 validation images: 18.3114 %
Epoch [7/50], Loss: 1.7829, Time in Seconds: 23.7921


Accuracy on 2807 validation images: 18.5251 %
Epoch [8/50], Loss: 1.7608, Time in Seconds: 23.5456
Accuracy on 2807 validation images: 18.5251 %
Epoch [8/50], Loss: 1.7608, Time in Seconds: 23.5456


Accuracy on 2807 validation images: 17.9195 %
Epoch [9/50], Loss: 1.7262, Time in Seconds: 23.5719
Accuracy on 2807 validation images: 17.9195 %
Epoch [9/50], Loss: 1.7262, Time in Seconds: 23.5719


Accuracy on 2807 validation images: 18.3114 %
Epoch [10/50], Loss: 1.7280, Time in Seconds: 23.5538
Accuracy on 2807 validation images: 18.3114 %
Epoch [10/50], Loss: 1.7280, Time in Seconds: 23.5538


Accuracy on 2807 validation images: 18.0264 %
Epoch [11/50], Loss: 1.6231, Time in Seconds: 23.5811
Accuracy on 2807 validation images: 18.0264 %
Epoch [11/50], Loss: 1.6231, Time in Seconds: 23.5811


Accuracy on 2807 validation images: 18.5607 %
Epoch [12/50], Loss: 1.5918, Time in Seconds: 23.6162
Accuracy on 2807 validation images: 18.5607 %
Epoch [12/50], Loss: 1.5918, Time in Seconds: 23.6162


Accuracy on 2807 validation images: 18.2757 %
Epoch [13/50], Loss: 1.5591, Time in Seconds: 23.4580
Accuracy on 2807 validation images: 18.2757 %
Epoch [13/50], Loss: 1.5591, Time in Seconds: 23.4580


Accuracy on 2807 validation images: 17.9551 %
Epoch [14/50], Loss: 1.6567, Time in Seconds: 23.7193
Accuracy on 2807 validation images: 17.9551 %
Epoch [14/50], Loss: 1.6567, Time in Seconds: 23.7193


Accuracy on 2807 validation images: 17.7414 %
Epoch [15/50], Loss: 1.6063, Time in Seconds: 23.4093
Accuracy on 2807 validation images: 17.7414 %
Epoch [15/50], Loss: 1.6063, Time in Seconds: 23.4093


Accuracy on 2807 validation images: 17.7057 %
Epoch [16/50], Loss: 1.4723, Time in Seconds: 23.9243
Accuracy on 2807 validation images: 17.7057 %
Epoch [16/50], Loss: 1.4723, Time in Seconds: 23.9243


The accuracy improved as the model saw more data, which is usually the case; however it's poor. My guess is that the BespokeCNN is quite shallow. I'm going to add some layers to make it a deeper CNN and retest.

In [None]:
class DeeperCNN(nn.Module):
    """
    This is an extremely basic CNN, I doubt it will perform as well as the
    pretrained models.
    """
	## Decide upon some layers within the network
    def __init__(self, classes):
        super(BespokeCNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3)
        self.conv1 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)
        self.conv1 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3)
        self.max_pool1 = nn.MaxPool2d(kernel_size = 2, stride = 2)
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3)
        self.max_pool2 = nn.MaxPool2d(kernel_size = 2, stride = 2)

        ## fully connected layers with Softmax as the activation function
        ## Using softmax because this is a multi-classification problem
        self.fc1 = nn.Linear(12544, 128)
        self.softmax = nn.Softmax(dim = 1)
        self.fc2 = nn.Linear(128, classes)
    
    ## defines how the data moves across the layers
    def forward(self,x):
        forw = self.conv1(x)
        forw = self.max_pool1(forw)
        
        forw = self.conv2(forw)
        forw = self.max_pool2(forw)
                
        forw = forw.reshape(forw.size(0), -1)
        
        forw = self.fc1(forw)
        forw = self.softmax(forw)
        forw = self.fc2(forw)

        return forw