In [44]:
### let's get some of the necessary libraries in here

import sys
import os
import torch
from torch import nn
import torch.nn.functional as F
import torchvision.transforms as transforms  
import torchvision
import os
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import cv2
import pandas as pd
import torchvision.transforms as transform 
from torchvision.transforms import ToTensor, Normalize, Resize
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from torch.autograd import Variable
from torchvision.datasets import ImageFolder

In [2]:
dataTrain = "/Users/John/Documents/allProjects/intelData/seg_train/seg_train" 
dataTest = "/Users/John/Documents/allProjects/intelData/seg_test/seg_test"
dataPred = "/Users/John/Documents/allProjects/intelData/seg_pred/seg_pred"

## the folders in the training data give us an easy list of labels
labels = os.listdir(dataTrain)

In [3]:
labels

['forest', 'buildings', 'glacier', 'street', 'mountain', 'sea']

### Normalizing image tensors
This process is important because it helps minimize the impact of image brightness and contrast amongst all images. Unless the images are taken school yearbook style - same location, lighting, camera, etc. - then there will inevitably be some differences among the images.

In [15]:
train = ImageFolder(dataTrain, transform = transforms.Compose([
    transforms.Resize(64),
    transforms.RandomCrop(64),
    transforms.ToTensor(),
]))
trainDL = DataLoader(train, 64, shuffle=True)

In [35]:
## do this to extract a single image
for (image, label) in list(enumerate(trainDL))[:1]:
    print(image)

0
0


Our dimensions are [64, 3, 64, 64] so we only need the means and sds for positions 0, 2, 3. We can discared 1 via the dim argument

In [52]:
label[0].size()

torch.Size([64, 3, 64, 64])

In [53]:
def calculateMeanSD(DL):
    """
    This function will calculate the mean and sum of squared mean for the data in a
    DataLoader. I adjusted it specifically to this dataset via the dim argument, skipping
    index 1 because that was not the data required. This function may require other adjustments
    for other datasets.
    """

    ## initialize the three variables as zero
    runningSum, sumSquared, batches = 0,0,0
    
    ## extract the data from the DataLoader and calculate the sums and sum squared
    for data, label in DL:
        runningSum += torch.mean(data, dim = ([0,2,3]))
        sumSquared += torch.mean(data**2, dim = ([0,2,3]))
        batches += 1

    ## simple calcs of     
    mean = runningSum/batches
    std = (sumSquared/batches - mean**2)**0.5
    return mean,std

In [40]:
mean, sd  = calculateMeanSD(trainDL)

Our means and standard deviations are below. This will inform the normalize step of any transformations within the data.

### Augmenting training data
The key here is to add in some randomness so that the CNN detects changes in the image. CNNs are shift invariant, meaning they will detect key details regardless of the position, yet they would struggle if I flipped, cropped, stretched, etc. the images. As such, we'll add in some of that randomness to help the CNN perform better on images with unique traits.

To do this I, resize, add a random crop, a random color jitter, and a random horizontal flip. This is quite a bit of randomness, which should introduce the CNN to a lot of different varieties. I'll likely want to crank the epochs to make sure the CNN "sees" everything.

In [47]:
## we'll do some augmentation on the training data
trainTransform = transform.Compose([
    transform.Resize((150,150)),
    transform.RandomCrop((64,64)),
    transforms.ColorJitter(0.3,0.4,0.4,0.2),
    transform.RandomHorizontalFlip(), ## default is p = 0.5
    transform.ToTensor(),
    transform.Normalize((mean[0],mean[1],mean[2]), (sd[0], sd[1], sd[2]))
])

We do not augment test data because we are evaluating the model's ability to correctly identify the images as opposed to preparing it to identify key attributes anywhere.

In [54]:
testTransform = transform.Compose([
    transform.Resize((150,150)),
    transform.ToTensor(),
    transform.Normalize((mean[0],mean[1],mean[2]), (sd[0], sd[1], sd[2]))
])