In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import transforms, datasets
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
from collections import OrderedDict
import time
import math

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

In [11]:
# Locations of the image folders (one sub-directory per landmark class).
train_dir = 'landmark_images/train'
test_dir = 'landmark_images/test'

batch_size = 50
valid_size = 0.2   # fraction of the training folder held out for validation
split_seed = 42    # fixed seed so the train/valid split is identical on every run

# Per-channel statistics used for normalization (see the answer to Question 1).
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])
normalization = transforms.Normalize(mean=mean, std=std)

data_transforms = {
    # Random rotation / crop / flip for training images.
    'augment': transforms.Compose([
        transforms.RandomRotation(30),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalization,
    ]),
    # Deterministic resize + center crop for validation and test images.
    'no_augment': transforms.Compose([
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalization,
    ]),
}

# 'train' and 'valid' read the same folder with different transforms; the
# samplers below give each of them a disjoint subset of the indices.
image_datasets = {
    'train': datasets.ImageFolder(train_dir, transform=data_transforms['augment']),
    'valid': datasets.ImageFolder(train_dir, transform=data_transforms['no_augment']),
    'test': datasets.ImageFolder(test_dir, transform=data_transforms['no_augment']),
}

# Shuffle the indices with a local, seeded generator: the split is reproducible
# after "Restart & Run All" and NumPy's global random state is left untouched.
num_train = len(image_datasets['train'])
rng = np.random.default_rng(split_seed)
indices = rng.permutation(num_train).tolist()
split = int(np.floor(valid_size * num_train))
train_idx, valid_idx = indices[split:], indices[:split]

train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

# Folder names look like '<prefix>.<Landmark_Name>'. Drop everything up to the
# first dot and turn underscores into spaces. Splitting only once keeps names
# that themselves contain a dot intact, and a folder without any dot is used
# as-is instead of raising IndexError.
label_mapping = [
    class_name.split('.', 1)[-1].replace('_', ' ')
    for class_name in image_datasets['train'].classes
]
print(len(label_mapping), 'classes in this dataset')
print(label_mapping)

50 classes in this dataset
['Haleakala National Park', 'Mount Rainier National Park', 'Ljubljana Castle', 'Dead Sea', 'Wroclaws Dwarves', 'London Olympic Stadium', 'Niagara Falls', 'Stonehenge', 'Grand Canyon', 'Golden Gate Bridge', 'Edinburgh Castle', 'Mount Rushmore National Memorial', 'Kantanagar Temple', 'Yellowstone National Park', 'Terminal Tower', 'Central Park', 'Eiffel Tower', 'Changdeokgung', 'Delicate Arch', 'Vienna City Hall', 'Matterhorn', 'Taj Mahal', 'Moscow Raceway', 'Externsteine', 'Soreq Cave', 'Banff National Park', 'Pont du Gard', 'Seattle Japanese Garden', 'Sydney Harbour Bridge', 'Petronas Towers', 'Brooklyn Bridge', 'Washington Monument', 'Hanging Temple', 'Sydney Opera House', 'Great Barrier Reef', 'Monumento a la Revolucion', 'Badlands National Park', 'Atomium', 'Forth Bridge', 'Gateway of India', 'Stockholm City Hall', 'Machu Picchu', 'Death Valley National Park', 'Gullfoss Falls', 'Trevi Fountain', 'Temple of Heaven', 'Great Wall of China', 'Prague Astronomic

In [34]:
# Per-split DataLoader options: train/valid draw from their index samplers,
# the test loader iterates over the whole test folder in shuffled order.
_split_options = {
    'train': {'sampler': train_sampler},
    'valid': {'sampler': valid_sampler},
    'test': {'shuffle': True},
}

loaders_scratch = {
    split_name: torch.utils.data.DataLoader(
        image_datasets[split_name], batch_size=batch_size, **options
    )
    for split_name, options in _split_options.items()
}

**Question 1:** Describe your chosen procedure for preprocessing the data. 
- How does your code resize the images (by cropping, stretching, etc)?  What size did you pick for the input tensor, and why?
- Did you decide to augment the dataset?  If so, how (through translations, flips, rotations, etc)?  If not, why not?

**Answer**: 

I decided to resize all the input images to 224x224 in anticipation of processing them with a VGG model, which means I also normalized the images to be compatible with the VGG model, according to the [documentation](https://pytorch.org/hub/pytorch_vision_vgg/)

For data augmentation I chose to use RandomRotation, RandomResizedCrop, RandomHorizontalFlip to prevent over-fitting

I created a validation dataset by holding out 20% of the training images (`valid_size = 0.2`). The held-out images are loaded without augmentation (resize and center crop only), because after some [research](https://datascience.stackexchange.com/questions/41422/when-using-data-augmentation-is-it-ok-to-validate-only-with-the-original-images) I found that I should be using un-augmented images for validation

### (IMPLEMENTATION) Visualize a Batch of Training Data

Use the code cell below to retrieve a batch of images from your train data loader, display at least 5 images simultaneously, and label each displayed image with its class name (e.g., "Golden Gate Bridge").

Visualizing the output of your data loader is a great way to ensure that your data loading and preprocessing are working as expected.

### Initialize use_cuda variable

In [14]:
# Flag used throughout the notebook to decide whether tensors go to the GPU.
use_cuda = torch.cuda.is_available()
print(use_cuda)

# Matching torch.device handle: first CUDA device when present, otherwise CPU.
device = torch.device('cuda:0') if use_cuda else torch.device('cpu')
print(device)

True
cuda:0


In [None]:
def train(n_epochs, loaders, model, optimizer, criterion, use_cuda, save_path):
    """Train ``model`` and checkpoint it whenever the validation loss improves.

    Args:
        n_epochs: number of passes over ``loaders['train']``.
        loaders: mapping with ``'train'`` and ``'valid'`` entries; each must be
            iterable over ``(data, target)`` batches and support ``len()``.
        model: the ``nn.Module`` to optimize.
        optimizer: optimizer already bound to ``model``'s parameters.
        criterion: callable ``criterion(output, target)`` returning a scalar loss.
        use_cuda: when true, every batch is moved to the GPU with ``.cuda()``.
        save_path: file that receives ``model.state_dict()`` each time the
            average validation loss reaches a new minimum.

    Returns:
        The same ``model`` object, holding the weights of the *last* epoch
        (the best weights are the ones stored at ``save_path``).
    """
    # float('inf') rather than np.Inf: that alias was removed in NumPy 2.0.
    valid_loss_min = float('inf')

    for epoch in range(1, n_epochs + 1):
        # running sums of the per-batch losses for this epoch
        train_loss = 0.0
        valid_loss = 0.0

        ###################
        # train the model #
        ###################
        model.train()
        for data, target in loaders['train']:
            if use_cuda:
                data, target = data.cuda(), target.cuda()

            optimizer.zero_grad()
            # Call the module itself (not .forward) so registered hooks run.
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        ######################
        # validate the model #
        ######################
        model.eval()
        # No gradients are needed for validation; enter no_grad once per epoch.
        with torch.no_grad():
            for data, target in loaders['valid']:
                if use_cuda:
                    data, target = data.cuda(), target.cuda()

                output = model(data)
                valid_loss += criterion(output, target).item()

        # average the summed batch losses over the number of batches
        train_loss = train_loss / len(loaders['train'])
        valid_loss = valid_loss / len(loaders['valid'])

        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
            epoch,
            train_loss,
            valid_loss
            ))

        # keep only the checkpoint with the lowest validation loss seen so far
        if valid_loss < valid_loss_min:
            print('Validation loss has decreased, Saving...')
            valid_loss_min = valid_loss
            torch.save(model.state_dict(), save_path)

    return model

In [None]:
def test(loaders, model, criterion, use_cuda):

    # monitor test loss and accuracy
    test_loss = 0.
    correct = 0.
    total = 0.

    # set the module to evaluation mode
    model.eval()

    for data, target in loaders['test']:
        # move to GPU
        if use_cuda:
            data, target = data.cuda(), target.cuda()
        with torch.no_grad():
            # forward pass: compute predicted outputs by passing inputs to the model
            log_ps = model.forward(data)
            ps = torch.exp(log_ps)
            # calculate the loss
            loss = criterion(log_ps, target)
            # update average test loss 
            test_loss += loss.item()
            # convert output probabilities to predicted class
            top_p, top_class = ps.topk(1, dim=1)
            # compare predictions to true label
            correct += torch.sum((top_class == target.reshape(*top_class.shape)).type(torch.FloatTensor)).item()
            total += data.size(0)

    
    test_loss = test_loss/len(loaders['test'])
    print('Test Loss: {:.6f}\n'.format(test_loss))

    print('\nTest Accuracy: %2d%% (%2d/%2d)' % (
        100. * correct / total, correct, total))

### (IMPLEMENTATION) Specify Loss Function and Optimizer

Use the next code cell to specify a [loss function](http://pytorch.org/docs/stable/nn.html#loss-functions) and [optimizer](http://pytorch.org/docs/stable/optim.html).  Save the chosen loss function as `criterion_scratch`, and fill in the function `get_optimizer_scratch` below.

### (IMPLEMENTATION) Model Architecture

Create a CNN to classify images of landmarks.  Use the template in the code cell below.

In [67]:
# Negative log-likelihood loss; it expects log-probabilities as model output.
criterion_scratch = nn.NLLLoss()


def get_optimizer_scratch(model):
    """Return an Adam optimizer with default settings over ``model``'s parameters."""
    trainable_params = model.parameters()
    return optim.Adam(trainable_params)

# original

import torch.nn as nn

# define the CNN architecture
class Net(nn.Module):
    """Baseline CNN: four stride-2 convolutions followed by a dense classifier.

    ``forward`` returns log-probabilities over 50 classes (the last classifier
    layer is ``LogSoftmax``).
    """

    def __init__(self):
        super().__init__()
        # Every 3x3 / stride-2 / padding-1 convolution halves height and width,
        # so a 224x224 input shrinks 224 -> 112 -> 56 -> 28 -> 14.
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=2, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1)
        self.conv4 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1)

        # Dense head on the flattened 14 * 14 * 64 = 12544 features.
        head = OrderedDict()
        head['fc1'] = nn.Linear(12544, 500)
        head['relu1'] = nn.ReLU()
        head['dropout1'] = nn.Dropout(p=0.2)
        head['fc2'] = nn.Linear(500, 250)
        head['relu2'] = nn.ReLU()
        head['dropout2'] = nn.Dropout(p=0.2)
        head['fc3'] = nn.Linear(250, 100)
        head['relu3'] = nn.ReLU()
        head['dropout3'] = nn.Dropout(p=0.2)
        head['fc_final'] = nn.Linear(100, 50)
        head['log_output'] = nn.LogSoftmax(dim=1)
        self.classifier = nn.Sequential(head)

    def forward(self, x):
        """Run the conv stack and classifier; also caches the result in ``self.log_ps``."""
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
            x = F.relu(conv(x))

        # flatten the feature maps into rows of 12544 features
        x = x.view(-1, 14 * 14 * 64)
        x = self.classifier(x)

        self.log_ps = x
        return x

#-#-# Do NOT modify the code below this line. #-#-#

# instantiate the CNN defined in this cell
model_scratch = Net()

# move the model to the GPU if CUDA is available
if use_cuda:
    model_scratch.cuda()

In [None]:
# Baseline ("original") run: 20 epochs; train() saves the checkpoint with the
# lowest validation loss to 'ignore.pt'.
model_scratch = train(
    20,
    loaders_scratch,
    model_scratch,
    get_optimizer_scratch(model_scratch),
    criterion_scratch,
    use_cuda,
    'ignore.pt',
)

In [None]:
# Reload the checkpoint written by train() before scoring the test split.
best_state = torch.load('ignore.pt')
model_scratch.load_state_dict(best_state)
test(loaders_scratch, model_scratch, criterion_scratch, use_cuda)

In [None]:
# Negative log-likelihood loss; it expects log-probabilities as model output.
criterion_scratch = nn.NLLLoss()


def get_optimizer_scratch(model):
    """Return an Adam optimizer with default settings over ``model``'s parameters."""
    trainable_params = model.parameters()
    return optim.Adam(trainable_params)

# maxpool only

import torch.nn as nn

# define the CNN architecture
class Net(nn.Module):
    """Max-pool variant: stride-1 convolutions, each followed by 2x2 max pooling.

    This is the "maxpool only" experiment, so it contains no batch-norm
    layers. (The BatchNorm2d modules that used to be registered here were never
    called in ``forward``; they only added unused entries to the state dict.)
    ``forward`` returns log-probabilities over 50 classes.
    """

    def __init__(self):
        super(Net, self).__init__()
        # 3x3 / stride-1 / padding-1 convolutions keep the spatial size; the
        # 2x2 max pool applied after each one in forward() halves it, so a
        # 224x224 input shrinks 224 -> 112 -> 56 -> 28 -> 14.
        self.conv1 = nn.Conv2d(3, 16, 3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, 3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(32, 64, 3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(64, 64, 3, stride=1, padding=1)

        # Dense head on the flattened 14 * 14 * 64 = 12544 features.
        self.classifier = nn.Sequential(OrderedDict([
            ('fc1', nn.Linear(12544, 500)),
            ('relu1', nn.ReLU()),
            ('dropout1', nn.Dropout(p=0.2)),
            ('fc2', nn.Linear(500, 250)),
            ('relu2', nn.ReLU()),
            ('dropout2', nn.Dropout(p=0.2)),
            ('fc3', nn.Linear(250, 100)),
            ('relu3', nn.ReLU()),
            ('dropout3', nn.Dropout(p=0.2)),
            ('fc_final', nn.Linear(100, 50)),
            ('log_output', nn.LogSoftmax(dim=1))
        ]))

    def forward(self, x):
        """Run conv -> ReLU -> max-pool four times, then the classifier.

        The result is also cached in ``self.log_ps``.
        """
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
            x = F.relu(conv(x))
            x = F.max_pool2d(x, 2)

        # flatten the feature maps into rows of 12544 features
        x = x.view(-1, 14 * 14 * 64)

        x = self.classifier(x)

        self.log_ps = x
        return x

#-#-# Do NOT modify the code below this line. #-#-#

# instantiate the CNN defined in this cell
model_scratch = Net()

# move the model to the GPU if CUDA is available
if use_cuda:
    model_scratch.cuda()

In [None]:
# Max-pooling-only run: 20 epochs; train() saves the checkpoint with the
# lowest validation loss to 'ignore.pt'.
model_scratch = train(
    20,
    loaders_scratch,
    model_scratch,
    get_optimizer_scratch(model_scratch),
    criterion_scratch,
    use_cuda,
    'ignore.pt',
)

In [None]:
# Reload the checkpoint written by train() before scoring the test split.
best_state = torch.load('ignore.pt')
model_scratch.load_state_dict(best_state)
test(loaders_scratch, model_scratch, criterion_scratch, use_cuda)

In [None]:
# Negative log-likelihood loss; it expects log-probabilities as model output.
criterion_scratch = nn.NLLLoss()


def get_optimizer_scratch(model):
    """Return an Adam optimizer with default settings over ``model``'s parameters."""
    trainable_params = model.parameters()
    return optim.Adam(trainable_params)

# batch norm only

import torch.nn as nn

# define the CNN architecture
class Net(nn.Module):
    """Batch-norm variant: four stride-2 convolutions, each followed by
    BatchNorm2d and ReLU, then a dense classifier with BatchNorm1d.

    Every layer uses the same ordering: linear/conv -> batch norm -> ReLU.
    ``fc2`` previously applied ReLU *before* its batch norm, unlike every other
    layer in the network; it now follows the common ordering. Module names (and
    therefore state-dict keys) are unchanged.
    ``forward`` returns log-probabilities over 50 classes.
    """

    def __init__(self):
        super(Net, self).__init__()
        # Every 3x3 / stride-2 / padding-1 convolution halves height and width,
        # so a 224x224 input shrinks 224 -> 112 -> 56 -> 28 -> 14.
        self.conv1 = nn.Conv2d(3, 16, 3, stride=2, padding=1)
        self.conv1_bn = nn.BatchNorm2d(16)
        self.conv2 = nn.Conv2d(16, 32, 3, stride=2, padding=1)
        self.conv2_bn = nn.BatchNorm2d(32)
        self.conv3 = nn.Conv2d(32, 64, 3, stride=2, padding=1)
        self.conv3_bn = nn.BatchNorm2d(64)
        self.conv4 = nn.Conv2d(64, 64, 3, stride=2, padding=1)
        self.conv4_bn = nn.BatchNorm2d(64)

        # Dense head on the flattened 14 * 14 * 64 = 12544 features.
        self.classifier = nn.Sequential(OrderedDict([
            ('fc1', nn.Linear(12544, 500)),
            ('fc1_bn', nn.BatchNorm1d(500)),
            ('relu1', nn.ReLU()),
            ('dropout1', nn.Dropout(p=0.2)),
            ('fc2', nn.Linear(500, 250)),
            ('fc2_bn', nn.BatchNorm1d(250)),
            ('relu2', nn.ReLU()),
            ('dropout2', nn.Dropout(p=0.2)),
            ('fc3', nn.Linear(250, 100)),
            ('fc3_bn', nn.BatchNorm1d(100)),
            ('relu3', nn.ReLU()),
            ('dropout3', nn.Dropout(p=0.2)),
            ('fc_final', nn.Linear(100, 50)),
            ('fc_final_bn', nn.BatchNorm1d(50)),
            ('log_output', nn.LogSoftmax(dim=1))
        ]))

    def forward(self, x):
        """Run conv -> batch norm -> ReLU four times, then the classifier.

        The result is also cached in ``self.log_ps``.
        """
        x = F.relu(self.conv1_bn(self.conv1(x)))
        x = F.relu(self.conv2_bn(self.conv2(x)))
        x = F.relu(self.conv3_bn(self.conv3(x)))
        x = F.relu(self.conv4_bn(self.conv4(x)))

        # flatten the feature maps into rows of 12544 features
        x = x.view(-1, 14 * 14 * 64)

        x = self.classifier(x)

        self.log_ps = x
        return x

#-#-# Do NOT modify the code below this line. #-#-#

# instantiate the CNN defined in this cell
model_scratch = Net()

# move the model to the GPU if CUDA is available
if use_cuda:
    model_scratch.cuda()

In [None]:
# Batch-norm run: 20 epochs; train() saves the checkpoint with the lowest
# validation loss to 'ignore.pt'.
model_scratch = train(
    20,
    loaders_scratch,
    model_scratch,
    get_optimizer_scratch(model_scratch),
    criterion_scratch,
    use_cuda,
    'ignore.pt',
)

In [None]:
# Reload the checkpoint written by train() before scoring the test split.
best_state = torch.load('ignore.pt')
model_scratch.load_state_dict(best_state)
test(loaders_scratch, model_scratch, criterion_scratch, use_cuda)

In [None]:
# Negative log-likelihood loss; it expects log-probabilities as model output.
criterion_scratch = nn.NLLLoss()


def get_optimizer_scratch(model):
    """Return an Adam optimizer with default settings over ``model``'s parameters."""
    trainable_params = model.parameters()
    return optim.Adam(trainable_params)

# maxpool and batch norm

import torch.nn as nn

# define the CNN architecture
class Net(nn.Module):
    """Max-pool + batch-norm variant: stride-1 convolutions, each followed by
    BatchNorm2d, ReLU and 2x2 max pooling, then a dense classifier with
    BatchNorm1d.

    Every layer uses the same ordering: linear/conv -> batch norm -> ReLU.
    ``fc2`` previously applied ReLU *before* its batch norm, unlike every other
    layer in the network; it now follows the common ordering. Module names (and
    therefore state-dict keys) are unchanged.
    ``forward`` returns log-probabilities over 50 classes.
    """

    def __init__(self):
        super(Net, self).__init__()
        # 3x3 / stride-1 / padding-1 convolutions keep the spatial size; the
        # 2x2 max pool applied after each one in forward() halves it, so a
        # 224x224 input shrinks 224 -> 112 -> 56 -> 28 -> 14.
        self.conv1 = nn.Conv2d(3, 16, 3, stride=1, padding=1)
        self.conv1_bn = nn.BatchNorm2d(16)
        self.conv2 = nn.Conv2d(16, 32, 3, stride=1, padding=1)
        self.conv2_bn = nn.BatchNorm2d(32)
        self.conv3 = nn.Conv2d(32, 64, 3, stride=1, padding=1)
        self.conv3_bn = nn.BatchNorm2d(64)
        self.conv4 = nn.Conv2d(64, 64, 3, stride=1, padding=1)
        self.conv4_bn = nn.BatchNorm2d(64)

        # Dense head on the flattened 14 * 14 * 64 = 12544 features.
        self.classifier = nn.Sequential(OrderedDict([
            ('fc1', nn.Linear(12544, 500)),
            ('fc1_bn', nn.BatchNorm1d(500)),
            ('relu1', nn.ReLU()),
            ('dropout1', nn.Dropout(p=0.2)),
            ('fc2', nn.Linear(500, 250)),
            ('fc2_bn', nn.BatchNorm1d(250)),
            ('relu2', nn.ReLU()),
            ('dropout2', nn.Dropout(p=0.2)),
            ('fc3', nn.Linear(250, 100)),
            ('fc3_bn', nn.BatchNorm1d(100)),
            ('relu3', nn.ReLU()),
            ('dropout3', nn.Dropout(p=0.2)),
            ('fc_final', nn.Linear(100, 50)),
            ('fc_final_bn', nn.BatchNorm1d(50)),
            ('log_output', nn.LogSoftmax(dim=1))
        ]))

    def forward(self, x):
        """Run conv -> batch norm -> ReLU -> max-pool four times, then the classifier.

        The result is also cached in ``self.log_ps``.
        """
        x = F.max_pool2d(F.relu(self.conv1_bn(self.conv1(x))), 2)
        x = F.max_pool2d(F.relu(self.conv2_bn(self.conv2(x))), 2)
        x = F.max_pool2d(F.relu(self.conv3_bn(self.conv3(x))), 2)
        x = F.max_pool2d(F.relu(self.conv4_bn(self.conv4(x))), 2)

        # flatten the feature maps into rows of 12544 features
        x = x.view(-1, 14 * 14 * 64)

        x = self.classifier(x)

        self.log_ps = x
        return x

#-#-# Do NOT modify the code below this line. #-#-#

# instantiate the CNN defined in this cell
model_scratch = Net()

# move the model to the GPU if CUDA is available
if use_cuda:
    model_scratch.cuda()

In [None]:
# Batch-norm + max-pooling run: 20 epochs; train() saves the checkpoint with
# the lowest validation loss to 'ignore.pt'.
model_scratch = train(
    20,
    loaders_scratch,
    model_scratch,
    get_optimizer_scratch(model_scratch),
    criterion_scratch,
    use_cuda,
    'ignore.pt',
)

In [None]:
# Reload the checkpoint written by train() before scoring the test split.
best_state = torch.load('ignore.pt')
model_scratch.load_state_dict(best_state)
test(loaders_scratch, model_scratch, criterion_scratch, use_cuda)