In [1]:
import torch
import torchvision.models as models

# check if CUDA is available
use_cuda = torch.cuda.is_available()
print(f"CUDA available: {use_cuda}")

CUDA available: True


In [2]:
import os
from torchvision import datasets
import torchvision.transforms as transforms

### TODO: Write data loaders for training, validation, and test sets
## Specify appropriate transforms, and batch_sizes

batch_size = 20
img_size = (224, 224)

transform_train = transforms.Compose([transforms.RandomHorizontalFlip(),
                                      transforms.RandomRotation(20),                                      
                                      transforms.Resize(img_size),                                      
                                      transforms.ToTensor(),
                                      transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])])

transform_valid = transforms.Compose([transforms.Resize(img_size),
                                      transforms.ToTensor(),
                                      transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])])

train_data = datasets.ImageFolder('dogImages/train',transform=transform_train)
val_data = datasets.ImageFolder('dogImages/valid',transform=transform_valid)
test_data = datasets.ImageFolder('dogImages/test',transform=transform_valid)

train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle = True)
val_loader = torch.utils.data.DataLoader(val_data, batch_size=batch_size)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size)

loaders_scratch = {'train': train_loader,
                   'test': val_loader,
                   'valid': test_loader}

classes = train_data.classes

In [3]:
import torch.nn as nn
import torch.nn.functional as F

# define the CNN architecture
class Net(nn.Module):
    ### TODO: choose an architecture, and complete the class
    def __init__(self):
        super(Net, self).__init__()
        ## Define layers of a CNN
        # 4 Convolutional layers
        self.conv1_1 = nn.Conv2d(3, 32, 3, padding=1)
        #self.conv1_2 = nn.Conv2d(32, 32, 3, padding=1)
        self.conv2_1 = nn.Conv2d(32, 64, 3, padding=1)
        #self.conv2_2 = nn.Conv2d(64, 64, 3, padding=1)        
        self.conv3_1 = nn.Conv2d(64, 128, 3, padding=1)
        #self.conv3_2 = nn.Conv2d(128, 128, 3, padding=1)        
        self.conv4_1= nn.Conv2d(128, 256, 3, padding=1)
        #self.conv4_2 = nn.Conv2d(256, 256, 3, padding=1)        
        self.conv5_1 = nn.Conv2d(256, 512, 3, padding=1)
        #self.conv5_2 = nn.Conv2d(512, 512, 3, padding=1)
        
        # batch norm layers
        self.conv1_bn = nn.BatchNorm2d(32)
        self.conv2_bn = nn.BatchNorm2d(64)
        self.conv3_bn = nn.BatchNorm2d(128)
        self.conv4_bn = nn.BatchNorm2d(256)
        self.conv5_bn = nn.BatchNorm2d(512)
    
        # max pooling layer
        self.max_pool = nn.MaxPool2d(2, 2)
        
        # linear layers
        self.fc1 = nn.Linear(512 * 7 * 7, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.output = nn.Linear(512, len(classes))
        
        # dropout layer
        self.dropout_convs = nn.Dropout(0.25)
        self.dropout_linear = nn.Dropout(0.5)
    
    def forward(self, x):
        ## Define forward behavior
        
        # 1st conv layer set
        x = self.conv1_bn(F.relu(self.conv1_1(x)))
        #x = self.conv1_bn(F.relu(self.conv1_2(x)))
        x = self.max_pool(x)
        x = self.dropout_convs(x)
        # 2nd conv layer set
        x = self.conv2_bn(F.relu(self.conv2_1(x)))
        #x = self.conv2_bn(F.relu(self.conv2_2(x)))
        x = self.max_pool(x)
        x = self.dropout_convs(x)       
        # 3rd conv layer set
        x = self.conv3_bn(F.relu(self.conv3_1(x)))
        #x = self.conv3_bn(F.relu(self.conv3_2(x)))
        x = self.max_pool(x)
        x = self.dropout_convs(x)
        # 4th conv layer set
        x = self.conv4_bn(F.relu(self.conv4_1(x)))
        #x = self.conv4_bn(F.relu(self.conv4_2(x)))
        x = self.max_pool(x)
        x = self.dropout_convs(x)
        # 5th conv layer set
        x = self.conv5_bn(F.relu(self.conv5_1(x)))
        #x = self.conv5_bn(F.relu(self.conv5_2(x)))
        x = self.max_pool(x)
        x = self.dropout_convs(x)
        
        # flatten image input
        x = x.view(-1, 512 * 7 * 7)
        
        # 1st hidden layer, with relu activation function and dropout
        x = F.relu(self.fc1(x))
        x = self.dropout_linear(x)
        
        # 2nd hidden layer, with relu activation function and dropout
        x = F.relu(self.fc2(x))
        x = self.dropout_linear(x)
        
        # output layer
        x = self.output(x)
        
        return x

#-#-# You do NOT have to modify the code below this line. #-#-#

# instantiate the CNN
model_scratch = Net()

# move tensors to GPU if CUDA is available
if use_cuda:
    model_scratch.cuda()
    
print(model_scratch)

Net(
  (conv1_1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2_1): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3_1): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4_1): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv5_1): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv1_bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3_bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv4_bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv5_bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (max_pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=25088,

In [4]:
import torch.optim as optim

### TODO: select loss function
criterion_scratch = nn.CrossEntropyLoss()

### TODO: select optimizer
optimizer_scratch = optim.SGD(model_scratch.parameters(), lr=0.01)

In [5]:
# the following import is required for training to be robust to truncated images
import numpy as np

from PIL import ImageFile 
ImageFile.LOAD_TRUNCATED_IMAGES = True

def train(n_epochs, loaders, model, optimizer, criterion, use_cuda, save_path):
    """returns trained model"""
    # initialize tracker for minimum validation loss
    valid_loss_min = np.Inf
    
    for epoch in range(1, n_epochs+1):
        # initialize variables to monitor training and validation loss
        train_loss = 0.0
        valid_loss = 0.0
        
        ###################
        # train the model #
        ###################
        model.train()
        for batch_idx, (data, target) in enumerate(loaders['train']):
            # move to GPU
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            ## find the loss and update the model parameters accordingly
            ## record the average training loss, using something like
            ## train_loss = train_loss + ((1 / (batch_idx + 1)) * (loss.data - train_loss))
            
            # clear the gradients of all optimized variables
            optimizer.zero_grad()
            # forward pass: compute predicted outputs by passing inputs to the model
            output = model(data)
            # calculate the batch loss
            loss = criterion(output, target)
            # backward pass: compute gradient of the loss with respect to model parameters
            loss.backward()
            # perform a single optimization step (parameter update)
            optimizer.step()
            # calculate the train loss
            train_loss = train_loss + ((1 / (batch_idx + 1)) * (loss.data - train_loss))
        
        ######################    
        # validate the model #
        ######################
        model.eval()
        for batch_idx, (data, target) in enumerate(loaders['valid']):
            # move to GPU
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            
            # forward pass to predict the outputs
            output = model(data)
            # compute the loss
            loss = criterion(output,target)
            # update the average validation loss
            valid_loss = valid_loss + ((1 / (batch_idx + 1)) * (loss.data - valid_loss))
            
        # print training/validation statistics 
        print(f"Epoch: {epoch} \tTraining Loss: {train_loss:.6f} \tValidation Loss: {valid_loss:.6f}")
        
        ## save the model if validation loss has decreased
        if valid_loss <= valid_loss_min:
            print(f"Validation loss decreased ({valid_loss_min:.6f} --> {valid_loss:.6f}).  Saving model ...")
            torch.save(model.state_dict(), save_path)
            valid_loss_min = valid_loss
            
    # return trained model
    return model

# train the model
model_scratch = train(40, loaders_scratch, model_scratch, optimizer_scratch, 
                      criterion_scratch, use_cuda, 'model_scratch_test.pt')

# load the model that got the best validation accuracy
model_scratch.load_state_dict(torch.load('model_scratch_test.pt'))

Epoch: 1 	Training Loss: 4.895207 	Validation Loss: 4.783557
Validation loss decreased (inf --> 4.783557).  Saving model ...
Epoch: 2 	Training Loss: 4.670681 	Validation Loss: 4.608133
Validation loss decreased (4.783557 --> 4.608133).  Saving model ...
Epoch: 3 	Training Loss: 4.485846 	Validation Loss: 4.494287
Validation loss decreased (4.608133 --> 4.494287).  Saving model ...
Epoch: 4 	Training Loss: 4.368755 	Validation Loss: 4.480138
Validation loss decreased (4.494287 --> 4.480138).  Saving model ...
Epoch: 5 	Training Loss: 4.272352 	Validation Loss: 4.303110
Validation loss decreased (4.480138 --> 4.303110).  Saving model ...
Epoch: 6 	Training Loss: 4.172677 	Validation Loss: 4.330766
Epoch: 7 	Training Loss: 4.104091 	Validation Loss: 4.347281
Epoch: 8 	Training Loss: 4.043726 	Validation Loss: 4.192131
Validation loss decreased (4.303110 --> 4.192131).  Saving model ...
Epoch: 9 	Training Loss: 3.965282 	Validation Loss: 4.182181
Validation loss decreased (4.192131 --> 4.

<All keys matched successfully>

In [6]:
def test(loaders, model, criterion, use_cuda):

    # monitor test loss and accuracy
    test_loss = 0.
    correct = 0.
    total = 0.

    model.eval()
    for batch_idx, (data, target) in enumerate(loaders['test']):
        # move to GPU
        if use_cuda:
            data, target = data.cuda(), target.cuda()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # calculate the loss
        loss = criterion(output, target)
        # update average test loss 
        test_loss = test_loss + ((1 / (batch_idx + 1)) * (loss.data - test_loss))
        # convert output probabilities to predicted class
        pred = output.data.max(1, keepdim=True)[1]
        # compare predictions to true label
        correct += np.sum(np.squeeze(pred.eq(target.data.view_as(pred))).cpu().numpy())
        total += data.size(0)
            
    print('Test Loss: {:.6f}\n'.format(test_loss))

    print('\nTest Accuracy: %2d%% (%2d/%2d)' % (
        100. * correct / total, correct, total))

# call test function    
test(loaders_scratch, model_scratch, criterion_scratch, use_cuda)

Test Loss: 3.352300


Test Accuracy: 24% (204/835)
