In [1]:
import torch
import torchvision.models as models

# Probe for a CUDA device once; the flag is reused when placing the model
# and batches on the GPU.
use_cuda = torch.cuda.is_available()
print("CUDA available: {}".format(use_cuda))

CUDA available: True


In [2]:
import os
from torchvision import datasets
import torchvision.transforms as transforms

# Data pipeline for the from-scratch model: per-split transforms, ImageFolder
# datasets and DataLoaders for the train / valid / test folders under DATA_DIR.

TARGET_SIZE = (224, 224)
BATCH_SIZE = 20
DATA_DIR = "dogImages"

# Shift/scale every channel with mean 0.5 and std 0.5.
# (ImageNet statistics would be mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225].)
standard_normalization = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])

# Training images get random flip/rotation augmentation before being resized.
train_transforms = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(40),
    transforms.Resize(TARGET_SIZE),
    transforms.ToTensor(),
    standard_normalization,
])

# Validation and test images are only resized and normalized (no augmentation).
val_transforms = transforms.Compose([
    transforms.Resize(TARGET_SIZE),
    transforms.ToTensor(),
    standard_normalization,
])

test_transforms = transforms.Compose([
    transforms.Resize(TARGET_SIZE),
    transforms.ToTensor(),
    standard_normalization,
])

# One ImageFolder per split; class names are taken from the training folder.
train_data = datasets.ImageFolder(root=DATA_DIR + '/train', transform=train_transforms)
val_data = datasets.ImageFolder(root=DATA_DIR + '/valid', transform=val_transforms)
test_data = datasets.ImageFolder(root=DATA_DIR + '/test', transform=test_transforms)
classes = train_data.classes

# Only the training loader shuffles; evaluation order stays fixed.
train_loader = torch.utils.data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
val_loader = torch.utils.data.DataLoader(dataset=val_data, batch_size=BATCH_SIZE)
test_loader = torch.utils.data.DataLoader(dataset=test_data, batch_size=BATCH_SIZE)

loaders_scratch = {
    "train": train_loader,
    "valid": val_loader,
    "test": test_loader,
}

In [3]:
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Convolutional image classifier built from scratch.

    The network stacks six convolutional blocks (3 -> 16 -> 32 -> 64 -> 128
    -> 256 -> 512 channels), each of which halves the spatial resolution,
    followed by dropout and two fully connected layers.

    ``fc1`` is sized for a 512 x 3 x 3 feature map, which is what six 2x2
    max-pools produce from 224 x 224 inputs
    (224 -> 112 -> 56 -> 28 -> 14 -> 7 -> 3). Other input sizes will fail
    at ``fc1`` unless they also reduce to 3 x 3.

    Args:
        num_classes: Number of output logits. Defaults to 133, the number
            of classes this notebook was written for.
    """

    def __init__(self, num_classes=133):
        super().__init__()
        # Feature extractor: every block applies a 2x2 max-pool, so the
        # spatial size is divided by 2 per block.
        self.conv_block_1 = self.conv_block(3, 16)
        self.conv_block_2 = self.conv_block(16, 32)
        self.conv_block_3 = self.conv_block(32, 64)
        self.conv_block_4 = self.conv_block(64, 128)
        self.conv_block_5 = self.conv_block(128, 256)
        self.conv_block_6 = self.conv_block(256, 512)
        # Dropout: light on the conv features, heavier between the FC layers.
        self.dropout1 = nn.Dropout(0.2)
        self.dropout2 = nn.Dropout(0.5)
        self.flatten = nn.Flatten()
        # Classifier head.
        self.fc1 = nn.Linear(512 * 3 * 3, 2000)
        self.fc2 = nn.Linear(2000, num_classes)

    def conv_block(self, in_channels, out_channels):
        """Build one feature block.

        Layer order is Conv3x3 -> Conv3x3 -> ReLU -> MaxPool2x2 -> BatchNorm.
        The order is kept exactly as is because the positions inside the
        ``nn.Sequential`` determine the ``state_dict`` keys of saved
        checkpoints.

        Args:
            in_channels: Channels of the incoming feature map.
            out_channels: Channels produced by both convolutions.

        Returns:
            An ``nn.Sequential`` that keeps the channel count at
            ``out_channels`` and halves height and width.
        """
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 3, padding=1),
            nn.Conv2d(out_channels, out_channels, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d((2, 2)),
            nn.BatchNorm2d(out_channels)
        )

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: Image batch with 3 channels; spatial size must reduce to
                3 x 3 after the six blocks (e.g. 224 x 224).

        Returns:
            Tensor of raw (un-normalized) logits, one row per image and
            ``num_classes`` columns.
        """
        # Extract features.
        x = self.conv_block_1(x)
        x = self.conv_block_2(x)
        x = self.conv_block_3(x)
        x = self.conv_block_4(x)
        x = self.conv_block_5(x)
        x = self.conv_block_6(x)
        # Regularize, then flatten the feature map for the FC head.
        x = self.dropout1(x)
        x = self.flatten(x)
        x = F.relu(self.fc1(x))
        x = self.dropout2(x)
        # No softmax here: nn.CrossEntropyLoss expects raw logits.
        x = self.fc2(x)
        return x

# Instantiate the scratch network and, when a GPU is present, place its
# parameters there (Module.cuda() returns the module itself).
model_scratch = Net()
use_cuda = torch.cuda.is_available()
if use_cuda:
    model_scratch = model_scratch.cuda()

In [4]:
import torch.optim as optim

# Loss: multi-class cross entropy computed on the network's raw logits.
criterion_scratch = nn.CrossEntropyLoss()

# Optimizer: plain SGD over all parameters of the scratch model.
optimizer_scratch = optim.SGD(params=model_scratch.parameters(), lr=0.01)

In [5]:
import numpy as np

# The PIL import and flag below are what make training robust to truncated
# image files: PIL is told to load them instead of rejecting them.
from PIL import ImageFile 
ImageFile.LOAD_TRUNCATED_IMAGES = True

def train(n_epochs, loaders, model, optimizer, criterion, use_cuda, save_path):
    """Train ``model`` and checkpoint it whenever validation loss does not get worse.

    Each epoch runs one pass over ``loaders['train']`` with gradient updates,
    then one pass over ``loaders['valid']`` without gradient tracking. The
    model's ``state_dict`` is written to ``save_path`` every time the epoch's
    average validation loss is less than or equal to the best seen so far.

    Args:
        n_epochs: Number of epochs to run.
        loaders: Mapping with ``'train'`` and ``'valid'`` entries, each an
            iterable yielding ``(data, target)`` batches.
        model: The ``nn.Module`` to optimize (modified in place).
        optimizer: Optimizer bound to ``model``'s parameters.
        criterion: Callable ``criterion(output, target)`` returning a scalar
            loss tensor.
        use_cuda: When true, every batch is moved to the GPU first.
        save_path: File path for the best checkpoint.

    Returns:
        The same ``model`` object holding the weights of the LAST epoch,
        which are not necessarily the best ones; load ``save_path`` to get
        the checkpoint with the lowest validation loss.
    """
    # np.inf rather than the np.Inf alias, which was removed in NumPy 2.0.
    valid_loss_min = np.inf

    for epoch in range(1, n_epochs + 1):
        # Running means of the per-batch losses for this epoch.
        train_loss = 0.0
        valid_loss = 0.0

        ###################
        # train the model #
        ###################
        model.train()
        for batch_idx, (data, target) in enumerate(loaders['train']):
            if use_cuda:
                data, target = data.cuda(), target.cuda()

            # Standard step: reset grads, forward, loss, backward, update.
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            # Incremental mean; .item() yields a Python float so the running
            # statistic does not live on the GPU or reference the graph.
            train_loss += (loss.item() - train_loss) / (batch_idx + 1)

        ######################
        # validate the model #
        ######################
        model.eval()
        # No parameter update happens here, so skip autograd bookkeeping.
        with torch.no_grad():
            for batch_idx, (data, target) in enumerate(loaders['valid']):
                if use_cuda:
                    data, target = data.cuda(), target.cuda()

                output = model(data)
                loss = criterion(output, target)
                valid_loss += (loss.item() - valid_loss) / (batch_idx + 1)

        # print training/validation statistics
        print(f"Epoch: {epoch} \tTraining Loss: {train_loss:.6f} \tValidation Loss: {valid_loss:.6f}")

        # Save the model if validation loss has not increased.
        if valid_loss <= valid_loss_min:
            print(f"Validation loss decreased ({valid_loss_min:.6f} --> {valid_loss:.6f}).  Saving model ...")
            torch.save(model.state_dict(), save_path)
            valid_loss_min = valid_loss

    # return trained model
    return model

# Fit the scratch model for 40 epochs; the best weights are checkpointed to disk.
model_scratch = train(
    n_epochs=40,
    loaders=loaders_scratch,
    model=model_scratch,
    optimizer=optimizer_scratch,
    criterion=criterion_scratch,
    use_cuda=use_cuda,
    save_path='model_scratch_test.pt',
)

# Restore the checkpoint with the lowest validation loss (train() returns the
# last-epoch weights, which may be worse).
model_scratch.load_state_dict(torch.load('model_scratch_test.pt'))

Epoch: 1 	Training Loss: 4.660955 	Validation Loss: 4.351301
Validation loss decreased (inf --> 4.351301).  Saving model ...
Epoch: 2 	Training Loss: 4.271890 	Validation Loss: 4.181582
Validation loss decreased (4.351301 --> 4.181582).  Saving model ...
Epoch: 3 	Training Loss: 4.007847 	Validation Loss: 3.923587
Validation loss decreased (4.181582 --> 3.923587).  Saving model ...
Epoch: 4 	Training Loss: 3.802997 	Validation Loss: 3.790495
Validation loss decreased (3.923587 --> 3.790495).  Saving model ...
Epoch: 5 	Training Loss: 3.601889 	Validation Loss: 3.583936
Validation loss decreased (3.790495 --> 3.583936).  Saving model ...
Epoch: 6 	Training Loss: 3.429857 	Validation Loss: 3.522250
Validation loss decreased (3.583936 --> 3.522250).  Saving model ...
Epoch: 7 	Training Loss: 3.264986 	Validation Loss: 3.581443
Epoch: 8 	Training Loss: 3.112244 	Validation Loss: 3.288750
Validation loss decreased (3.522250 --> 3.288750).  Saving model ...
Epoch: 9 	Training Loss: 2.967093 

<All keys matched successfully>

In [6]:
def test(loaders, model, criterion, use_cuda):

    # monitor test loss and accuracy
    test_loss = 0.
    correct = 0.
    total = 0.

    model.eval()
    for batch_idx, (data, target) in enumerate(loaders['test']):
        # move to GPU
        if use_cuda:
            data, target = data.cuda(), target.cuda()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # calculate the loss
        loss = criterion(output, target)
        # update average test loss 
        test_loss = test_loss + ((1 / (batch_idx + 1)) * (loss.data - test_loss))
        # convert output probabilities to predicted class
        pred = output.data.max(1, keepdim=True)[1]
        # compare predictions to true label
        correct += np.sum(np.squeeze(pred.eq(target.data.view_as(pred))).cpu().numpy())
        total += data.size(0)
            
    print('Test Loss: {:.6f}\n'.format(test_loss))

    print('\nTest Accuracy: %2d%% (%2d/%2d)' % (
        100. * correct / total, correct, total))

# Report loss and accuracy of the scratch model on the held-out test split.
test(
    loaders=loaders_scratch,
    model=model_scratch,
    criterion=criterion_scratch,
    use_cuda=use_cuda,
)

Test Loss: 2.038834


Test Accuracy: 48% (402/836)
