### Dependencies

In [0]:
# PyTorch: build the wheel URL for this interpreter and accelerator, then install it.
from os.path import exists

# NOTE(review): `wheel.pep425tags` is a private module of the `wheel` package and
# may be missing in newer `wheel` releases - confirm it is importable in the target runtime.
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag

# Wheel tag of the running interpreter, formatted as '<impl><version>-<abi>'
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())

# Captured shell output (one entry per line): sed rewrites a trailing '.X.Y'
# on each libcudart entry into 'cuXY'
cuda_output = !ldconfig -p | grep cudart.so | sed -e 's/.*\.\([0-9]*\)\.\([0-9]*\)$/cu\1\2/'

# Use the first CUDA tag when an NVIDIA device node exists, otherwise the CPU build
accelerator = cuda_output[0] if exists('/dev/nvidia0') else 'cpu'

# Install torch 0.4.1 for the detected accelerator/platform, plus torchvision (version not pinned).
# NOTE(review): the wheel is fetched over plain http - consider https if the host supports it.
!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.4.1-{platform}-linux_x86_64.whl torchvision

# Pillow (pinned to 4.1.1) and the `image` package
!pip install Pillow==4.1.1 image

In [0]:
# Fetch the category-to-name JSON file (-c resumes a partial download, -q is quiet)
!wget -cq https://github.com/udacity/pytorch_challenge/raw/master/cat_to_name.json

# Fetch the flower image archive
!wget -cq https://s3.amazonaws.com/content.udacity-data.com/courses/nd188/flower_data.zip

# Extract the archive quietly into the working directory
!unzip -qq flower_data.zip

# Remove the archive after extraction; `|| true` keeps the cell from failing if rm errors
!rm -f flower_data.zip || true

#### Path

In [0]:
# Image folders used for training and validation, relative to the working directory
path_train, path_validation = 'flower_data/train', 'flower_data/valid'

# Class metadata file (cat_to_name.json)
path_classes = 'cat_to_name.json'

### Imports

In [0]:
# Json
import json

# Torch
import torch

# Torch Vision
import torchvision

# Numpy
import numpy as np

# Path
from pathlib import Path

# Matplot
import matplotlib.pyplot as pp

# Reduce
from functools import reduce

### GPU

In [0]:
# Report the active device. An explicit availability check replaces the former
# bare `except:`, which silently hid any error (including typos) as "CPU".
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(torch.cuda.current_device()))

    # Make newly created tensors default to CUDA float tensors
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
else:
    print('CPU')

### Pre-processing

The training dataset is loaded, resized, converted to tensors, and normalized; in addition, other transformations are applied to augment it. The test dataset is a split of the validation dataset: 80% of the validation data is kept for validation and 20% is used for testing (note that the split code in the Test section below is currently commented out).

#### Functions

In [0]:
def load(path = None, transform = None):
    '''
        Load an image dataset from a directory

        Parameters
        ----------

        path: str
            Dataset path; must be an existing directory

        transform: torchvision.transforms
            Transform applied to every loaded image (optional)

        Usage
        -----

        >>> load(path = 'flower_data/train')

        Return
        ------

        torchvision.datasets.ImageFolder

        Raises
        ------

        ValueError
            If path is None or is not an existing directory
    '''

    # Path(None) would raise an opaque TypeError, so reject a missing path explicitly.
    # is_dir() is already False for paths that do not exist.
    if path is None or not Path(path).is_dir():
        raise ValueError('Incompatible path')

    return torchvision.datasets.ImageFolder(root = path, transform = transform)

In [0]:
def plot(axe = None, images = None, cmap = 'gray', title = '', color = False):
    '''
        Plot one image per axis

        Parameters
        ----------

        axe: matplotlib.pyplot.subplots
            Axes returned by pp.subplots (a single axis, a row or a grid);
            a grid is filled row by row

        images: torch.tensor
            Sequence of image tensors. Gray images are squeezed to 2D;
            color images must be channel-first (C, H, W)

        cmap: str
            Color map (only used for gray images)

        title: str
            Title set on every plotted axis (skipped when empty)

        color: bool
            Plot RGB images

        Usage
        -----

        Gray

        >>> figure, axe = pp.subplots(nrows = 2, ncols = 2, figsize = (2, 2))
        >>>
        >>> plot(axe, [torch.randn((4, 4)) for image in range(0, 4)])

        RGB

        >>> figure, axe = pp.subplots(nrows = 2, ncols = 2, figsize = (2, 2))
        >>>
        >>> plot(axe, [torch.rand((3, 4, 4)) for image in range(0, 4)], color = True)

        Return
        ------

        None

        References
        ----------

        https://matplotlib.org/examples/color/colormaps_reference.html
    '''

    # Accept a single axis, a row or a grid by flattening to a 1D sequence
    axes = np.atleast_1d(axe).ravel()

    for index, axis in enumerate(axes):
        # Axes without a matching image are only hidden (previously an IndexError)
        if index >= len(images):
            axis.axis('off')

            continue

        image = images[index].detach().cpu().numpy()

        if color:
            # Channel-first (C, H, W) to the channel-last layout imshow expects
            axis.imshow(image.transpose((1, 2, 0)))
        else:
            axis.imshow(image.squeeze(), cmap=cmap)

        axis.axis('off')

        if title:
            axis.set_title(title)

#### Processing

In [0]:
# Per-channel normalization shared by the train, validation and test pipelines
# (these look like the usual ImageNet channel statistics - confirm if it matters)
normalize = torchvision.transforms.Normalize(
    mean = (0.485, 0.456, 0.406),
    std = (0.229, 0.224, 0.225)
)

##### Train

In [0]:
# Train dataset settings
height_train, width_train = 224, 224
batch_size_train = 12
num_workers_train = 1

# Baseline pipeline shared by every variant: resize -> tensor -> normalize
transform_train = [
    torchvision.transforms.Resize((height_train, width_train)),
    torchvision.transforms.ToTensor(),
    normalize
]

# Train 1 - Original (baseline pipeline only)
train1 = load(path = path_train, transform = torchvision.transforms.Compose(transform_train))

# Train 2 - Rotation (random angle in [-30, 30], applied right after the resize)
_train2 = transform_train[:1] + [torchvision.transforms.RandomRotation((-30, 30))] + transform_train[1:]
train2 = load(path = path_train, transform = torchvision.transforms.Compose(_train2))

# Train 3 - Color (jitter applied right after the resize)
_train3 = (
    transform_train[:1]
    + [torchvision.transforms.ColorJitter(brightness = 1.0, hue = 0.5, saturation = 0.5)]
    + transform_train[1:]
)
train3 = load(path = path_train, transform = torchvision.transforms.Compose(_train3))

# Train 4 - Crop (a random crop of the target size replaces the resize step)
_train4 = [torchvision.transforms.RandomCrop(size=(height_train, width_train))] + transform_train[1:]
train4 = load(path = path_train, transform = torchvision.transforms.Compose(_train4))

# Train 5 - Flip horizontal (p = 1.0, so every image is flipped)
_train5 = transform_train[:1] + [torchvision.transforms.RandomHorizontalFlip(p = 1.0)] + transform_train[1:]
train5 = load(path = path_train, transform = torchvision.transforms.Compose(_train5))

# Train 6 - Flip vertical (p = 1.0, so every image is flipped)
_train6 = transform_train[:1] + [torchvision.transforms.RandomVerticalFlip(p = 1.0)] + transform_train[1:]
train6 = load(path = path_train, transform = torchvision.transforms.Compose(_train6))

# Titled variants, in the order they are consumed during training
train_datasets = [
    {'title': 'Original', 'data': train1},
    {'title': 'Color', 'data': train3},
    {'title': 'Crop', 'data': train4},
    {'title': 'Rotation', 'data': train2},
    {'title': 'Flip Horizontal', 'data': train5},
    {'title': 'Flip Vertical', 'data': train6}
]

# Swap each raw dataset for a shuffling loader over it
for dataset in train_datasets:
    dataset.update(data = torch.utils.data.DataLoader(
        dataset['data'],
        shuffle = True,
        batch_size = batch_size_train,
        num_workers = num_workers_train,
    ))

# Total number of training samples across all variants
print(sum(len(entry['data'].dataset) for entry in train_datasets))

In [0]:
# Plot train dataset
%matplotlib inline

for dataset in train_datasets:
    # Get batch
    images_train, labels_train = iter(dataset['data']).next()
    
    # Create buffer
    figure, axe = pp.subplots(nrows = 1, ncols = 5, figsize=(15, 10))
     
    # Plot images
    plot(axe = axe, images = images_train[:, 1], title = dataset['title'])

##### Validation

In [0]:
# Validation dataset settings
height_validation, width_validation = 224, 224
batch_size_validation = 12
num_workers_validation = 1

# Validation pipeline: resize -> tensor -> normalize (no augmentation)
transform_validation = [
    torchvision.transforms.Resize((height_validation, width_validation)),
    torchvision.transforms.ToTensor(),
    normalize
]

# Shuffling loader over the validation image folder
validation = torch.utils.data.DataLoader(
    dataset = load(path = path_validation, transform = torchvision.transforms.Compose(transform_validation)),
    shuffle = True,
    num_workers = num_workers_validation,
    batch_size = batch_size_validation
)

# Number of validation samples
print(len(validation.dataset))

In [0]:
# Plot validation dataset
%matplotlib inline

# Get batch
images_validation, labels_validation = iter(validation).next()

# Create buffer
figure, axe = pp.subplots(nrows = 1, ncols = 5, figsize = (15, 10))

# Plot images
plot(axe = axe, images = images_validation[:, 1], title = 'Original')

##### Test

In [0]:
# Test dataset
# validation_size = int(0.8 * len(validation.dataset))

# test_size = int(len(validation.dataset) - validation_size)

# validation, test = torch.utils.data.random_split(validation.dataset, [validation_size, test_size])

# print(len(validation), len(test))

### Models

References:

1. [Very deep convolutional networks for large-scale image recognition](https://arxiv.org/abs/1409.1556)

2. [Deep residual learning for image recognition](https://arxiv.org/abs/1512.03385)

#### Functions

In [0]:
def train(model, datasets, criterion, optimizer):
    '''
        Train model for one pass over every dataset

        Parameters
        ----------

        model: torchvision.models
            Neural Network model

        datasets: list
            List of dicts with a 'title' (str) and a 'data' entry that
            yields (images, labels) batches

        criterion: torch.nn
            Loss function

        optimizer: torch.optim
            Gradient optimizer

        Usage
        -----

        >>> model = torchvision.models.resnet18()
        >>>
        >>> train(
        >>>     model,
        >>>     [],
        >>>     torch.nn.CrossEntropyLoss(),
        >>>     torch.optim.Adam(params = model.parameters(), lr = 0.0001)
        >>> )

        Return
        ------

        List of loss values (one Python float per batch)
    '''

    _loss = []

    use_cuda = torch.cuda.is_available()

    for dataset in datasets:
        for batch, (images, labels) in enumerate(dataset['data']):
            # Activate CUDA
            if use_cuda:
                images, labels = images.cuda(), labels.cuda()

            # Set the gradient to zero
            model.zero_grad()

            # Compute the output
            output = model(images)

            # Compute the loss
            loss = criterion(output, labels)

            # Back propagate and update the weights
            loss.backward()

            optimizer.step()

            # Keep only the scalar value: storing the tensor itself would keep every
            # batch's autograd graph alive and leave device tensors that cannot be plotted
            value = loss.item()

            _loss.append(value)

            # Print loss
            print('Batch {0} | Dataset {1}: {2} Loss'.format(batch, dataset['title'], value))

    return _loss

In [0]:
def accuracy(model, dataset, classes):
    '''
        Test accuracy

        Parameters
        ----------

        model: torchvision.models
            Neural Network model (switched to evaluation mode and left in it)

        dataset: torch.utils.data
            Test dataset yielding (images, labels) batches

        classes: dict
            Meta data dict; only its length is used, to size the per-class counters

        Usage
        -----

        >>> accuracy(
        >>>     torchvision.models.resnet18(),
        >>>     [],
        >>>     {},
        >>> )

        Return
        ------

        Number of correct predictions to each class, total of objects in each class and samples
        (one sample dict per batch, taken from the first item of the batch)

        Extra
        -----

        Calculate the accuracy: round(100. * np.sum(_correct) / np.sum(_total), 2)
    '''

    _correct = [0.] * len(classes)

    _total = [0.] * len(classes)

    _sample = []

    # Disable gradient
    with torch.no_grad():
        # Evaluation mode (affects layers such as dropout and batch normalization)
        model.eval()

        for images, labels in dataset:
            if torch.cuda.is_available():
                images, labels = images.cuda(), labels.cuda()

            # Compute the output
            output = model(images)

            # Highest score per row and its index (the predicted class); the score is
            # a probability only if the model itself outputs probabilities
            probability, prediction = torch.max(output, 1)

            # Get sample
            _sample.append({
                'image': images[0],
                'label': labels[0],
                'predict': prediction[0],
                'probability': probability[0]
            })

            # Compare predictions and truth labels. Flatten with view(-1) instead of
            # squeezing: squeezing a batch of size 1 gives a 0-d array that cannot be indexed.
            hits = prediction.eq(labels.view_as(prediction)).view(-1).cpu().tolist()

            for label, hit in zip(labels.view(-1).cpu().tolist(), hits):
                _correct[label] += float(hit)

                _total[label] += 1

    return _correct, _total, _sample

In [0]:
def show_predict(axe, sample, classes):
    '''
        Plot predicted images

        Parameters
        ----------

        axe: matplotlib.pyplot.subplots
            Axes returned by pp.subplots (a single axis, a row or a grid);
            a grid is filled row by row

        sample: list
            List of dicts with 'image' (channel-first tensor), 'label' and
            'predict' (single-element tensors) entries

        classes: dict
            Meta data dict, keyed by the class index as a string

        Usage
        -----

        >>> figure, axe = pp.subplots(nrows = 2, ncols = 3, figsize=(15, 5))
        >>>
        >>> show_predict(
        >>>     axe,
        >>>     [{
        >>>         'image': torch.Tensor(torch.randn((3, 3, 3))),
        >>>         'label':  torch.Tensor([1]),
        >>>         'predict': torch.Tensor([1]),
        >>>         'probability': torch.Tensor([.5])
        >>>     }],
        >>>     {'1': 'one'},
        >>> )

        Return
        ------

        None
    '''

    # Shown when an index has no entry in classes
    fallback = 'label not found'

    # Accept a single axis, a row or a grid by flattening to a 1D sequence
    axes = np.atleast_1d(axe).ravel()

    for index, axis in enumerate(axes):
        # Axes without a matching sample are only hidden (previously an IndexError)
        if index >= len(sample):
            axis.axis('off')

            continue

        entry = sample[index]

        # Convert tensor to array and move the channels last
        image = entry['image'].detach().cpu().numpy().transpose((1, 2, 0))

        # Plot the first channel only, in gray
        axis.imshow(image[:, :, 0], cmap = 'gray')

        label_index = int(entry['label'])

        predict_index = int(entry['predict'])

        # NOTE(review): this looks up the raw index as a string; if classes is keyed by
        # folder name rather than by dataset index the names may be misaligned - confirm.
        label = classes.get(str(label_index), fallback)

        predict = classes.get(str(predict_index), fallback)

        # Color by comparing the indices, so a correct prediction is green even when
        # its name is missing from classes
        axis.set_title(
            '{0} - {1}'.format(label.title(), predict.title()),
            color = 'green' if label_index == predict_index else 'red'
        )

        axis.axis('off')

#### Classes

In [0]:
# Load the class metadata; the context manager closes the file handle,
# which the former bare open() call never did
with open(path_classes) as classes_file:
    classes = json.load(classes_file)

print(classes)

#### Resnet 18

In [0]:
# Instantiate ResNet-18 with pretrained weights
resnet18 = torchvision.models.resnet18(pretrained=True)

In [0]:
# Replace the final dense layer with a fresh 512 -> 102 linear head
resnet18.fc = torch.nn.Linear(in_features = 512, out_features = 102, bias = True)

In [0]:
# Load model
# resnet18.load_state_dict(torch.load('./resnet18.pth'))

In [0]:
# Stop gradient tracking for every parameter of layer1 and layer2
for layer in (resnet18.layer1, resnet18.layer2):
    for parameter in layer.parameters():
        parameter.requires_grad = False

In [0]:
# Activate CUDA. is_available must be called: the bare function object is always
# truthy, so the old check ran .cuda() even on machines without a GPU.
if torch.cuda.is_available():
    resnet18 = resnet18.cuda()

In [0]:
# Define criterion
resnet18_criterion = torch.nn.CrossEntropyLoss()

# Define optimizer over the trainable parameters only. As far as I recall, torch 0.4.1
# rejects parameters that do not require gradients, and the frozen layers would not be
# updated anyway. Re-create the optimizer if layers are unfrozen later.
resnet18_optimizer = torch.optim.Adam(
    params = [parameter for parameter in resnet18.parameters() if parameter.requires_grad],
    lr = 0.0001
)

In [0]:
# Put the model in training mode, then make one pass over every training loader
resnet18.train()

resnet18_loss = train(
    model = resnet18,
    datasets = train_datasets,
    criterion = resnet18_criterion,
    optimizer = resnet18_optimizer
)

In [0]:
# Plot train loss
pp.style.use('grayscale')

# Convert to plain floats first: matplotlib cannot plot a list of tensors that are
# still attached to the autograd graph or stored on the GPU
pp.plot([float(value) for value in resnet18_loss], linestyle = '-.')

In [0]:
# Compute and print the validation accuracy
resnet18_class_correct, resnet18_class_total, resnet18_sample = accuracy(resnet18, validation, classes)

# The precision argument belongs to round(); it was previously passed to format(),
# so the value was rounded to a whole number
print('Model accuracy: {0}'.format(round(100. * np.sum(resnet18_class_correct) / np.sum(resnet18_class_total), 2)))

In [0]:
# Show the first six sampled predictions on a 2 x 3 grid
resnet18_figure, resnet18_axe = pp.subplots(figsize = (15, 5), nrows = 2, ncols = 3)

show_predict(axe = resnet18_axe, sample = resnet18_sample, classes = classes)

In [0]:
# Persist the model weights (state dict only) next to the notebook
torch.save(obj = resnet18.state_dict(), f = './resnet18.pth')

#### VGG

In [0]:
# Instantiate VGG-11 (batch-norm variant) with pretrained weights
vgg11 = torchvision.models.vgg11_bn(pretrained=True)

In [0]:
# Update dense layer. torchvision's VGG has no `fc` attribute: its forward pass goes
# through `classifier`, so assigning `vgg11.fc` only added an unused layer and left the
# 1000-way output in place. Replace the last classifier layer (index 6) instead.
# NOTE(review): checkpoints saved with the old stray `fc` keys will not match this layout.
vgg11.classifier[6] = torch.nn.Linear(4096, 102, bias = True)

In [0]:
# Load model
# vgg11.load_state_dict(torch.load('./vgg11.pth'))

In [0]:
# Stop gradient tracking for every parameter of the `features` sub-module
for parameter in vgg11.features.parameters():
    parameter.requires_grad = False

In [0]:
# Activate CUDA. is_available must be called: the bare function object is always
# truthy, so the old check ran .cuda() even on machines without a GPU.
if torch.cuda.is_available():
    vgg11 = vgg11.cuda()

In [0]:
# Define criterion
vgg11_criterion = torch.nn.CrossEntropyLoss()

# Define optimizer over the trainable parameters only. As far as I recall, torch 0.4.1
# rejects parameters that do not require gradients, and the frozen feature layers would
# not be updated anyway. Re-create the optimizer if layers are unfrozen later.
vgg11_optimizer = torch.optim.Adam(
    params = [parameter for parameter in vgg11.parameters() if parameter.requires_grad],
    lr = 0.0001
)

In [0]:
# Put the model in training mode, then make one pass over every training loader
vgg11.train()

vgg11_loss = train(
    model = vgg11,
    datasets = train_datasets,
    criterion = vgg11_criterion,
    optimizer = vgg11_optimizer
)

In [0]:
# Plot train loss
pp.style.use('grayscale')

# Convert to plain floats first: matplotlib cannot plot a list of tensors that are
# still attached to the autograd graph or stored on the GPU
pp.plot([float(value) for value in vgg11_loss], linestyle = '-.')

In [0]:
# Compute and print the validation accuracy
vgg11_class_correct, vgg11_class_total, vgg11_sample = accuracy(vgg11, validation, classes)

# The precision argument belongs to round(); it was previously passed to format(),
# so the value was rounded to a whole number
print('Model accuracy: {0}'.format(round(100. * np.sum(vgg11_class_correct) / np.sum(vgg11_class_total), 2)))

In [0]:
# Show the first six sampled predictions on a 2 x 3 grid
vgg11_figure, vgg11_axe = pp.subplots(figsize = (15, 5), nrows = 2, ncols = 3)

show_predict(axe = vgg11_axe, sample = vgg11_sample, classes = classes)

In [0]:
# Persist the model weights (state dict only) next to the notebook
torch.save(obj = vgg11.state_dict(), f = './vgg11.pth')