### Dependencies

In [0]:
# PyTorch
# Installs a PyTorch 0.4.1 wheel whose URL is assembled from the interpreter's
# platform tag and the CUDA runtime found on this machine.
from os.path import exists

# NOTE(review): `wheel.pep425tags` appears to be absent from recent `wheel`
# releases, so this import may fail on an up-to-date environment - confirm.
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag

# Wheel tag in the form "<impl><version>-<abi>", interpolated into the URL below
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())

# Shell capture: list the loader's known libraries, keep the cudart entries and
# rewrite the trailing "<major>.<minor>" of each line into "cu<major><minor>"
cuda_output = !ldconfig -p | grep cudart.so | sed -e 's/.*\.\([0-9]*\)\.\([0-9]*\)$/cu\1\2/'

# `cuda_output[0]` is only evaluated when the NVIDIA device node exists;
# otherwise the CPU build is selected
accelerator = cuda_output[0] if exists('/dev/nvidia0') else 'cpu'

# NOTE(review): the wheel is fetched over plain http and torchvision is not pinned
!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.4.1-{platform}-linux_x86_64.whl torchvision

# Pillow
!pip install Pillow==4.1.1

# NOTE(review): `image` is installed without a version pin
!pip install image

In [0]:
# Download steps, currently commented out (presumably the files are already on
# disk - the cells below fail if they are missing)
# !wget -cq https://github.com/udacity/pytorch_challenge/raw/master/cat_to_name.json

# !wget -cq https://s3.amazonaws.com/content.udacity-data.com/courses/nd188/flower_data.zip

# !unzip -qq flower_data.zip

# !rm -f flower_data.zip || true

# Directory passed to `load` for the training datasets
path_train = 'flower_data/train'

# Directory passed to `load` for the validation dataset
path_validation = 'flower_data/valid'

# JSON file read into `classes`
path_classes = 'cat_to_name.json'

### Imports

In [0]:
# Torch
import torch

# Torch Vision
import torchvision

# Path
from pathlib import Path

# Matplot
import matplotlib.pyplot as pp

# Reduce
from functools import reduce

# Load
import json

# Numpy
import numpy as np

### GPU

In [0]:
# Check if CUDA is available
# An explicit availability check replaces the former bare `except:`, which
# reported 'CPU' for every failure and therefore hid genuine errors.
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(torch.cuda.current_device()))

    # Set default tensor
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
else:
    print('CPU')

### Pre-processing <a name="pre-processing"></a>

The training dataset is loaded, resized, converted to tensors and normalized; in addition, further transformations are applied to augment it. The test dataset is meant to be a split of the validation dataset, with 80% used for validation and 20% for testing; note that the cell performing this split is currently commented out, so the whole validation set is used for validation.

In [0]:
def load(path = None, transform = None):
    '''
        Build an image-folder dataset from a directory

        Parameters
        ----------

        path: str
            Directory handed to the image-folder dataset as its root

        transform: torchvision.transforms
            Transform forwarded unchanged to the dataset

        Usage
        -----

        >>> load(path = 'flower_data/train')

        Return
        ------

        torchvision.datasets.ImageFolder

        Raises
        ------

        Exception
            With the message 'Incompatible path' when `path` does not exist
            or is not a directory

        References
        ----------

        https://pytorch.org/docs/stable/torchvision/transforms.html

        https://pytorch.org/docs/stable/data.html
    '''

    root = Path(path)

    # The root must both exist and be a directory
    usable = root.exists() and root.is_dir()

    if not usable:
        raise Exception('Incompatible path')

    dataset = torchvision.datasets.ImageFolder(root = path, transform = transform)

    return dataset

In [0]:
def plot(axe = None, images = None, cmap = 'gray', title = '', color = False):
    '''
        Plot images

        One image is drawn per axis, in order; the axes frame is hidden on
        every axis. Axes without a matching image are left blank.

        Parameters
        ----------

        axe: numpy.ndarray of matplotlib axes
            Axes returned by matplotlib.pyplot.subplots, either a single
            row (1D) or a grid (2D, filled row by row)

        images: sequence of torch.tensor
            One tensor per axis. Gray images may carry singleton dimensions
            (they are squeezed); RGB images are expected channel first
            (channel, height, width)

        cmap: str
            Color map, used for gray images only

        title: str
            Title set on every drawn axis; skipped when empty

        color: bool
            RGB images

        Usage
        -----

        Gray

        >>> figure, axe = pp.subplots(nrows=2, ncols=2, figsize=(2, 2))
        >>>
        >>> plot(axe, [torch.rand((4, 4)) for image in range(0, 4)])

        RGB

        >>> figure, axe = pp.subplots(nrows=2, ncols=2, figsize=(2, 2))
        >>>
        >>> plot(axe, [torch.rand((3, 4, 4)) for image in range(0, 4)], color = True)

        Return
        ------

        None

        Raises
        ------

        Exception
            With the message 'Invalid shape' when `axe` is neither 1D nor 2D

        References
        ----------

        https://matplotlib.org/examples/color/colormaps_reference.html
    '''

    # A single row and a full grid are both accepted (the usage examples above
    # build a 2x2 grid); any other rank is rejected as before
    if len(axe.shape) not in (1, 2):
        raise Exception('Invalid shape')

    for index, axis in enumerate(axe.flat):
        axis.axis('off')

        # Fewer images than axes: leave the surplus axes blank
        if index >= len(images):
            continue

        # Detach and move to host memory so tensors that track gradients or
        # live on the GPU can still be converted to numpy
        pixels = images[index].detach().cpu().numpy()

        if color:
            # imshow expects channel-last data
            axis.imshow(pixels.transpose((1, 2, 0)))
        else:
            axis.imshow(pixels.squeeze(), cmap = cmap)

        if title:
            axis.set_title(title)

In [0]:
# Normalize
# Per-channel mean and standard deviation shared by the training and validation
# pipelines. These values match the statistics torchvision documents for its
# ImageNet-pretrained models, which is what the pretrained networks used
# in the Models section expect.
normalize = torchvision.transforms.Normalize(mean = (0.485, 0.456, 0.406), std = (0.229, 0.224, 0.225))

In [0]:
# Train
num_workers_train = 5

batch_size_train = 124  # Resnet 18 (124), VGG 11 (12)

height_train, width_train = 224, 224

# Baseline pipeline shared by every variant: resize, tensor conversion, normalization
transform_train = [
    torchvision.transforms.Resize((height_train, width_train)),
    torchvision.transforms.ToTensor(),
    normalize
]

# Train 1: baseline pipeline only
train1 = load(path = path_train, transform = torchvision.transforms.Compose(transform_train))

# Train 2: random rotation placed between the resize and the tensor conversion
_train2 = transform_train[:1] + [torchvision.transforms.RandomRotation((-30, 30))] + transform_train[1:]

train2 = load(path = path_train, transform = torchvision.transforms.Compose(_train2))

# Train 3: color jitter placed between the resize and the tensor conversion
_train3 = transform_train[:1] + [torchvision.transforms.ColorJitter(brightness = 1.0, hue = 0.5, saturation = 0.5)] + transform_train[1:]

train3 = load(path = path_train, transform = torchvision.transforms.Compose(_train3))

# Train 4: the resize step is swapped for a random crop of the same size
_train4 = [torchvision.transforms.RandomCrop(size=(height_train, width_train))] + transform_train[1:]

train4 = load(path = path_train, transform = torchvision.transforms.Compose(_train4))

# Train 5: horizontal flip applied to every image (p = 1.0)
_train5 = transform_train[:1] + [torchvision.transforms.RandomHorizontalFlip(p = 1.0)] + transform_train[1:]

train5 = load(path = path_train, transform = torchvision.transforms.Compose(_train5))

# Train 6: vertical flip applied to every image (p = 1.0)
_train6 = transform_train[:1] + [torchvision.transforms.RandomVerticalFlip(p = 1.0)] + transform_train[1:]

train6 = load(path = path_train, transform = torchvision.transforms.Compose(_train6))

# Concat: titled variants, in the order the training loops visit them
train_dataset = [
    {'title': 'Original', 'data': train1},
    {'title': 'Brightness', 'data': train3},
    {'title': 'Crop', 'data': train4},
    {'title': 'Rotation', 'data': train2},
    {'title': 'Flip Horizontal', 'data': train5},
    {'title': 'Flip Vertical', 'data': train6}
]

# Swap each raw dataset for a shuffling loader over it
for dataset in train_dataset:
    dataset.update(
        data = torch.utils.data.DataLoader(
            dataset['data'],
            shuffle = True,
            batch_size = batch_size_train,
            num_workers = num_workers_train,
        )
    )

# Total number of samples across all variants
print(sum(len(dataset['data'].dataset) for dataset in train_dataset))

In [0]:
# Plot train
%matplotlib inline

for dataset in train_dataset:
    # Get batch
    images_train, labels_train = iter(dataset['data']).next()
    
    # Create buffer
    figure, axe = pp.subplots(nrows = 1, ncols = 5, figsize=(15, 10))
     
    # Plot images
    plot(axe = axe, images = images_train[:, 1], title = dataset['title'])

In [0]:
# Validation
num_workers_validation = 5

batch_size_validation = 124 # Resnet 18 (124), VGG 11 (12)

height_validation, width_validation = 224, 224

# Transform: resize, tensor conversion and normalization, without augmentation
transform_validation = [
    torchvision.transforms.Resize((height_validation, width_validation)),
    torchvision.transforms.ToTensor(),
    normalize
]

# Validation: shuffling loader over the validation folder
validation = torch.utils.data.DataLoader(
    dataset = load(path = path_validation, transform = torchvision.transforms.Compose(transform_validation)),
    shuffle = True,
    num_workers = num_workers_validation,
    batch_size = batch_size_validation
)

# Number of validation samples
print(len(validation.dataset))

In [0]:
# Plot validation
%matplotlib inline

# Get batch
images_validation, labels_validation = iter(validation).next()

# Create buffer
figure, axe = pp.subplots(nrows = 1, ncols = 5, figsize=(15, 10))

# Plot images
plot(axe = axe, images = images_validation[:, 1], title = 'Original')

In [0]:
# Test dataset
# NOTE(review): the 80/20 split below is disabled, so no test set is produced.
# If it is re-enabled, it rebinds `validation` to an object returned by
# `random_split` instead of the DataLoader built earlier, so the evaluation
# loops would need a DataLoader rebuilt around each split - confirm before use.
# validation_size = int(0.8 * len(validation.dataset))

# test_size = int(len(validation.dataset) - validation_size)

# validation, test = torch.utils.data.random_split(validation.dataset, [validation_size, test_size])

# print(len(validation), len(test))

### Models

References:

1. [Very deep convolutional networks for large-scale image recognition](https://arxiv.org/abs/1409.1556)

2. [Deep residual learning for image recognition](https://arxiv.org/abs/1512.03385)

3. [Unsupervised representation learning using convolutional and stacked auto-encoders: a domain and cross-domain feature space analysis](https://arxiv.org/abs/1811.00473)

#### Classes

In [0]:
# Read the class mapping; the context manager closes the file handle, which the
# former bare `open()` call left open
with open(path_classes, encoding = 'utf-8') as classes_file:
    classes = json.load(classes_file)

print(classes)

#### Resnet 18

In [0]:
# Create a model
# ResNet-18 from torchvision, requested with pretrained weights
resnet18 = torchvision.models.resnet18(pretrained = True)

In [0]:
# Update dense layer
# Replace the final fully connected layer with a new one mapping 512 input
# features to 102 outputs (presumably one per flower class - confirm against
# `classes`)
resnet18.fc = torch.nn.Linear(512, 102, bias = True)

In [0]:
# Load model
# resnet18.load_state_dict(torch.load('./resnet18.pth'))

In [0]:
# Freeze first layer
# Only the parameters under `resnet18.layer1` stop receiving gradients; every
# other parameter of the model stays trainable
for parameter in resnet18.layer1.parameters():
    parameter.requires_grad = False

In [0]:
# Activate CUDA
# `is_available` must be called: the bare function object is always truthy, so
# the model was moved to the GPU even on machines without one
if torch.cuda.is_available():
    resnet18 = resnet18.cuda()

In [0]:
# Define epochs
resnet18_epochs = 1

# Define criterion: cross-entropy over the class scores
resnet18_criterion = torch.nn.CrossEntropyLoss()

# Define optimizer: Adam over every parameter of the model
resnet18_optimizer = torch.optim.Adam(resnet18.parameters(), lr = 0.001)

In [0]:
# Train
resnet18.train(mode = True)

# Loss of every batch, stored as plain Python floats
resnet18_loss = []

# NOTE(review): the dataset loop is the outer one, so with more than one epoch
# each variant is repeated for all its epochs before the next variant starts
for dataset in train_dataset:
    for epoch in range(resnet18_epochs):
        for batch, (images, labels) in enumerate(dataset['data']):
            # Activate CUDA
            if torch.cuda.is_available():
                images, labels = images.cuda(), labels.cuda()

            # Set the gradient to zero
            resnet18_optimizer.zero_grad()

            # Compute the output
            output = resnet18(images)

            # Compute the loss
            loss = resnet18_criterion(output, labels)

            # Retro propagate
            loss.backward()

            resnet18_optimizer.step()

            # Keep only the scalar value: appending the tensor itself kept the
            # autograd graph of every batch alive and could not be plotted
            loss_value = loss.item()

            resnet18_loss.append(loss_value)

            # Print loss
            print('Epoch {0} | Batch {1} | Dataset {2}: {3} Loss'.format(epoch, batch, dataset['title'], loss_value))

In [0]:
# Plot train loss
pp.style.use('grayscale')

# float() accepts plain numbers as well as scalar tensors, so the curve can be
# drawn whichever of the two the training cell stored
pp.plot([float(value) for value in resnet18_loss], linestyle = '-.')

pp.xlabel('Batch')

pp.ylabel('Loss')

pp.title('Resnet 18 train loss');

In [0]:
# Accuracy
# Per-class counters, indexed by the integer label produced by the loader
resnet18_class_correct = [0.] * len(classes)

resnet18_class_total = [0.] * len(classes)

# Disable normalize and dropout
resnet18.eval()

# Disable gradient
with torch.no_grad():
    for images, labels in validation:
        if torch.cuda.is_available():
            images, labels = images.cuda(), labels.cuda()

        # Compute the output
        output = resnet18(images)

        # Get the indices (classes) of the max scores
        prediction = torch.max(output, 1)[1]

        # Compare predictions to true label
        # tolist() always yields one entry per sample; the former np.squeeze
        # collapsed a one-sample batch to a 0-d array that cannot be indexed
        correct = prediction.eq(labels.view_as(prediction)).cpu().tolist()

        targets = labels.cpu().tolist()

        for label, hit in zip(targets, correct):
            resnet18_class_correct[label] += int(hit)

            resnet18_class_total[label] += 1

In [0]:
# Overall accuracy in percent, rounded to two decimals. The closing parenthesis
# of round() was misplaced, so the 2 went to format() and the value was
# rounded to an integer.
print('Model accuracy: {0}'.format(round(100. * np.sum(resnet18_class_correct) / np.sum(resnet18_class_total), 2)))

In [0]:
# Save the model
# torch.save(resnet18.state_dict(), './resnet18.pth')

#### VGG

In [0]:
# Define model
# VGG-11 with batch normalization from torchvision, requested with pretrained weights
vgg11 = torchvision.models.vgg11_bn(pretrained = True)

In [0]:
# Update dense layer
# torchvision's VGG models route their output through `classifier`, a
# Sequential whose last entry (index 6) is the final linear layer. Assigning to
# `vgg11.fc` only attached an unused module and left the original head in
# place, so the final layer is replaced here instead.
vgg11.classifier[6] = torch.nn.Linear(4096, 102, bias = True)

In [0]:
# Load model
# vgg11.load_state_dict(torch.load('./vgg11.pth'))

In [0]:
# Activate CUDA
# `is_available` must be called: the bare function object is always truthy, so
# the model was moved to the GPU even on machines without one
if torch.cuda.is_available():
    vgg11 = vgg11.cuda()

In [0]:
# Define epochs
vgg11_epochs = 1

# Define criterion: cross-entropy over the class scores
vgg11_criterion = torch.nn.CrossEntropyLoss()

# Define optimizer: plain SGD over every parameter of the model
vgg11_optimizer = torch.optim.SGD(vgg11.parameters(), lr = 0.01)  # 0.001

In [0]:
# Train
vgg11.train(mode = True)

# Loss of every batch, stored as plain Python floats
vgg11_loss = []

# NOTE(review): the dataset loop is the outer one, so with more than one epoch
# each variant is repeated for all its epochs before the next variant starts
for dataset in train_dataset:
    for epoch in range(vgg11_epochs):
        for batch, (images, labels) in enumerate(dataset['data']):
            # Activate CUDA
            if torch.cuda.is_available():
                images, labels = images.cuda(), labels.cuda()

            # Set the gradient to zero
            vgg11_optimizer.zero_grad()

            # Compute the output
            output = vgg11(images)

            # Compute the loss
            loss = vgg11_criterion(output, labels)

            # Retro propagate
            loss.backward()

            vgg11_optimizer.step()

            # Keep only the scalar value: appending the tensor itself kept the
            # autograd graph of every batch alive and could not be plotted
            loss_value = loss.item()

            vgg11_loss.append(loss_value)

            # Print loss
            print('Epoch {0} | Batch {1} | Dataset {2}: {3} Loss'.format(epoch, batch, dataset['title'], loss_value))

In [0]:
# Plot train loss
pp.style.use('grayscale')

# float() accepts plain numbers as well as scalar tensors, so the curve can be
# drawn whichever of the two the training cell stored
pp.plot([float(value) for value in vgg11_loss], linestyle = '-.')

pp.xlabel('Batch')

pp.ylabel('Loss')

pp.title('VGG 11 train loss');

In [0]:
# Accuracy
# Per-class counters, indexed by the integer label produced by the loader
vgg11_class_correct = [0.] * len(classes)

vgg11_class_total = [0.] * len(classes)

# Disable normalize and dropout
vgg11.eval()

# Disable gradient
with torch.no_grad():
    for images, labels in validation:
        if torch.cuda.is_available():
            images, labels = images.cuda(), labels.cuda()

        # Compute the output
        output = vgg11(images)

        # Get the indices (classes) of the max scores
        prediction = torch.max(output, 1)[1]

        # Compare predictions to true label
        # tolist() always yields one entry per sample; the former np.squeeze
        # collapsed a one-sample batch to a 0-d array that cannot be indexed
        correct = prediction.eq(labels.view_as(prediction)).cpu().tolist()

        targets = labels.cpu().tolist()

        for label, hit in zip(targets, correct):
            vgg11_class_correct[label] += int(hit)

            vgg11_class_total[label] += 1

In [0]:
# Overall accuracy in percent, rounded to two decimals. The closing parenthesis
# of round() was misplaced, so the 2 went to format() and the value was
# rounded to an integer.
print('Model accuracy: {0}'.format(round(100. * np.sum(vgg11_class_correct) / np.sum(vgg11_class_total), 2)))

In [0]:
# Save the model
# torch.save(vgg11.state_dict(), './vgg11.pth')