In [None]:
%load_ext autoreload
%autoreload 2

# Image Classification

* [Data](#Data)
* [Augmentation pipeline](#Pipeline)
* [Model definition](#Model)
* [Training](#Training)
* [Validation](#Validation)
* [VGG](#VGG)
* [ResNet](#ResNet)

In [None]:
import sys

import PIL
import numpy as np
import matplotlib.pyplot as plt

sys.path.append('..')

## Data

In [None]:
from batchflow.opensets import CIFAR10
from batchflow import ImagesBatch

Load dataset.

In [None]:
# Build the CIFAR10 dataset object; ImagesBatch is passed as the class used for its batches.
data = CIFAR10(batch_class=ImagesBatch)

The dataset is implicitly divided into train and test parts.

In [None]:
# Report how many items each part of the dataset holds.
for template, part in (('Images in train: {}', data.train),
                       ('Images in test: {}', data.test)):
    print(template.format(len(part)))

In [None]:
from utils import plot_images

Plot some images from the dataset.

In [None]:
# Take the first ten training items and show them together with their labels.
images = np.array(data.train.images[:10])
labels = data.train.labels[:10]
plot_images(
    images,
    labels=labels,
    figsize=(10, 8),
)

## Pipeline

In [None]:
from batchflow import Pipeline

Declare pipeline with augmentations.

In [None]:
# Augmentation pipeline with two actions: a flip and a rotation with angle=30.
# Both receive p=0.5 (presumably the probability of applying the action to an
# image — confirm in the ImagesBatch documentation).
pipeline = (Pipeline()
               .flip(p=0.5)
               .rotate(angle=30, p=0.5)
           )

Link the pipeline to the dataset.

In [None]:
# `<<` links the pipeline to the train part of the dataset; the result is kept under a new name.
augmentation_pipeline = pipeline << data.train

Generate a batch of data.

In [None]:
# Request one batch of 10 items from the augmentation pipeline, with shuffling turned off.
batch = augmentation_pipeline.next_batch(batch_size=10, shuffle=False)

In [None]:
# Show the images of the batch produced by the augmentation pipeline.
plot_images(np.array(batch.images), figsize=(10, 10))

In [None]:
from batchflow.models.torch import TorchModel
from batchflow import B, V, W

## Model

Configure the model.

Letters in layout:
1. c - convolution
2. a - activation
3. f - fully connected layer

In [None]:
# Configuration of the first model.
model_config = {
    # Body: two "convolution + activation" pairs with 5 and 10 filters.
    'body/layout': 'ca ca',
    'body/filters': [5, 10],
    # Head: a single fully connected layer with 10 units.
    'head': {'layout': 'f', 'units': 10},
    # Extra output, fetched later under the name 'predicted_proba'.
    'output': {'predicted': ['proba']},

    # Training setup ('ce' is presumably cross-entropy — confirm in batchflow docs).
    'loss': 'ce',
    'optimizer': {'name': 'SGD', 'lr': 0.01},
    'device': 'gpu',
}

## Training

In [None]:
# Batch size and number of training iterations for the first model.
BATCH_SIZE, N_ITERS = 64, 300

Declare the training pipeline.

In [None]:
# Training pipeline: converts images to arrays, creates the model and performs one
# training step per batch, appending the fetched loss to the 'loss' pipeline variable.
# run_later only stores the run parameters; the run is started by a later .run() call.
train_pipeline = (data.train.p # dataset can be linked to pipeline this way as well
                    .to_array(channels='first') # make inputs compatible with Torch
                    .init_variable('loss', []) # loss is stored here
                    .init_model('dynamic', TorchModel, 'classification', model_config) # initialize the model
                    .train_model('classification', B('images'), B('labels'), # perform training step
                                 fetches='loss', save_to=V('loss', mode='a'))
                    # NOTE(review): shuffle=42 is presumably a shuffling seed — confirm in batchflow docs
                    .run_later(BATCH_SIZE, n_iters=N_ITERS, drop_last=True, shuffle=42, bar=True)
                )

In [None]:
# Run the training; the progress-bar description is formatted from the last value in the 'loss' variable.
train_pipeline.run(bar_desc=W(V('loss')[-1].format('Loss is: {:7.7}')))

Plot the training loss.

In [None]:
# Loss curve of the first model; the first 10 recorded values are left out of the plot.
loss = train_pipeline.v('loss')
fig, ax = plt.subplots(figsize=(10, 10))
ax.plot(loss[10:])
ax.grid()

## Validation

Pipeline for model evaluation.

In [None]:
# Evaluation pipeline over the test part: imports the model from the training pipeline,
# saves its 'predicted_proba' output into the batch and gathers classification metrics.
# run_later only stores the run parameters (batches of 300, a single epoch).
test_pipeline = (data.test.p
                    .to_array(channels='first')
                    .import_model('classification', train_pipeline) # reuse the trained model
                    .init_variable('metrics')
                    .predict_model('classification', B('images'), fetches='predicted_proba',
                                   save_to=B('predictions'))
                    # NOTE(review): V('metrics') is given no explicit mode here (training uses
                    # V('loss', mode='a')); confirm that metrics are accumulated over all batches
                    # and not replaced by those of the last batch.
                    .gather_metrics('class', targets=B.labels, predictions=B.predictions,
                                    fmt='proba', axis=-1, save_to=V('metrics'))
                    .run_later(300, shuffle=True, n_epochs=1, drop_last=False, bar=True)
                )

In [None]:
# Start the evaluation with the parameters stored by run_later.
test_pipeline.run()

In [None]:
# Read the gathered metrics from the pipeline variable and report the accuracy.
metrics = test_pipeline.v('metrics')
accuracy = metrics.evaluate('accuracy')
report = 'Accuracy with 2 conv layers - {}'.format(accuracy)
print(report)

Draw images from the test data together with the model predictions.

In [None]:
# Get a batch of 10 test items processed by the evaluation pipeline (images, labels, predictions).
batch = test_pipeline.next_batch(10)

In [None]:
# The pipeline produced arrays with channels='first'; move axis 1 to the end before plotting.
images = np.moveaxis(batch.images, source=1, destination=-1)
plot_images(
    images,
    labels=batch.labels,
    proba=batch.predictions,
    figsize=(15, 15),
)

## VGG

In [None]:
# Configuration of the model used in the VGG section: four 'cp' stages
# (convolution followed by 'p', presumably pooling — confirm in batchflow docs)
# with a growing number of filters, and a fully connected head with 10 units.
vgg_config = {
    'body': dict(layout='cp' * 4, kernel_size=3, filters=[8, 16, 32, 64]),
    'head': dict(layout='f', units=10),
    # A learning rate of 10 makes plain SGD diverge; use the same rate as the first model.
    'optimizer': dict(name='SGD', lr=0.01),
}

In [None]:
from utils import run_train, run_test

In [None]:
# Training schedule for the VGG section.
# Batches of 2 items for 50 iterations cover only 100 training images, which is too
# little to train on; use the same schedule as the first model so results are comparable.
BATCH_SIZE = 64
N_ITERS = 300

In [None]:
# Train with the run_train helper from utils (presumably it builds and runs a training
# pipeline similar to the one defined above — see utils for details).
vgg_train = run_train(data.train, TorchModel, vgg_config, 'VGG', BATCH_SIZE, N_ITERS)

In [None]:
# Loss values stored in the 'loss' variable of the VGG run; the first 10 are not plotted.
loss = vgg_train.v('loss')
plt.plot(loss[10:])

In [None]:
# Evaluate on the test part with the run_test helper from utils
# (the last argument, 500, is presumably the batch size — confirm in utils).
vgg_test = run_test(data.test, vgg_train, 500)

In [None]:
# Read the metrics stored by the VGG evaluation run and report the accuracy.
metrics = vgg_test.v('metrics')
accuracy = metrics.evaluate('accuracy')
report = 'Accuracy VGG - {}'.format(accuracy)
print(report)

In [None]:
# Get a batch of 10 items from the VGG evaluation pipeline for visual inspection.
batch = vgg_test.next_batch(10)

In [None]:
# Move axis 1 of the image array to the end before plotting
# (presumably the channel axis, as in the evaluation pipeline above — confirm in utils.run_test).
images = np.moveaxis(batch.images, source=1, destination=-1)
plot_images(
    images,
    labels=batch.labels,
    proba=batch.predictions,
    figsize=(15, 15),
)

## ResNet

You can import and train a ready-to-use model from the [model zoo](https://analysiscenter.github.io/batchflow/api/batchflow.models.html).

In [None]:
from batchflow.models.torch import ResNet18

In [None]:
# Overrides passed to ResNet18: a head with layout 'cV' and 10 filters
# ('V' is presumably global pooling — confirm in batchflow docs), trained with Adam.
resnet_config = {
    'head': {'layout': 'cV', 'filters': 10},
    'optimizer': {'name': 'Adam', 'lr': 0.001},
}

In [None]:
# Batch size and number of training iterations for the ResNet run.
BATCH_SIZE, N_ITERS = 32, 1000

In [None]:
# Train ResNet18 with the run_train helper from utils, using the config and schedule defined above.
resnet_train = run_train(data.train, ResNet18, resnet_config, 'resnet', BATCH_SIZE, N_ITERS)

In [None]:
# Loss values stored in the 'loss' variable of the ResNet run; the first 10 are not plotted.
loss = resnet_train.v('loss')
plt.plot(loss[10:])

In [None]:
# Evaluate on the test part with the run_test helper from utils
# (the last argument, 500, is presumably the batch size — confirm in utils).
resnet_test = run_test(data.test, resnet_train, 500)

In [None]:
# Read the metrics stored by the ResNet evaluation run; the accuracy is shown as the cell output.
metrics = resnet_test.v('metrics')
metrics.evaluate('accuracy')

In [None]:
# Get a batch of 10 items from the ResNet evaluation pipeline for visual inspection.
batch = resnet_test.next_batch(10)

In [None]:
# Move axis 1 of the image array to the end before plotting
# (presumably the channel axis, as in the evaluation pipeline above — confirm in utils.run_test).
images = np.moveaxis(batch.images, source=1, destination=-1)
plot_images(
    images,
    labels=batch.labels,
    proba=batch.predictions,
    figsize=(15, 15),
)