In [None]:
%load_ext autoreload
%autoreload 2

# Image Classification

* [Data](#Data)
* [Pipeline](#Pipeline)
* [Setting Model](#Model)
* [Training](#Training)
* [Validation](#Validation)
* [AlexNet](#AlexNet)
* [VGG](#VGG)
* [ResNet](#ResNet)

In [None]:
import sys

import PIL
import numpy as np
import matplotlib.pyplot as plt

sys.path.append('..')

## Data

In [None]:
from batchflow.opensets import CIFAR10, CIFAR100, Imagenette160, ImageWoof, PascalClassification, MNIST

Load dataset.

In [None]:
# Instantiate the Imagenette160 dataset imported from batchflow.opensets.
data = Imagenette160()

That's what is happening under the hood of the line above.

In [None]:
# from batchflow import Dataset, DatasetIndex, ImagesBatch
# my_index = DatasetIndex(np.arange(len(imagenette_data)))
# data = Dataset(my_index, 
#                batch_class=ImagesBatch, 
#                preloaded=imagenette_data)

The dataset is divided into train and test parts.

In [None]:
# Report how many images each part of the dataset holds.
print(f'Images in train: {len(data.train.images)}')
print(f'Images in test: {len(data.test.images)}')

Plot some images from the dataset.

In [None]:
from utils import plot_images

In [None]:
# Human-readable class names, passed to plot_images as `classes`.
classes = [
    'tench',
    'English springer',
    'cassette player',
    'chain saw',
    'church',
    'French horn',
    'garbage truck',
    'gas pump',
    'golf ball',
    'parachute',
]

In [None]:
# Draw ten random positions in the train part and plot the corresponding images.
ind = np.random.randint(len(data.train), size=10)
labels = data.train.labels[ind]
images = np.array(data.train.images[ind])
plot_images(images, labels=labels, classes=classes, figsize=(15, 15))

## Pipeline

Declare pipeline with augmentations.   

In [None]:
# Demo pipeline: results are written to a separate 'augm_images' component,
# so the source images stay available for side-by-side comparison.
pipeline = (
    data.train.pipeline()
    # random-origin 160x160 crop, stored under 'augm_images'
    .crop(shape=(160, 160), origin='random', dst='augm_images')
    # flip with p=0.5, reading from and writing to 'augm_images'
    .flip(p=0.5, src='augm_images', dst='augm_images')
)

Take a look at all the image actions available: [link](https://github.com/analysiscenter/batchflow/blob/master/examples/tutorials/06_image_augmentation.ipynb)

In [None]:
# class ImagesBatch:
#     def flip(self):
#         # code for flipping images
#         return self
    
#     def rotate(self):
#         # code for rotation images
#         return self

Generate batch of data.

In [None]:
# Request one shuffled batch of 5 items from the augmentation pipeline.
batch = pipeline.next_batch(batch_size=5, shuffle=True)

In [None]:
# One figure per batch item: source image on the left, augmented copy on the right.
for i in range(len(batch)):
    fig, (ax_source, ax_augmented) = plt.subplots(1, 2, figsize=(10, 3))
    ax_source.imshow(batch.images[i])
    ax_augmented.imshow(batch.augm_images[i])
# Model

In [None]:
from batchflow.models.torch import TorchModel
from batchflow import B, V, W

Configure the model.

In [None]:
# Configuration of a small model for TorchModel; 'initial_block' is left unset.
model_config = {
    # network parts
    'body/layout': 'ca ca',
    'body/filters': [5, 10],
    'head/layout': 'f',
    'head/units': 10,

    # outputs, loss and training setup
    'output': {'predicted': ['proba']},
    'loss': 'ce',
    'optimizer': {'name': 'SGD', 'lr': 0.01},
    'device': 'gpu',
}

# Training

In [None]:
# Batch size and number of training iterations used by the training pipeline below.
BATCH_SIZE, N_ITERS = 64, 300

Declare the training pipeline.

In [None]:
# Training pipeline: augment, convert to channels-first arrays, then train the
# model registered under the name 'classification'.
train_pipeline = (
    data.train.p
    .crop(shape=(160, 160), origin='random')
    .flip(p=0.5)
    .to_array(channels='first')
    # pipeline variable that receives the fetched loss values
    .init_variable('loss', [])
    .init_model('dynamic', TorchModel, 'classification', model_config)
    .train_model(
        'classification',
        B('images'),
        B('labels'),
        fetches='loss',
        save_to=V('loss', mode='a'),
    )
    # execution parameters are stored here; the actual run is started by .run()
    .run_later(BATCH_SIZE, n_iters=N_ITERS, drop_last=True, shuffle=42, bar=True)
)

In [None]:
# Progress-bar description built from the last element of the 'loss' variable.
loss_description = W(V('loss')[-1].format('Loss is: {:7.7}'))
train_pipeline.run(bar_desc=loss_description)

Plot the training loss.

In [None]:
# Plot the training loss, leaving out the first `n_skipped` iterations.
# The x axis carries the real iteration numbers (the skipped prefix is not
# re-indexed from zero) and the axes are labelled so the figure stands alone.
n_skipped = 5
loss = train_pipeline.v('loss')
fig, ax = plt.subplots(figsize=(6, 6))
ax.plot(range(n_skipped, len(loss)), loss[n_skipped:])
ax.set(title='Training loss', xlabel='Iteration', ylabel='Loss')
ax.grid()

# Validation

Pipeline for model evaluation.

In [None]:
# Evaluation pipeline: reuses the model trained by `train_pipeline`, stores its
# 'predicted_proba' output in the batch and gathers classification metrics.
test_pipeline = (
    data.test.p
    .crop(shape=(160, 160), origin='random')
    .to_array(channels='first')
    .import_model('classification', train_pipeline)
    .init_variable('metrics')
    .predict_model(
        'classification',
        B('images'),
        fetches='predicted_proba',
        save_to=B('predictions'),
    )
    .gather_metrics(
        'class',
        targets=B.labels,
        predictions=B.predictions,
        fmt='proba',
        axis=-1,
        save_to=V('metrics'),
    )
    .run_later(100, shuffle=True, n_epochs=1, drop_last=False, bar=True)
)

In [None]:
# Execute the evaluation pipeline with the parameters given to run_later above.
test_pipeline.run()

In [None]:
# Read the metrics stored in the pipeline variable and report accuracy.
metrics = test_pipeline.v('metrics')
accuracy = metrics.evaluate('accuracy')
print(f'Accuracy {accuracy}')

Draw images from test data and model predictions

In [None]:
# Take one more batch from the evaluation pipeline and show its images
# together with the predictions saved in the batch.
batch = test_pipeline.next_batch(10, shuffle=True)

# The pipeline produces channels-first arrays (to_array(channels='first'));
# move the channel axis to the last position for plotting.
images = np.moveaxis(batch.images, 1, -1)
# Pass the class names, like the other plot_images calls in this notebook do.
plot_images(images, labels=batch.labels, proba=batch.predictions, classes=classes, figsize=(15, 15))

In [None]:
# Display the raw predictions stored in the batch drawn above.
batch.predictions

## Config for Pipeline

In [None]:
from batchflow import C

In [None]:
# Training pipeline template: the model class, model config, batch size and
# number of iterations are taken from the pipeline config through C(...).
train_pipeline = (
    data.train.p
    .crop(shape=(160, 160), origin='random')
    .flip(p=0.5)
    .to_array(channels='first')
    .init_variable('loss', [])
    .init_model('dynamic', C('model_class'), 'classification', C('model'))
    .train_model(
        'classification',
        B('images'),
        B('labels'),
        fetches='loss',
        save_to=V('loss', mode='a'),
    )
    .run_later(C('batch_size'), n_iters=C('n_iters'), drop_last=True, shuffle=42, bar=True)
)

# Evaluation pipeline template: the pipeline to import the trained model from
# is taken from the config key 'train_ppl'.
test_pipeline = (
    data.test.p
    .crop(shape=(160, 160), origin='random')
    .to_array(channels='first')
    .init_variable('metrics')
    .import_model('classification', C('train_ppl'))
    .predict_model(
        'classification',
        B('images'),
        fetches='predicted_proba',
        save_to=B('predictions'),
    )
    .gather_metrics(
        'class',
        targets=B.labels,
        predictions=B.predictions,
        fmt='proba',
        axis=-1,
        save_to=V('metrics'),
    )
    .run_later(300, shuffle=True, n_epochs=1, drop_last=False, bar=True)
)

# AlexNet 

In [None]:
# AlexNet-style model configuration for TorchModel.
alexnet_model = {
    'initial_block': {'layout': 'cap', 'kernel_size': 11, 'filters': 4, 'strides': 4, 'pool_size': 3},
    'body': {'layout': 'cap ca ca ca', 'kernel_size': [5, 3, 3, 3], 'filters': [8, 16, 32, 64]},
    'head': {'layout': 'fa f', 'units': [100, 10]},

    # Exactly one 'optimizer' entry: a dict literal keeps only the last value of a
    # repeated key, so a second entry with a different lr would be silently ignored.
    'optimizer': dict(name='SGD', lr=0.1),
    'output': {'predicted': ['proba']},
    'loss': 'ce',
    'device': 'gpu'
}

In [None]:
# Values for the C(...) placeholders of the training pipeline template (AlexNet run).
train_config = dict(
    model=alexnet_model,
    model_class=TorchModel,
    batch_size=64,
    n_iters=300,
)

In [None]:
# Combine the training pipeline template with the AlexNet config via `<<`.
alexnet_train = train_pipeline << train_config

In [None]:
# Progress-bar description built from the last element of the 'loss' variable.
loss_description = W(V('loss')[-1].format('Loss is: {:7.7}'))
alexnet_train.run(bar_desc=loss_description)

In [None]:
# Plot the AlexNet training loss, leaving out the first `n_skipped` iterations.
# The x axis carries the real iteration numbers and the axes are labelled.
n_skipped = 10
loss = alexnet_train.v('loss')
fig, ax = plt.subplots()
ax.plot(range(n_skipped, len(loss)), loss[n_skipped:])
ax.set(title='AlexNet training loss', xlabel='Iteration', ylabel='Loss')
ax.grid()

In [None]:
# Evaluation config: the model is imported from the AlexNet training pipeline.
# (The 'metrics_name': 'alex_metr' entry stays disabled.)
test_config = dict(train_ppl=alexnet_train)

alexnet_test = test_pipeline << test_config

In [None]:
# Execute the AlexNet evaluation pipeline.
alexnet_test.run()

In [None]:
# Read the metrics gathered by the AlexNet evaluation run and report accuracy.
metrics = alexnet_test.v('metrics')
accuracy = metrics.evaluate('accuracy')
print(f'Accuracy AlexNet - {accuracy}')

# VGG

In [None]:
# Settings shared by the model configs below (merged in with **model_common).
model_common = dict(
    device='gpu',
    loss='ce',
    output={'predicted': ['proba']},
)

In [None]:
# VGG-style configuration: the 'ca ca p' group repeated five times,
# with one filters value per convolution.
vgg_model = {
    'body': {
        'layout': 'ca ca p' * 5,
        'kernel_size': 3,
        'filters': [8, 8, 16, 16, 32, 32, 64, 64, 128, 128],
    },
    'head': {'layout': 'fa f', 'units': [100, 10]},

    'optimizer': {'name': 'Adam', 'lr': 0.001},
    **model_common,
}

In [None]:
# Values for the C(...) placeholders of the training pipeline template (VGG run).
train_config = dict(
    model=vgg_model,
    model_class=TorchModel,
    batch_size=64,
    n_iters=500,
)

In [None]:
# Combine the training pipeline template with the VGG config via `<<`.
vgg_train = train_pipeline << train_config

In [None]:
# Progress-bar description built from the last element of the 'loss' variable.
loss_description = W(V('loss')[-1].format('Loss is: {:7.7}'))
vgg_train.run(bar_desc=loss_description)

In [None]:
# Plot the VGG training loss, leaving out the first `n_skipped` iterations.
# The x axis carries the real iteration numbers and the axes are labelled.
n_skipped = 10
loss = vgg_train.v('loss')
fig, ax = plt.subplots()
ax.plot(range(n_skipped, len(loss)), loss[n_skipped:])
ax.set(title='VGG training loss', xlabel='Iteration', ylabel='Loss')
ax.grid()

In [None]:
# Evaluation config: the model is imported from the VGG training pipeline.
# (The 'metrics_name': 'vgg_metr' entry stays disabled.)
test_config = dict(train_ppl=vgg_train)

In [None]:
# Combine the evaluation pipeline template with the VGG test config via `<<`.
vgg_test = test_pipeline << test_config

In [None]:
# Execute the VGG evaluation pipeline.
vgg_test.run()

In [None]:
# Read the metrics gathered by the VGG evaluation run and report accuracy.
metrics = vgg_test.v('metrics')
accuracy = metrics.evaluate('accuracy')
print(f'Accuracy VGG - {accuracy}')

In [None]:
# Take one more batch from the VGG evaluation pipeline and plot it with predictions.
batch = vgg_test.next_batch(5, shuffle=True)

# The pipeline produces channels-first arrays; put the channel axis last for plotting.
images = np.moveaxis(batch.images, source=1, destination=-1)
plot_images(
    images,
    labels=batch.labels,
    proba=batch.predictions,
    classes=classes,
    figsize=(15, 15),
)

## ResNet

You can import and train a ready-to-use model from the [model zoo](https://analysiscenter.github.io/batchflow/api/batchflow.models.html).

In [None]:
#from batchflow.models.torch import ResNet18

Writing the whole ResNet layout by hand is too long!

In [None]:
# resnet_config = {
#            'initial_block':dict(layout='cnap', filters=8, kernel_size=7, strides=2,
#                                 pool_size=3, pool_strides=2),
#            'body': {'layout': 'R cnacna+ R cnacna+ p R cnacna& R cnacna+ p R cnacna& R cnacna+ p R cnacna& R cnacna+',
#                     'filters': [8, 8, 8, 8, 16, 16, 16, 16, 32, 32, 32, 32, 64, 64, 64, 64]},
#            'head': dict(layout='cV', filters=10),
    
#            'optimizer': dict(name='Adam', lr=0.001),
#             **model_common
#         }

In [None]:
from batchflow.models.torch import Encoder
from batchflow.models.torch import ResBlock, VGGBlock, DenseBlock, XceptionBlock

In [None]:
# ResNet-like configuration assembled from ResBlock stages.
resnet_config = {
    'initial_block': {
        'layout': 'cnap',
        'filters': 8,
        'kernel_size': 7,
        'strides': 2,
        'pool_size': 3,
        'pool_strides': 2,
    },
    'body/encoder/num_stages': 4,
    # one entry per stage in each of the lists below
    'body/encoder/blocks': {
        'base': ResBlock,
        'layout': 'cnacna',
        'filters': [8, 16, 32, 64],
        'n_reps': [2, 2, 2, 2],
        'downsample': [False, True, True, True],
    },
    'head': {'layout': 'cV', 'filters': 10},

    'optimizer': {'name': 'Adam', 'lr': 0.001},
    **model_common,
}

In [None]:
# Values for the C(...) placeholders of the training pipeline template (ResNet run).
train_config = dict(
    model=resnet_config,
    model_class=Encoder,
    batch_size=64,
    n_iters=1000,
)

In [None]:
# Combine the training pipeline template with the ResNet config via `<<`.
resnet_train = train_pipeline << train_config

In [None]:
# Progress-bar description built from the last element of the 'loss' variable.
loss_description = W(V('loss')[-1].format('Loss is: {:7.7}'))
resnet_train.run(bar_desc=loss_description)

In [None]:
# from batchflow.models.torch import ResNet34
# resnet_config = {
#            'head': dict(layout='cV', filters=10),
    
#            'optimizer': dict(name='Adam', lr=0.001),
#             **model_common
#         }

# train_config = {
#         'model': resnet_config,
#         'model_class': ResNet34,
#         'batch_size': 64,
#         'n_iters': 1000
#             }

In [None]:
# Plot the ResNet training loss, leaving out the first `n_skipped` iterations.
# The x axis carries the real iteration numbers and the axes are labelled.
n_skipped = 10
loss = resnet_train.v('loss')
fig, ax = plt.subplots()
ax.plot(range(n_skipped, len(loss)), loss[n_skipped:])
ax.set(title='ResNet training loss', xlabel='Iteration', ylabel='Loss')
ax.grid()

In [None]:
# Evaluation config: the model is imported from the ResNet training pipeline.
test_config = dict(train_ppl=resnet_train)

resnet_test = test_pipeline << test_config

In [None]:
# Execute the ResNet evaluation pipeline.
resnet_test.run()

In [None]:
# Read the metrics gathered by the ResNet evaluation run and report accuracy.
metrics = resnet_test.v('metrics')
# Store the evaluated value: the print below must report the ResNet accuracy,
# not whatever `accuracy` was left over from an earlier cell.
accuracy = metrics.evaluate('accuracy')
print('Accuracy ResNet - {}'.format(accuracy))

In [None]:
# Take one more shuffled batch of 10 items from the ResNet evaluation pipeline.
batch = resnet_test.next_batch(10, shuffle=True)

In [None]:
# The pipeline produces channels-first arrays (to_array(channels='first'));
# move the channel axis to the last position for plotting.
images = np.moveaxis(batch.images, 1, -1)
# Pass the class names, like the other plot_images calls in this notebook do.
plot_images(images, labels=batch.labels, proba=batch.predictions, classes=classes, figsize=(15, 15))