In [1]:
# Let's see how to use DyTB to:
# 1: train a VGG-like network on CIFAR-10
# 2: resume the training from the last iteration
# 3: do TRANSFER LEARNING from the trained model to another model that will be able to classify CIFAR-100
# 4: do FINE TUNING of the model trained on CIFAR-10 to solve the CIFAR-100 classification problem
# 5: compare the train/validation/test performance of the models

import pandas as pd
import pprint
import tensorflow as tf
from dytb.inputs.predefined import Cifar10, Cifar100
from dytb.train import train
from dytb.models.predefined.VGG import VGG

In [2]:
# Instantiate the VGG model; this same instance is passed as `model`
# to every train() call in this notebook
vgg = VGG()

In [3]:
# Instantiate the CIFAR-10 input source, used as `dataset` by the
# two CIFAR-10 training runs
cifar10 = Cifar10.Cifar10()

In [4]:
# 1: Train VGG on CIFAR-10 for a single epoch

# Device on which the training is placed (GPU:0).
# The following training cells reuse this variable.
device = '/gpu:0'
with tf.device(device):
    info = train(
        model=vgg,
        dataset=cifar10,
        hyperparameters=dict(
            epochs=1,
            batch_size=50,
            regularizations=dict(
                l2=1e-5,
                # Random left/right flip used as data augmentation.
                # On average, applying this transformation doubles the
                # training set size, hence factor=2
                augmentation=dict(
                    name="FlipLR",
                    fn=tf.image.random_flip_left_right,
                    factor=2,
                ),
            ),
            # Optimizer class (Adam) together with its arguments
            gd=dict(
                optimizer=tf.train.AdamOptimizer,
                args=dict(
                    learning_rate=1e-3,
                    beta1=0.9,
                    beta2=0.99,
                    epsilon=1e-8,
                ),
            ),
        ),
        # NOTE(review): presumably discards any previously saved state for
        # this model/dataset so the run starts from scratch -- confirm
        # against the DyTB documentation
        force_restart=True,
    )

Original training set size 50000. Augmented training set size: 100000
<tf.Variable 'VGG/64/conv1/W:0' shape=(3, 3, 3, 64) dtype=float32_ref>
<tf.Variable 'VGG/64/conv1/b:0' shape=(64,) dtype=float32_ref>
<tf.Variable 'VGG/64/conv2/W:0' shape=(3, 3, 64, 64) dtype=float32_ref>
<tf.Variable 'VGG/64/conv2/b:0' shape=(64,) dtype=float32_ref>
<tf.Variable 'VGG/128/conv3/W:0' shape=(3, 3, 64, 128) dtype=float32_ref>
<tf.Variable 'VGG/128/conv3/b:0' shape=(128,) dtype=float32_ref>
<tf.Variable 'VGG/128/conv4/W:0' shape=(3, 3, 128, 128) dtype=float32_ref>
<tf.Variable 'VGG/128/conv4/b:0' shape=(128,) dtype=float32_ref>
<tf.Variable 'VGG/256/conv5/W:0' shape=(3, 3, 128, 256) dtype=float32_ref>
<tf.Variable 'VGG/256/conv5/b:0' shape=(256,) dtype=float32_ref>
<tf.Variable 'VGG/256/conv6/W:0' shape=(3, 3, 256, 256) dtype=float32_ref>
<tf.Variable 'VGG/256/conv6/b:0' shape=(256,) dtype=float32_ref>
<tf.Variable 'VGG/256/conv7/W:0' shape=(3, 3, 256, 256) dtype=float32_ref>
<tf.Variable 'VGG/256/conv7

In [5]:
# `info` carries all the information about the trained model; only the
# statistics stored under info["stats"] are of interest here.
# Gather the accuracy measured on every set (train/validation/test)
# and show it as a one-row table through a pandas DataFrame
accuracies = {split: info["stats"][split]["accuracy"] for split in info["stats"]}
df = pd.DataFrame.from_records(accuracies, index=["accuracy"])
df

Unnamed: 0,test,train,validation
accuracy,0.5718,0.59188,0.5717


In [6]:
# Gather the confusion matrix measured on every set
confusion_matrices = {split: info["stats"][split]["confusion_matrix"] for split in info["stats"]}
# Show the confusion matrix of the training set as a table
df = pd.DataFrame(confusion_matrices["train"])
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,2945.0,178.0,75.0,60.0,6.0,62.0,31.0,37.0,1137.0,423.0
1,12.0,4019.0,1.0,16.0,0.0,8.0,17.0,6.0,99.0,838.0
2,1138.0,28.0,1146.0,525.0,265.0,757.0,597.0,207.0,127.0,202.0
3,244.0,23.0,138.0,1234.0,60.0,2354.0,367.0,118.0,65.0,340.0
4,302.0,33.0,422.0,399.0,1887.0,516.0,602.0,611.0,36.0,233.0
5,140.0,16.0,109.0,516.0,75.0,3598.0,109.0,161.0,32.0,228.0
6,64.0,62.0,115.0,588.0,106.0,171.0,3610.0,24.0,55.0,245.0
7,115.0,10.0,47.0,118.0,167.0,999.0,36.0,3006.0,23.0,530.0
8,494.0,253.0,14.0,43.0,1.0,29.0,14.0,10.0,3904.0,229.0
9,63.0,434.0,3.0,14.0,0.0,33.0,4.0,18.0,88.0,4331.0


In [7]:
# 2: train the same model for one more epoch
# `force_restart` is intentionally NOT passed this time.
# `epochs` is the TOTAL number of epochs for the trained model: since the
# model has already been trained for one epoch, "epochs" is set to 2
# in order to train it for one additional epoch

with tf.device(device):
    info = train(
        model=vgg,
        dataset=cifar10,
        hyperparameters=dict(
            epochs=2,
            batch_size=50,
            regularizations=dict(
                l2=1e-5,
                # Random left/right flip used as data augmentation
                augmentation=dict(
                    name="FlipLR",
                    fn=tf.image.random_flip_left_right,
                    factor=2,
                ),
            ),
            # Optimizer class (Adam) together with its arguments
            gd=dict(
                optimizer=tf.train.AdamOptimizer,
                args=dict(
                    learning_rate=1e-3,
                    beta1=0.9,
                    beta2=0.99,
                    epsilon=1e-8,
                ),
            ),
        ),
    )

Original training set size 50000. Augmented training set size: 100000
<tf.Variable 'VGG/64/conv1/W:0' shape=(3, 3, 3, 64) dtype=float32_ref>
<tf.Variable 'VGG/64/conv1/b:0' shape=(64,) dtype=float32_ref>
<tf.Variable 'VGG/64/conv2/W:0' shape=(3, 3, 64, 64) dtype=float32_ref>
<tf.Variable 'VGG/64/conv2/b:0' shape=(64,) dtype=float32_ref>
<tf.Variable 'VGG/128/conv3/W:0' shape=(3, 3, 64, 128) dtype=float32_ref>
<tf.Variable 'VGG/128/conv3/b:0' shape=(128,) dtype=float32_ref>
<tf.Variable 'VGG/128/conv4/W:0' shape=(3, 3, 128, 128) dtype=float32_ref>
<tf.Variable 'VGG/128/conv4/b:0' shape=(128,) dtype=float32_ref>
<tf.Variable 'VGG/256/conv5/W:0' shape=(3, 3, 128, 256) dtype=float32_ref>
<tf.Variable 'VGG/256/conv5/b:0' shape=(256,) dtype=float32_ref>
<tf.Variable 'VGG/256/conv6/W:0' shape=(3, 3, 256, 256) dtype=float32_ref>
<tf.Variable 'VGG/256/conv6/b:0' shape=(256,) dtype=float32_ref>
<tf.Variable 'VGG/256/conv7/W:0' shape=(3, 3, 256, 256) dtype=float32_ref>
<tf.Variable 'VGG/256/conv7

In [8]:
# Show the accuracy measured on every set after the second epoch,
# as a one-row table through a pandas DataFrame
accuracies = {split: info["stats"][split]["accuracy"] for split in info["stats"]}
df = pd.DataFrame.from_records(accuracies, index=["accuracy"])
df

Unnamed: 0,test,train,validation
accuracy,0.7241,0.74684,0.724


In [9]:
# Keep a reference to the info dict returned by the last CIFAR-10 training run:
# its ["paths"]["best"] entry is the checkpoint used by the surgery steps
vggInfo = info

In [10]:
# 3: TRANSFER LEARNING
# Reuse the model trained on CIFAR-10 to classify CIFAR-100 images:
# ONLY the softmax linear scope (which now has 100 neurons) is trained,
# while every other previously trained layer is kept constant.
# DyTB saves the "best" model (w.r.t. a metric) in a separate folder, so the
# starting weights are taken from vggInfo["paths"]["best"], which is the
# path of the best model trained so far.
cifar100 = Cifar100.Cifar100()
with tf.device(device):
    transferInfo = train(
        model=vgg,
        dataset=cifar100,
        hyperparameters=dict(
            epochs=1,
            batch_size=50,
            regularizations=dict(
                l2=1e-5,
                # Random left/right flip used as data augmentation
                augmentation=dict(
                    name="FlipLR",
                    fn=tf.image.random_flip_left_right,
                    factor=2,
                ),
            ),
            # Optimizer class (Adam) together with its arguments
            gd=dict(
                optimizer=tf.train.AdamOptimizer,
                args=dict(
                    learning_rate=1e-3,
                    beta1=0.9,
                    beta2=0.99,
                    epsilon=1e-8,
                ),
            ),
        ),
        force_restart=True,
        surgery=dict(
            # Checkpoint of the best CIFAR-10 model
            checkpoint_path=vggInfo["paths"]["best"],
            # NOTE(review): presumably the scopes that are NOT restored from
            # the checkpoint -- confirm against the DyTB documentation
            exclude_scopes="VGG/softmax_linear",
            # Only this scope is trained
            trainable_scopes="VGG/softmax_linear",
        ),
    )

Original training set size 50000. Augmented training set size: 100000
<tf.Variable 'VGG/64/conv1/W:0' shape=(3, 3, 3, 64) dtype=float32_ref>
<tf.Variable 'VGG/64/conv1/b:0' shape=(64,) dtype=float32_ref>
<tf.Variable 'VGG/64/conv2/W:0' shape=(3, 3, 64, 64) dtype=float32_ref>
<tf.Variable 'VGG/64/conv2/b:0' shape=(64,) dtype=float32_ref>
<tf.Variable 'VGG/128/conv3/W:0' shape=(3, 3, 64, 128) dtype=float32_ref>
<tf.Variable 'VGG/128/conv3/b:0' shape=(128,) dtype=float32_ref>
<tf.Variable 'VGG/128/conv4/W:0' shape=(3, 3, 128, 128) dtype=float32_ref>
<tf.Variable 'VGG/128/conv4/b:0' shape=(128,) dtype=float32_ref>
<tf.Variable 'VGG/256/conv5/W:0' shape=(3, 3, 128, 256) dtype=float32_ref>
<tf.Variable 'VGG/256/conv5/b:0' shape=(256,) dtype=float32_ref>
<tf.Variable 'VGG/256/conv6/W:0' shape=(3, 3, 256, 256) dtype=float32_ref>
<tf.Variable 'VGG/256/conv6/b:0' shape=(256,) dtype=float32_ref>
<tf.Variable 'VGG/256/conv7/W:0' shape=(3, 3, 256, 256) dtype=float32_ref>
<tf.Variable 'VGG/256/conv7

In [11]:
# 4: FINE TUNING
# Start from the model pointed to by vggInfo and tune the WHOLE network
# on CIFAR-100, retraining it end-to-end from the learned weights.
# Compared to transfer learning, the only difference is that the
# "trainable_scopes" entry is left out of the surgery parameter
with tf.device(device):
    fineTuningInfo = train(
        model=vgg,
        dataset=cifar100,
        hyperparameters=dict(
            epochs=1,
            batch_size=50,
            regularizations=dict(
                l2=1e-5,
                # Random left/right flip used as data augmentation
                augmentation=dict(
                    name="FlipLR",
                    fn=tf.image.random_flip_left_right,
                    factor=2,
                ),
            ),
            # Optimizer class (Adam) together with its arguments
            gd=dict(
                optimizer=tf.train.AdamOptimizer,
                args=dict(
                    learning_rate=1e-3,
                    beta1=0.9,
                    beta2=0.99,
                    epsilon=1e-8,
                ),
            ),
        ),
        force_restart=True,
        surgery=dict(
            # Checkpoint of the best CIFAR-10 model
            checkpoint_path=vggInfo["paths"]["best"],
            # NOTE(review): presumably the scopes that are NOT restored from
            # the checkpoint -- confirm against the DyTB documentation
            exclude_scopes="VGG/softmax_linear",
        ),
    )


Original training set size 50000. Augmented training set size: 100000
<tf.Variable 'VGG/64/conv1/W:0' shape=(3, 3, 3, 64) dtype=float32_ref>
<tf.Variable 'VGG/64/conv1/b:0' shape=(64,) dtype=float32_ref>
<tf.Variable 'VGG/64/conv2/W:0' shape=(3, 3, 64, 64) dtype=float32_ref>
<tf.Variable 'VGG/64/conv2/b:0' shape=(64,) dtype=float32_ref>
<tf.Variable 'VGG/128/conv3/W:0' shape=(3, 3, 64, 128) dtype=float32_ref>
<tf.Variable 'VGG/128/conv3/b:0' shape=(128,) dtype=float32_ref>
<tf.Variable 'VGG/128/conv4/W:0' shape=(3, 3, 128, 128) dtype=float32_ref>
<tf.Variable 'VGG/128/conv4/b:0' shape=(128,) dtype=float32_ref>
<tf.Variable 'VGG/256/conv5/W:0' shape=(3, 3, 128, 256) dtype=float32_ref>
<tf.Variable 'VGG/256/conv5/b:0' shape=(256,) dtype=float32_ref>
<tf.Variable 'VGG/256/conv6/W:0' shape=(3, 3, 256, 256) dtype=float32_ref>
<tf.Variable 'VGG/256/conv6/b:0' shape=(256,) dtype=float32_ref>
<tf.Variable 'VGG/256/conv7/W:0' shape=(3, 3, 256, 256) dtype=float32_ref>
<tf.Variable 'VGG/256/conv7

In [12]:
# Compare the performance of Transfer Learning and Fine Tuning.
# This cell shows the Transfer Learning accuracy on every set; the
# Fine Tuning figures are shown by the next cell.
# NOTE(review): in the saved output the accuracy is ~0.01 on every set,
# i.e. chance level for a 100-class problem -- the run does not appear to
# have learned anything; investigate before trusting this comparison.
accuracies = {split: transferInfo["stats"][split]["accuracy"] for split in transferInfo["stats"]}
df = pd.DataFrame.from_records(accuracies, index=["accuracy"])
df

Unnamed: 0,test,train,validation
accuracy,0.01,0.01032,0.01


In [13]:
# Fine Tuning accuracy on every set, as a one-row table.
# NOTE(review): in the saved output the accuracy is ~0.01 on every set,
# i.e. chance level for a 100-class problem -- investigate before
# drawing conclusions from it.
accuracies = {split: fineTuningInfo["stats"][split]["accuracy"] for split in fineTuningInfo["stats"]}
df = pd.DataFrame.from_records(accuracies, index=["accuracy"])
df

Unnamed: 0,test,train,validation
accuracy,0.01,0.0101,0.01


In [14]:
# For completeness, let's see what an info object contains.
# `info` is still the dict returned by the second CIFAR-10 training run
# (the CIFAR-100 runs were stored in transferInfo / fineTuningInfo)
pprint.pprint(info, indent=4)

{   'args': {   'batch_size': 50,
                'checkpoint_path': '',
                'comment': '',
                'dataset': <dytb.inputs.predefined.Cifar10.Cifar10 object at 0x7f42e04a86a0>,
                'epochs': 2,
                'exclude_scopes': None,
                'force_restart': False,
                'gd': {   'args': {   'beta1': 0.9,
                                      'beta2': 0.99,
                                      'epsilon': 1e-08,
                                      'learning_rate': 0.001},
                          'optimizer': <class 'tensorflow.python.training.adam.AdamOptimizer'>},
                'lr_decay': {'enabled': False, 'epochs': 25, 'factor': 0.1},
                'model': <dytb.models.predefined.VGG.VGG object at 0x7f4289c98ef0>,
                'regularizations': {   'augmentation': {   'factor': 2,
                                                           'fn': <function random_flip_left_right at 0x7f4289d5c7b8>,
                     