In [1]:
# Let's see how to use DyTB to:
# 1: train a VGG-like network on CIFAR-10
# 2: continue training from the last iteration
# 3: do TRANSFER LEARNING from the trained model to another model that will be able to classify CIFAR-100
# 4: do FINE TUNING of the model trained on CIFAR-10 to solve the CIFAR-100 classification problem
# 5: compare the train/validation/test performance of the models

import pandas as pd
import pprint
import tensorflow as tf
from dytb.inputs.predefined import Cifar10, Cifar100
from dytb.train import train
from dytb.models.predefined.VGG import VGG

In [2]:
# Instantiate the VGG model.
# This same object is passed as `model=` to every train() call in this notebook.
vgg = VGG()

In [3]:
# Instantiate the CIFAR-10 input source.
# It is passed as `dataset=` to the two CIFAR-10 training runs below.
cifar10 = Cifar10.Cifar10()

In [4]:
# 1: Train VGG on CIFAR-10 for one epoch

# Every train() call in this notebook is placed on GPU:0
device = '/gpu:0'

# Data augmentation: random left/right flips of the input images
flip_lr = {
    "name": "FlipLR",
    "fn": tf.image.random_flip_left_right
}

# Gradient descent settings: the optimizer class plus its keyword arguments
adam = {
    "optimizer": tf.train.AdamOptimizer,
    "args": {
        "learning_rate": 1e-3,
        "beta1": 0.9,
        "beta2": 0.99,
        "epsilon": 1e-8
    }
}

hyperparameters = {
    "epochs": 1,
    "batch_size": 50,
    "regularizations": {"l2": 1e-5, "augmentation": flip_lr},
    "gd": adam
}

# NOTE(review): force_restart=True presumably discards any previous run of
# this model/dataset pair and starts from step 0 - confirm in the DyTB docs.
with tf.device(device):
    info = train(
        model=vgg,
        dataset=cifar10,
        hyperparameters=hyperparameters,
        force_restart=True)

2017-03-24 16:39:55.738164: step 0, loss = 2.4898 (21.8 examples/sec; 2.294 sec/batch)
2017-03-24 16:40:01.599543: step 100, loss = 2.1912 (895.4 examples/sec; 0.056 sec/batch)
2017-03-24 16:40:07.363273: step 200, loss = 1.9165 (887.7 examples/sec; 0.056 sec/batch)
2017-03-24 16:40:13.146663: step 300, loss = 1.9434 (897.0 examples/sec; 0.056 sec/batch)
2017-03-24 16:40:18.899361: step 400, loss = 1.8396 (888.5 examples/sec; 0.056 sec/batch)
2017-03-24 16:40:24.673581: step 500, loss = 1.8630 (895.4 examples/sec; 0.056 sec/batch)
2017-03-24 16:40:30.474606: step 600, loss = 1.7952 (882.7 examples/sec; 0.057 sec/batch)
2017-03-24 16:40:36.266451: step 700, loss = 1.8256 (887.0 examples/sec; 0.056 sec/batch)
2017-03-24 16:40:42.076769: step 800, loss = 1.6266 (886.4 examples/sec; 0.056 sec/batch)
2017-03-24 16:40:47.909738: step 900, loss = 1.5489 (894.9 examples/sec; 0.056 sec/batch)
2017-03-24 16:40:53.753567: step 1000, loss = 1.7377 (882.7 examples/sec; 0.057 sec/batch)
2017-03-24 1

In [5]:
# `info` contains every piece of information related to the trained model.
# Only the statistics are of interest here, so we pull the accuracy of each
# set (train/validation/test) out of info["stats"] and show them as a
# one-row Pandas DataFrame.

accuracies = dict(
    (split, stats["accuracy"]) for split, stats in info["stats"].items())
df = pd.DataFrame.from_records(accuracies, index=["accuracy"])
df

Unnamed: 0,test,train,validation
accuracy,0.4184,0.42032,0.4184


In [6]:
# Collect the confusion matrix measured on every set (train/validation/test)
confusion_matrices = dict(
    (split, stats["confusion_matrix"]) for split, stats in info["stats"].items())

# Display the confusion matrix of the training set
df = pd.DataFrame(confusion_matrices["train"])
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,2063,6,399,169,172,96,86,341,1502,164
1,140,802,63,67,58,41,52,314,378,3148
2,382,0,558,551,659,597,1786,395,47,19
3,101,0,217,939,238,2024,1160,346,8,36
4,161,0,230,237,963,145,2388,796,11,13
5,32,0,131,706,251,2555,958,367,5,16
6,36,0,95,362,204,300,3857,118,5,23
7,46,0,81,270,651,451,475,2962,6,33
8,1076,17,233,127,39,67,49,79,3101,185
9,193,64,81,130,116,95,100,626,337,3221


In [7]:
# 2: Train the same model for another epoch
# Note that the `force_restart` parameter is no longer passed.
# `epochs` is the TOTAL number of epochs for the trained model:
# since the model was already trained for a single epoch,
# "epochs": 2 trains it for one more epoch.

# Data augmentation: random left/right flips of the input images
flip_lr = {
    "name": "FlipLR",
    "fn": tf.image.random_flip_left_right
}

# Gradient descent settings: the optimizer class plus its keyword arguments
adam = {
    "optimizer": tf.train.AdamOptimizer,
    "args": {
        "learning_rate": 1e-3,
        "beta1": 0.9,
        "beta2": 0.99,
        "epsilon": 1e-8
    }
}

hyperparameters = {
    "epochs": 2,
    "batch_size": 50,
    "regularizations": {"l2": 1e-5, "augmentation": flip_lr},
    "gd": adam
}

with tf.device(device):
    info = train(
        model=vgg,
        dataset=cifar10,
        hyperparameters=hyperparameters)

2017-03-24 16:41:54.541918: step 1100, loss = 1.5129 (870.3 examples/sec; 0.057 sec/batch)
2017-03-24 16:42:00.511865: step 1200, loss = 1.5524 (876.2 examples/sec; 0.057 sec/batch)
2017-03-24 16:42:06.363273: step 1300, loss = 1.6666 (876.8 examples/sec; 0.057 sec/batch)
2017-03-24 16:42:12.195253: step 1400, loss = 1.4128 (881.0 examples/sec; 0.057 sec/batch)
2017-03-24 16:42:18.016253: step 1500, loss = 1.4976 (876.5 examples/sec; 0.057 sec/batch)
2017-03-24 16:42:23.834454: step 1600, loss = 1.1128 (878.8 examples/sec; 0.057 sec/batch)
2017-03-24 16:42:29.652662: step 1700, loss = 1.1047 (884.4 examples/sec; 0.057 sec/batch)
2017-03-24 16:42:35.477904: step 1800, loss = 1.2570 (857.0 examples/sec; 0.058 sec/batch)
2017-03-24 16:42:41.317559: step 1900, loss = 1.0597 (877.2 examples/sec; 0.057 sec/batch)
2017-03-24 16:42:47.170549: step 2000, loss = 1.1177 (877.0 examples/sec; 0.057 sec/batch)
2017-03-24 16:42:50.963063 (2): train accuracy = 0.640 validation accuracy = 0.609


In [8]:
# Show the accuracy of each set (train/validation/test) after the second
# epoch as a one-row Pandas DataFrame
accuracies = dict(
    (split, stats["accuracy"]) for split, stats in info["stats"].items())
df = pd.DataFrame.from_records(accuracies, index=["accuracy"])
df

Unnamed: 0,test,train,validation
accuracy,0.6092,0.62098,0.6092


In [9]:
# Keep the info of the last trained model (VGG on CIFAR-10) under its own name.
# The transfer learning and fine tuning cells read vggInfo["paths"]["best"].
# Note: this binds a second name to the same object, it does not copy it.
vggInfo = info

In [10]:
# 3: TRANSFER LEARNING
# Use the best model trained on CIFAR-10 to classify CIFAR-100 images.
# We train ONLY the softmax linear scope (which now has 100 neurons),
# keeping every other previously trained layer constant.
# The weights come from the previously trained model: DyTB saves the "best"
# model (w.r.t. a metric) in a separate folder, so we use the
# vggInfo["paths"]["best"] path, i.e. the path of the best model trained so far.
cifar100 = Cifar100.Cifar100()

# Data augmentation: random left/right flips of the input images
flip_lr = {
    "name": "FlipLR",
    "fn": tf.image.random_flip_left_right
}

# Gradient descent settings: the optimizer class plus its keyword arguments
adam = {
    "optimizer": tf.train.AdamOptimizer,
    "args": {
        "learning_rate": 1e-3,
        "beta1": 0.9,
        "beta2": 0.99,
        "epsilon": 1e-8
    }
}

hyperparameters = {
    "epochs": 1,
    "batch_size": 50,
    "regularizations": {"l2": 1e-5, "augmentation": flip_lr},
    "gd": adam
}

with tf.device(device):
    transferInfo = train(
        model=vgg,
        dataset=cifar100,
        hyperparameters=hyperparameters,
        force_restart=True,
        surgery={
            "checkpoint_path": vggInfo["paths"]["best"],
            "exclude_scopes": "VGG/softmax_linear",
            "trainable_scopes": "VGG/softmax_linear"
        })

2017-03-24 16:43:43.683366: step 0, loss = 4.8052 (29.3 examples/sec; 1.706 sec/batch)
2017-03-24 16:43:49.694822: step 100, loss = 4.6455 (875.6 examples/sec; 0.057 sec/batch)
2017-03-24 16:43:55.605838: step 200, loss = 4.6465 (873.1 examples/sec; 0.057 sec/batch)
2017-03-24 16:44:01.482621: step 300, loss = 4.6347 (870.6 examples/sec; 0.057 sec/batch)
2017-03-24 16:44:07.348843: step 400, loss = 4.6364 (878.9 examples/sec; 0.057 sec/batch)
2017-03-24 16:44:13.226265: step 500, loss = 4.6301 (876.2 examples/sec; 0.057 sec/batch)
2017-03-24 16:44:19.098956: step 600, loss = 4.6263 (878.5 examples/sec; 0.057 sec/batch)
2017-03-24 16:44:24.959186: step 700, loss = 4.6244 (882.0 examples/sec; 0.057 sec/batch)
2017-03-24 16:44:30.792740: step 800, loss = 4.6268 (888.5 examples/sec; 0.056 sec/batch)
2017-03-24 16:44:36.677475: step 900, loss = 4.6200 (872.5 examples/sec; 0.057 sec/batch)
2017-03-24 16:44:42.553147: step 1000, loss = 4.6147 (876.8 examples/sec; 0.057 sec/batch)
2017-03-24 1

In [11]:
# 4: FINE TUNING
# Start from the model pointed to by vggInfo and tune the whole network
# on CIFAR-100, retraining it end-to-end from the learned weights.
# Compared to the transfer learning cell, the only difference is that the
# "trainable_scopes" entry is removed from the surgery parameter.

# Data augmentation: random left/right flips of the input images
flip_lr = {
    "name": "FlipLR",
    "fn": tf.image.random_flip_left_right
}

# Gradient descent settings: the optimizer class plus its keyword arguments
adam = {
    "optimizer": tf.train.AdamOptimizer,
    "args": {
        "learning_rate": 1e-3,
        "beta1": 0.9,
        "beta2": 0.99,
        "epsilon": 1e-8
    }
}

hyperparameters = {
    "epochs": 1,
    "batch_size": 50,
    "regularizations": {"l2": 1e-5, "augmentation": flip_lr},
    "gd": adam
}

with tf.device(device):
    fineTuningInfo = train(
        model=vgg,
        dataset=cifar100,
        hyperparameters=hyperparameters,
        force_restart=True,
        surgery={
            "checkpoint_path": vggInfo["paths"]["best"],
            "exclude_scopes": "VGG/softmax_linear"
        })


2017-03-24 16:45:39.587934: step 0, loss = 4.8366 (31.3 examples/sec; 1.598 sec/batch)
2017-03-24 16:45:45.607712: step 100, loss = 4.6532 (875.9 examples/sec; 0.057 sec/batch)
2017-03-24 16:45:51.431524: step 200, loss = 4.6394 (882.9 examples/sec; 0.057 sec/batch)
2017-03-24 16:45:57.311907: step 300, loss = 4.6367 (873.3 examples/sec; 0.057 sec/batch)
2017-03-24 16:46:03.182901: step 400, loss = 4.6375 (879.0 examples/sec; 0.057 sec/batch)
2017-03-24 16:46:09.049887: step 500, loss = 4.6347 (880.4 examples/sec; 0.057 sec/batch)
2017-03-24 16:46:14.878901: step 600, loss = 4.6308 (882.5 examples/sec; 0.057 sec/batch)
2017-03-24 16:46:20.715646: step 700, loss = 4.6263 (878.3 examples/sec; 0.057 sec/batch)
2017-03-24 16:46:26.543737: step 800, loss = 4.6249 (879.6 examples/sec; 0.057 sec/batch)
2017-03-24 16:46:32.418109: step 900, loss = 4.6243 (881.8 examples/sec; 0.057 sec/batch)
2017-03-24 16:46:38.268707: step 1000, loss = 4.6169 (877.8 examples/sec; 0.057 sec/batch)
2017-03-24 1

In [12]:
# Compare the performance of Transfer Learning and Fine Tuning.
# First: accuracy of the transfer learning run on each set
accuracies = dict(
    (split, stats["accuracy"]) for split, stats in transferInfo["stats"].items())
df = pd.DataFrame.from_records(accuracies, index=["accuracy"])
df

Unnamed: 0,test,train,validation
accuracy,0.01,0.01006,0.01


In [13]:
# Accuracy of the fine tuning run on each set (train/validation/test)
accuracies = dict(
    (split, stats["accuracy"]) for split, stats in fineTuningInfo["stats"].items())
df = pd.DataFrame.from_records(accuracies, index=["accuracy"])
df

Unnamed: 0,test,train,validation
accuracy,0.01,0.01024,0.01


In [14]:
# For completeness, let's see what an info object contains.
# `info` here is still the one returned by the second CIFAR-10 training run.
pprint.pprint(info, indent=4)

{   'args': {   'batch_size': 50,
                'checkpoint_path': '',
                'comment': '',
                'dataset': <dytb.inputs.predefined.Cifar10.Cifar10 object at 0x7f3f642650b8>,
                'epochs': 2,
                'exclude_scopes': '',
                'force_restart': False,
                'gd': {   'args': {   'beta1': 0.9,
                                      'beta2': 0.99,
                                      'epsilon': 1e-08,
                                      'learning_rate': 0.001},
                          'optimizer': <class 'tensorflow.python.training.adam.AdamOptimizer'>},
                'lr_decay': {'enabled': False, 'epochs': 25, 'factor': 0.1},
                'model': <dytb.models.predefined.VGG.VGG object at 0x7f3f642c6f98>,
                'regularizations': {   'augmentation': <function random_flip_left_right at 0x7f3f0db04c80>,
                                       'l2': 1e-05},
                'trainable_scopes': ''},
    'paths'