In [1]:
# Let's see how to use DyTB to:
# 1: train a VGG-like network on CIFAR-10
# 2: continue training from the last iteration
# 3: do TRANSFER LEARNING from the trained model to another model that will be able to classify CIFAR-100
# 4: do FINE TUNING of the model trained on CIFAR-10 to solve the CIFAR-100 classification problem
# 5: compare the train/validation/test performance of the models

import pandas as pd
import pprint
import tensorflow as tf
from dytb.inputs.predefined import Cifar10, Cifar100
from dytb.train import train
from dytb.models.predefined.VGG import VGG

In [2]:
# Instantiate the model (VGG built with its default constructor arguments).
# This single instance is the `model` passed to every train() call in this
# notebook.
vgg = VGG()

In [3]:
# Instantiate the CIFAR-10 input source.
# `Cifar10` is imported as a module, so the class is reached as Cifar10.Cifar10.
cifar10 = Cifar10.Cifar10()

In [4]:
# 1: Train VGG on CIFAR-10 for one epoch

# Regularization settings: the "l2" coefficient plus a single augmentation,
# the random left/right flip provided by TensorFlow.
cifar10_regularizations = {
    "l2": 1e-5,
    "augmentation": {
        "name": "FlipLR",
        "fn": tf.image.random_flip_left_right
    }
}

# Gradient descent settings: the optimizer class (not an instance) and the
# keyword arguments that go with it.
cifar10_gd = {
    "optimizer": tf.train.AdamOptimizer,
    "args": {
        "learning_rate": 1e-3,
        "beta1": 0.9,
        "beta2": 0.99,
        "epsilon": 1e-8
    }
}

# Place the train process on GPU:0.
# `device` is reused by the following training cells.
device = '/gpu:0'
with tf.device(device):
    info = train(
        model=vgg,
        dataset=cifar10,
        hyperparameters={
            "epochs": 1,
            "batch_size": 50,
            "regularizations": cifar10_regularizations,
            "gd": cifar10_gd
        },
        # NOTE(review): presumably discards any previously saved training
        # state so the run starts from scratch - confirm against DyTB docs.
        force_restart=True)

2017-03-22 11:03:34.528382: step 0, loss = 2.4389 (22.1 examples/sec; 2.262 sec/batch)
2017-03-22 11:03:40.453107: step 100, loss = 2.3493 (877.2 examples/sec; 0.057 sec/batch)
2017-03-22 11:03:46.303900: step 200, loss = 2.3435 (886.9 examples/sec; 0.056 sec/batch)
2017-03-22 11:03:52.140747: step 300, loss = 1.9785 (874.9 examples/sec; 0.057 sec/batch)
2017-03-22 11:03:57.979964: step 400, loss = 2.0475 (881.1 examples/sec; 0.057 sec/batch)
2017-03-22 11:04:03.845082: step 500, loss = 1.8387 (891.0 examples/sec; 0.056 sec/batch)
2017-03-22 11:04:09.663344: step 600, loss = 1.7706 (880.7 examples/sec; 0.057 sec/batch)
2017-03-22 11:04:15.484470: step 700, loss = 1.7600 (884.0 examples/sec; 0.057 sec/batch)
2017-03-22 11:04:21.333627: step 800, loss = 1.8535 (892.4 examples/sec; 0.056 sec/batch)
2017-03-22 11:04:27.159882: step 900, loss = 1.5366 (884.8 examples/sec; 0.057 sec/batch)
2017-03-22 11:04:33.027119: step 1000, loss = 1.6242 (877.1 examples/sec; 0.057 sec/batch)
2017-03-22 1

In [5]:
# `info` contains every piece of information related to the trained model.
# Only the statistics are of interest here, so for each evaluated set
# (train/validation/test) we pick the accuracy out of info["stats"].
accuracies = {
    split: info["stats"][split]["accuracy"]
    for split in info["stats"]
}

# Display the results as a one-row table, using a Pandas DataFrame.
df = pd.DataFrame.from_records(accuracies, index=["accuracy"])
df

Unnamed: 0,test,train,validation
accuracy,0.3487,0.3531,0.3487


In [6]:
# Extract the confusion matrix measured on every set (train/validation/test).
# Entries of info["stats"] that are not dictionaries are skipped. The check
# uses isinstance() rather than an exact type comparison, so mappings that
# subclass dict (e.g. OrderedDict) are not silently dropped.
confusion_matrices = {
    key: value["confusion_matrix"]
    for key, value in info["stats"].items()
    if isinstance(value, dict)
}
# Display the confusion matrix of the training set
df = pd.DataFrame(confusion_matrices["train"])
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,1690,516,380,164,0,264,38,147,1419,380
1,31,3467,18,54,0,89,2,70,106,1154
2,451,21,726,558,19,1833,862,258,87,148
3,82,12,218,368,1,3678,180,230,34,190
4,250,18,461,491,69,1742,1215,563,70,143
5,49,5,164,255,5,4015,148,275,9,96
6,48,11,318,635,14,2256,1418,160,21,162
7,78,21,174,197,15,2106,104,1997,32,301
8,851,1056,165,114,0,111,4,34,2205,397
9,57,2467,22,93,0,144,1,244,181,1798


In [7]:
# 2: Train the same model for one more epoch.
# `force_restart` is intentionally NOT passed this time.
# "epochs" is the TOTAL number of epochs for the trained model: one epoch
# was already completed above, so "epochs": 2 means "one additional epoch".

# Same regularization and optimizer settings used for the first epoch.
resume_regularizations = {
    "l2": 1e-5,
    "augmentation": {
        "name": "FlipLR",
        "fn": tf.image.random_flip_left_right
    }
}
resume_gd = {
    "optimizer": tf.train.AdamOptimizer,
    "args": {
        "learning_rate": 1e-3,
        "beta1": 0.9,
        "beta2": 0.99,
        "epsilon": 1e-8
    }
}

with tf.device(device):
    info = train(
        model=vgg,
        dataset=cifar10,
        hyperparameters={
            "epochs": 2,
            "batch_size": 50,
            "regularizations": resume_regularizations,
            "gd": resume_gd
        })

2017-03-22 11:05:34.374507: step 1100, loss = 1.4109 (886.4 examples/sec; 0.056 sec/batch)
2017-03-22 11:05:40.311744: step 1200, loss = 1.3996 (883.7 examples/sec; 0.057 sec/batch)
2017-03-22 11:05:46.183052: step 1300, loss = 1.4360 (879.2 examples/sec; 0.057 sec/batch)
2017-03-22 11:05:52.039435: step 1400, loss = 1.7960 (880.8 examples/sec; 0.057 sec/batch)
2017-03-22 11:05:57.873782: step 1500, loss = 1.2612 (874.2 examples/sec; 0.057 sec/batch)
2017-03-22 11:06:03.717252: step 1600, loss = 1.4995 (879.0 examples/sec; 0.057 sec/batch)
2017-03-22 11:06:09.561353: step 1700, loss = 1.3791 (872.0 examples/sec; 0.057 sec/batch)
2017-03-22 11:06:15.398064: step 1800, loss = 1.1221 (875.4 examples/sec; 0.057 sec/batch)
2017-03-22 11:06:21.244588: step 1900, loss = 1.3535 (879.7 examples/sec; 0.057 sec/batch)
2017-03-22 11:06:27.071126: step 2000, loss = 1.0066 (876.3 examples/sec; 0.057 sec/batch)
2017-03-22 11:06:30.873485 (2): train accuracy = 0.660 validation accuracy = 0.583


In [8]:
# Collect the accuracy of every evaluated set after the second epoch and
# display it as a one-row Pandas DataFrame.
stats = info["stats"]
accuracies = {split: stats[split]["accuracy"] for split in stats}
df = pd.DataFrame.from_records(accuracies, index=["accuracy"])
df

Unnamed: 0,test,train,validation
accuracy,0.5835,0.5887,0.5835


In [9]:
# Save the info of the last trained model (VGG on CIFAR-10, two epochs).
# This binds a second name to the same dict (no copy is made); it is kept
# because the transfer learning / fine tuning cells read
# vggInfo["paths"]["best"].
vggInfo = info

In [10]:
# 3: TRANSFER LEARNING
# Use the best model trained on CIFAR-10 to classify CIFAR-100 images.
# Only the softmax linear scope (which now has 100 neurons) is trained,
# while every other previously trained layer is kept constant.
# The weights come from the previously trained model: DyTB saves the "best"
# model (w.r.t. a metric) in a separate folder, and vggInfo["paths"]["best"]
# is the path of the best model trained so far.
cifar100 = Cifar100.Cifar100()

transfer_hyperparameters = {
    "epochs": 1,
    "batch_size": 50,
    "regularizations": {
        "l2": 1e-5,
        "augmentation": {
            "name": "FlipLR",
            "fn": tf.image.random_flip_left_right
        }
    },
    "gd": {
        "optimizer": tf.train.AdamOptimizer,
        "args": {
            "learning_rate": 1e-3,
            "beta1": 0.9,
            "beta2": 0.99,
            "epsilon": 1e-8
        }
    }
}

# NOTE(review): "exclude_scopes" presumably lists the scopes that are NOT
# restored from the checkpoint, and "trainable_scopes" the only scopes that
# get updated - confirm against the DyTB documentation.
transfer_surgery = {
    "checkpoint_path": vggInfo["paths"]["best"],
    "exclude_scopes": "VGG/softmax_linear",
    "trainable_scopes": "VGG/softmax_linear"
}

with tf.device(device):
    transferInfo = train(
        model=vgg,
        dataset=cifar100,
        hyperparameters=transfer_hyperparameters,
        force_restart=True,
        surgery=transfer_surgery)

2017-03-22 11:07:22.929769: step 0, loss = 4.7206 (31.8 examples/sec; 1.575 sec/batch)
2017-03-22 11:07:28.922326: step 100, loss = 4.6433 (870.9 examples/sec; 0.057 sec/batch)
2017-03-22 11:07:34.815198: step 200, loss = 4.6456 (885.5 examples/sec; 0.056 sec/batch)
2017-03-22 11:07:40.850390: step 300, loss = 4.6375 (622.4 examples/sec; 0.080 sec/batch)
2017-03-22 11:07:46.978545: step 400, loss = 4.6362 (873.8 examples/sec; 0.057 sec/batch)
2017-03-22 11:07:52.874105: step 500, loss = 4.6377 (875.8 examples/sec; 0.057 sec/batch)
2017-03-22 11:07:58.754069: step 600, loss = 4.6313 (872.5 examples/sec; 0.057 sec/batch)
2017-03-22 11:08:04.591320: step 700, loss = 4.6270 (876.2 examples/sec; 0.057 sec/batch)
2017-03-22 11:08:10.469653: step 800, loss = 4.6238 (877.6 examples/sec; 0.057 sec/batch)
2017-03-22 11:08:16.393702: step 900, loss = 4.6236 (874.7 examples/sec; 0.057 sec/batch)
2017-03-22 11:08:22.341908: step 1000, loss = 4.6161 (877.0 examples/sec; 0.057 sec/batch)
2017-03-22 1

In [11]:
# 4: FINE TUNING
# Start from the model pointed to by vggInfo and tune the whole network on
# CIFAR-100: the network is retrained end-to-end from the learned weights.
# Compared with the transfer learning cell, the only difference is that the
# "trainable_scopes" entry is left out of the surgery parameter.
fine_tuning_hyperparameters = {
    "epochs": 1,
    "batch_size": 50,
    "regularizations": {
        "l2": 1e-5,
        "augmentation": {
            "name": "FlipLR",
            "fn": tf.image.random_flip_left_right
        }
    },
    "gd": {
        "optimizer": tf.train.AdamOptimizer,
        "args": {
            "learning_rate": 1e-3,
            "beta1": 0.9,
            "beta2": 0.99,
            "epsilon": 1e-8
        }
    }
}

fine_tuning_surgery = {
    "checkpoint_path": vggInfo["paths"]["best"],
    "exclude_scopes": "VGG/softmax_linear"
}

with tf.device(device):
    fineTuningInfo = train(
        model=vgg,
        dataset=cifar100,
        hyperparameters=fine_tuning_hyperparameters,
        force_restart=True,
        surgery=fine_tuning_surgery)


2017-03-22 11:09:17.863176: step 0, loss = 4.8634 (32.5 examples/sec; 1.540 sec/batch)
2017-03-22 11:09:23.859887: step 100, loss = 4.6499 (878.9 examples/sec; 0.057 sec/batch)
2017-03-22 11:09:29.753352: step 200, loss = 4.6397 (879.3 examples/sec; 0.057 sec/batch)
2017-03-22 11:09:35.580706: step 300, loss = 4.6451 (879.3 examples/sec; 0.057 sec/batch)
2017-03-22 11:09:41.441008: step 400, loss = 4.6383 (874.9 examples/sec; 0.057 sec/batch)
2017-03-22 11:09:47.305666: step 500, loss = 4.6336 (883.6 examples/sec; 0.057 sec/batch)
2017-03-22 11:09:53.173104: step 600, loss = 4.6297 (873.6 examples/sec; 0.057 sec/batch)
2017-03-22 11:09:59.026746: step 700, loss = 4.6234 (882.3 examples/sec; 0.057 sec/batch)
2017-03-22 11:10:04.880436: step 800, loss = 4.6228 (877.0 examples/sec; 0.057 sec/batch)
2017-03-22 11:10:10.740961: step 900, loss = 4.6206 (882.8 examples/sec; 0.057 sec/batch)
2017-03-22 11:10:16.586785: step 1000, loss = 4.6100 (873.6 examples/sec; 0.057 sec/batch)
2017-03-22 1

In [12]:
# Compare the performance of Transfer Learning and Fine Tuning.
# This cell shows the transfer learning accuracies, one column per set.
# NOTE(review): the recorded output is about 0.01 on every set, which is what
# random guessing scores on a 100-class problem - the run does not appear to
# have learned anything; worth double-checking the training setup.
transfer_stats = transferInfo["stats"]
accuracies = {
    split: transfer_stats[split]["accuracy"]
    for split in transfer_stats
}
df = pd.DataFrame.from_records(accuracies, index=["accuracy"])
df

Unnamed: 0,test,train,validation
accuracy,0.01,0.0102,0.01


In [13]:
# Fine tuning accuracies, one column per evaluated set.
# NOTE(review): the recorded output is about 0.01 on every set, i.e. chance
# level for a 100-class problem - worth double-checking the training setup.
fine_tuning_stats = fineTuningInfo["stats"]
accuracies = {
    split: fine_tuning_stats[split]["accuracy"]
    for split in fine_tuning_stats
}
df = pd.DataFrame.from_records(accuracies, index=["accuracy"])
df

Unnamed: 0,test,train,validation
accuracy,0.01,0.01014,0.01


In [14]:
# For completeness, let's see what an info object contains.
# `info` here is the result of the second CIFAR-10 training call.
info_printer = pprint.PrettyPrinter(indent=4)
info_printer.pprint(info)

{   'args': {   'batch_size': 50,
                'checkpoint_path': '',
                'comment': '',
                'dataset': <dytb.inputs.predefined.Cifar10.Cifar10 object at 0x7fe5b42f6048>,
                'epochs': 2,
                'exclude_scopes': '',
                'force_restart': False,
                'gd': {   'args': {   'beta1': 0.9,
                                      'beta2': 0.99,
                                      'epsilon': 1e-08,
                                      'learning_rate': 0.001},
                          'optimizer': <class 'tensorflow.python.training.adam.AdamOptimizer'>},
                'lr_decay': {'enabled': False, 'epochs': 25, 'factor': 0.1},
                'model': <dytb.models.predefined.VGG.VGG object at 0x7fe5b7bc0b00>,
                'regularizations': {   'augmentation': <function random_flip_left_right at 0x7fe55eb84c80>,
                                       'l2': 1e-05},
                'trainable_scopes': ''},
    'paths'