In [1]:
# Let's see how to use DTB to:
# 1: train a VGG-like network on CIFAR-10
# 2: continue the training from the last iteration
# 3: do TRANSFER LEARNING from the trained model to another model that will be able to classify CIFAR-100
# 4: do FINE TUNING of the model trained on CIFAR-10 to solve the CIFAR-100 classification problem
# 5: compare the train/validation/test performance of the models

In [2]:
import pprint

import pandas as pd
import tensorflow as tf

from dtb.inputs import Cifar10, Cifar100
from dtb.models.VGG import VGG
from dtb.train import train

In [3]:
# Instantiate the model.
# This single VGG instance is the one passed as `model` to every train()
# call in this notebook (CIFAR-10 training, transfer learning, fine tuning).
vgg = VGG()

In [4]:
# Instantiate the CIFAR-10 input source.
# It is passed as `dataset` to the two CIFAR-10 training runs (steps 1 and 2).
cifar10 = Cifar10.Cifar10()

In [5]:
# 1: Train VGG on CIFAR-10 for one epoch

# Arguments forwarded to the Adam optimizer
first_epoch_adam_args = {
    "learning_rate": 1e-3,
    "beta1": 0.9,
    "beta2": 0.99,
    "epsilon": 1e-8
}

# Complete training configuration: a single epoch, batches of 50 images,
# L2 penalty plus random horizontal flips as regularization, Adam as optimizer
first_epoch_hyperparameters = {
    "epochs": 1,
    "batch_size": 50,
    "regularizations": {
        "l2": 1e-5,
        "augmentation": lambda image: tf.image.random_flip_left_right(image)
    },
    "gd": {
        "optimizer": tf.train.AdamOptimizer,
        "args": first_epoch_adam_args
    }
}

# The train process is placed on GPU:0.
# `device` is read again by the cell that continues this training.
device = '/gpu:0'
with tf.device(device):
    # force_restart=True is what distinguishes this fresh run from the
    # continuation cell further down, which omits the parameter.
    info = train(
        model=vgg,
        dataset=cifar10,
        hyperparameters=first_epoch_hyperparameters,
        force_restart=True)

2017-03-06 10:59:16.367742: step 0, loss = 2.4937 (13.6 examples/sec; 3.684 sec/batch)
2017-03-06 10:59:24.941985: step 100, loss = 2.1792 (604.2 examples/sec; 0.083 sec/batch)
2017-03-06 10:59:33.379265: step 200, loss = 1.8821 (605.1 examples/sec; 0.083 sec/batch)
2017-03-06 10:59:41.865065: step 300, loss = 1.7530 (605.1 examples/sec; 0.083 sec/batch)
2017-03-06 10:59:50.291389: step 400, loss = 1.8293 (606.8 examples/sec; 0.082 sec/batch)
2017-03-06 10:59:58.750471: step 500, loss = 1.6395 (608.0 examples/sec; 0.082 sec/batch)
2017-03-06 11:00:07.154172: step 600, loss = 1.5307 (607.3 examples/sec; 0.082 sec/batch)
2017-03-06 11:00:15.607922: step 700, loss = 1.5863 (607.2 examples/sec; 0.082 sec/batch)
2017-03-06 11:00:24.052722: step 800, loss = 1.5072 (609.6 examples/sec; 0.082 sec/batch)
2017-03-06 11:00:32.450109: step 900, loss = 1.5394 (610.0 examples/sec; 0.082 sec/batch)
2017-03-06 11:00:40.938016: step 1000, loss = 1.4443 (598.2 examples/sec; 0.084 sec/batch)
2017-03-06 1

In [6]:
# `info` contains every piece of information related to the trained model.
# Only the statistics are of interest here, so pick the "stats" entry out of
# the dict and render it as a one-row table using a Pandas DataFrame.
first_epoch_stats = info["stats"]
df = pd.DataFrame.from_records(first_epoch_stats, index=[0])
df

Unnamed: 0,dataset,model,test,train,validation
0,CIFAR-10,VGG,0.4802,0.49092,0.4802


In [7]:
# 2: train it again for another epoch.
# Note that the `force_restart` parameter has been removed.
# `epochs` is the TOTAL number of epochs for the trained model.
# Since we already trained it for a single epoch,
# we set "epochs": 2 in order to train it for one more epoch.

In [8]:
# Regularization: L2 penalty plus random horizontal flips of the input images
second_epoch_regularizations = {
    "l2": 1e-5,
    "augmentation": lambda image: tf.image.random_flip_left_right(image)
}

# Gradient descent: Adam with the same arguments used for the first epoch
second_epoch_gd = {
    "optimizer": tf.train.AdamOptimizer,
    "args": {
        "learning_rate": 1e-3,
        "beta1": 0.9,
        "beta2": 0.99,
        "epsilon": 1e-8
    }
}

# "epochs" is the total epoch count, hence 2 to add one epoch to the
# one already completed. No `force_restart` here: the training continues.
with tf.device(device):
    info = train(
        model=vgg,
        dataset=cifar10,
        hyperparameters={
            "epochs": 2,
            "batch_size": 50,
            "regularizations": second_epoch_regularizations,
            "gd": second_epoch_gd
        })

2017-03-06 11:01:37.944289: step 1100, loss = 1.4983 (575.4 examples/sec; 0.087 sec/batch)
2017-03-06 11:01:47.054296: step 1200, loss = 1.2373 (567.4 examples/sec; 0.088 sec/batch)
2017-03-06 11:01:56.097009: step 1300, loss = 1.2526 (570.1 examples/sec; 0.088 sec/batch)
2017-03-06 11:02:05.093629: step 1400, loss = 1.1995 (565.7 examples/sec; 0.088 sec/batch)
2017-03-06 11:02:14.115203: step 1500, loss = 1.4037 (567.3 examples/sec; 0.088 sec/batch)
2017-03-06 11:02:23.105468: step 1600, loss = 1.3013 (569.3 examples/sec; 0.088 sec/batch)
2017-03-06 11:02:32.071529: step 1700, loss = 1.1229 (566.1 examples/sec; 0.088 sec/batch)
2017-03-06 11:02:41.063755: step 1800, loss = 1.1351 (565.9 examples/sec; 0.088 sec/batch)
2017-03-06 11:02:50.044661: step 1900, loss = 0.8582 (567.1 examples/sec; 0.088 sec/batch)
2017-03-06 11:02:59.027356: step 2000, loss = 0.9880 (567.7 examples/sec; 0.088 sec/batch)
2017-03-06 11:03:04.643492 (2): train accuracy = 0.680 validation accuracy = 0.615


In [9]:
# Show the statistics gathered after the second epoch as a one-row table,
# again through a Pandas DataFrame.
second_epoch_stats = info["stats"]
df = pd.DataFrame.from_records(second_epoch_stats, index=[0])
df

Unnamed: 0,dataset,model,test,train,validation
0,CIFAR-10,VGG,0.6147,0.63224,0.6147


In [10]:
# Save the info of the last trained model (VGG on CIFAR-10, 2 epochs).
# The transfer learning and fine tuning cells read vggInfo["paths"]["best"]
# to locate the checkpoint to start from.
vggInfo = info

In [11]:
# 3: TRANSFER LEARNING
# Use the best model trained on CIFAR-10 to classify CIFAR-100 images.
# Only the softmax linear scope (which now has 100 neurons) is trained,
# keeping every other previously trained layer constant.
# The weights are loaded from the previously trained model: DTB saves the
# "best" model (w.r.t. a metric) in a separate folder, so we pass
# vggInfo["paths"]["best"], the path of the best model trained so far,
# as the checkpoint to start from.
cifar100 = Cifar100.Cifar100()

# Reuse the `device` defined for the CIFAR-10 runs instead of repeating the
# "/gpu:0" literal, so the placement is configured in a single spot.
with tf.device(device):
    transferInfo = train(
        model=vgg,
        dataset=cifar100,
        hyperparameters={
            "epochs": 1,
            "batch_size": 50,
            "regularizations": {
                "l2": 1e-5,
                "augmentation": lambda image: tf.image.random_flip_left_right(image)
            },
            "gd": {
                "optimizer": tf.train.AdamOptimizer,
                "args": {
                    "learning_rate": 1e-3,
                    "beta1": 0.9,
                    "beta2": 0.99,
                    "epsilon": 1e-8
                }
            }
        },
        force_restart=True,
        surgery={
            "checkpoint_path": vggInfo["paths"]["best"],
            # NOTE(review): presumably the scope left out when restoring the
            # checkpoint (its shape differs: 100 outputs instead of 10) -
            # confirm against the DTB documentation.
            "exclude_scopes": "VGG/softmax_linear",
            # The only scope that gets trained; everything else stays constant
            "trainable_scopes": "VGG/softmax_linear"
        })

2017-03-06 11:03:54.293972: step 0, loss = 4.7942 (18.6 examples/sec; 2.692 sec/batch)
2017-03-06 11:04:03.336622: step 100, loss = 4.6445 (570.9 examples/sec; 0.088 sec/batch)
2017-03-06 11:04:12.272120: step 200, loss = 4.6419 (570.8 examples/sec; 0.088 sec/batch)
2017-03-06 11:04:21.190201: step 300, loss = 4.6386 (572.5 examples/sec; 0.087 sec/batch)
2017-03-06 11:04:30.124745: step 400, loss = 4.6390 (572.8 examples/sec; 0.087 sec/batch)
2017-03-06 11:04:39.070610: step 500, loss = 4.6340 (573.1 examples/sec; 0.087 sec/batch)
2017-03-06 11:04:48.024152: step 600, loss = 4.6310 (574.0 examples/sec; 0.087 sec/batch)
2017-03-06 11:04:56.940200: step 700, loss = 4.6291 (573.8 examples/sec; 0.087 sec/batch)
2017-03-06 11:05:05.875556: step 800, loss = 4.6259 (572.4 examples/sec; 0.087 sec/batch)
2017-03-06 11:05:14.849692: step 900, loss = 4.6221 (572.0 examples/sec; 0.087 sec/batch)
2017-03-06 11:05:23.794200: step 1000, loss = 4.6149 (572.9 examples/sec; 0.087 sec/batch)
2017-03-06 1

In [12]:
# 4: FINE TUNING:
# Use the model pointed to by vggInfo as the starting point and tune the
# whole network on CIFAR-100.
# The network is retrained end-to-end, starting from the learned weights:
# with respect to transfer learning, just remove the "trainable_scopes"
# entry from the surgery parameter.

# Reuse the `device` defined for the CIFAR-10 runs instead of repeating the
# "/gpu:0" literal, so the placement is configured in a single spot.
with tf.device(device):
    fineTuningInfo = train(
        model=vgg,
        dataset=cifar100,
        hyperparameters={
            "epochs": 1,
            "batch_size": 50,
            "regularizations": {
                "l2": 1e-5,
                "augmentation": lambda image: tf.image.random_flip_left_right(image)
            },
            "gd": {
                "optimizer": tf.train.AdamOptimizer,
                "args": {
                    "learning_rate": 1e-3,
                    "beta1": 0.9,
                    "beta2": 0.99,
                    "epsilon": 1e-8
                }
            }
        },
        force_restart=True,
        surgery={
            "checkpoint_path": vggInfo["paths"]["best"],
            # NOTE(review): presumably the scope left out when restoring the
            # checkpoint - confirm against the DTB documentation.
            "exclude_scopes": "VGG/softmax_linear"
        })


2017-03-06 11:06:12.555247: step 0, loss = 4.8401 (18.2 examples/sec; 2.741 sec/batch)
2017-03-06 11:06:21.582085: step 100, loss = 4.6511 (573.8 examples/sec; 0.087 sec/batch)
2017-03-06 11:06:30.477807: step 200, loss = 4.6383 (574.4 examples/sec; 0.087 sec/batch)
2017-03-06 11:06:39.414354: step 300, loss = 4.6384 (571.7 examples/sec; 0.087 sec/batch)
2017-03-06 11:06:48.321369: step 400, loss = 4.6325 (573.8 examples/sec; 0.087 sec/batch)
2017-03-06 11:06:57.282613: step 500, loss = 4.6286 (574.5 examples/sec; 0.087 sec/batch)
2017-03-06 11:07:06.228359: step 600, loss = 4.6255 (573.9 examples/sec; 0.087 sec/batch)
2017-03-06 11:07:15.124878: step 700, loss = 4.6227 (573.1 examples/sec; 0.087 sec/batch)
2017-03-06 11:07:24.066611: step 800, loss = 4.6208 (572.0 examples/sec; 0.087 sec/batch)
2017-03-06 11:07:32.980238: step 900, loss = 4.6149 (573.5 examples/sec; 0.087 sec/batch)
2017-03-06 11:07:41.920004: step 1000, loss = 4.6121 (572.3 examples/sec; 0.087 sec/batch)
2017-03-06 1

In [13]:
# Compare the performance of Transfer Learning and Fine Tuning.
# This cell shows the transfer learning statistics as a one-row table.
# NOTE(review): the accuracy displayed below (~0.01) equals random guessing
# over 100 classes, and the logged loss stays close to ln(100) ~= 4.6, so the
# run does not appear to have learned anything in its single epoch -
# worth investigating before drawing conclusions from the comparison.
transfer_stats = transferInfo["stats"]
df = pd.DataFrame.from_records(transfer_stats, index=[0])
df

Unnamed: 0,dataset,model,test,train,validation
0,CIFAR-100,VGG,0.01,0.0104,0.01


In [14]:
# Fine tuning statistics, rendered as a one-row table.
# NOTE(review): the accuracy displayed below (~0.01) equals random guessing
# over 100 classes - the fine-tuned model does not appear to have learned
# anything in its single epoch.
fine_tuning_stats = fineTuningInfo["stats"]
df = pd.DataFrame.from_records(fine_tuning_stats, index=[0])
df

Unnamed: 0,dataset,model,test,train,validation
0,CIFAR-100,VGG,0.01,0.00994,0.01


In [15]:
# For completeness, let's see what an info object contains.
# `info` is the dict returned by the last CIFAR-10 training run (the same
# object saved as vggInfo). `pprint` is imported in the import cell at the
# top of the notebook, together with every other dependency.
pprint.pprint(info, indent=4)

{   'args': {   'batch_size': 50,
                'checkpoint_path': '',
                'comment': '',
                'dataset': <dtb.inputs.Cifar10.Cifar10 object at 0x7f500ff05a58>,
                'epochs': 2,
                'exclude_scopes': '',
                'force_restart': False,
                'gd': {   'args': {   'beta1': 0.9,
                                      'beta2': 0.99,
                                      'epsilon': 1e-08,
                                      'learning_rate': 0.001},
                          'optimizer': <class 'tensorflow.python.training.adam.AdamOptimizer'>},
                'lr_decay': {'enabled': False, 'epochs': 25, 'factor': 0.1},
                'model': <dtb.models.VGG.VGG object at 0x7f500ff05978>,
                'regularizations': {   'augmentation': <function <lambda> at 0x7f500e46ca60>,
                                       'l2': 1e-05},
                'trainable_scopes': ''},
    'paths': {   'best': '/mnt/data/pgaleone/dtb_