In [1]:
# Let's see how to use DyTB to:
# 1: train a VGG-like network on CIFAR-10
# 2: continue training from the last iteration
# 3: do TRANSFER LEARNING from the trained model to another model that will be able to classify CIFAR-100
# 4: do FINE TUNING of the model trained on CIFAR-10 to solve the CIFAR-100 classification problem
# 5: compare the train/validation/test performance of the models

In [2]:
import pandas as pd
import pprint
import tensorflow as tf
from dytb.inputs import Cifar10, Cifar100
from dytb.train import train
from dytb.models.VGG import VGG

In [3]:
# Instantiate the model: the VGG class imported from dytb.models.VGG, built
# with its default constructor arguments. This same `vgg` object is passed
# as `model` to every train() call in this notebook.
vgg = VGG()

In [4]:
# Instantiate the CIFAR-10 input source (dytb.inputs.Cifar10). It is passed
# as `dataset` to the two CIFAR-10 training runs below.
cifar10 = Cifar10.Cifar10()

In [5]:
# 1: Train VGG on CIFAR-10 for one epoch.
# force_restart=True is passed here; the follow-up run that continues this
# training omits it.

# Regularization settings: the "l2" value plus a left/right random flip
# used as data augmentation.
regularization_cfg = {
    "l2": 1e-5,
    "augmentation": {
        "name": "FlipLR",
        "fn": tf.image.random_flip_left_right,
    },
}

# Gradient descent settings: the optimizer class and the keyword arguments
# listed for it.
optimizer_cfg = {
    "optimizer": tf.train.AdamOptimizer,
    "args": {
        "learning_rate": 1e-3,
        "beta1": 0.9,
        "beta2": 0.99,
        "epsilon": 1e-8,
    },
}

# Place the train process on GPU:0. `device` is reused by the later cells.
device = '/gpu:0'
with tf.device(device):
    info = train(
        model=vgg,
        dataset=cifar10,
        hyperparameters={
            "epochs": 1,
            "batch_size": 50,
            "regularizations": regularization_cfg,
            "gd": optimizer_cfg,
        },
        force_restart=True)

2017-03-09 10:22:02.488145: step 0, loss = 2.3831 (13.0 examples/sec; 3.850 sec/batch)
2017-03-09 10:22:11.150772: step 100, loss = 2.3359 (578.8 examples/sec; 0.086 sec/batch)
2017-03-09 10:22:20.163013: step 200, loss = 2.3398 (570.5 examples/sec; 0.088 sec/batch)
2017-03-09 10:22:29.187936: step 300, loss = 2.2427 (570.7 examples/sec; 0.088 sec/batch)
2017-03-09 10:22:38.141480: step 400, loss = 1.9855 (569.9 examples/sec; 0.088 sec/batch)
2017-03-09 10:22:47.100676: step 500, loss = 1.9199 (572.5 examples/sec; 0.087 sec/batch)
2017-03-09 10:22:56.063644: step 600, loss = 2.0006 (571.0 examples/sec; 0.088 sec/batch)
2017-03-09 10:23:05.067732: step 700, loss = 1.8361 (573.2 examples/sec; 0.087 sec/batch)
2017-03-09 10:23:14.003720: step 800, loss = 1.8202 (570.3 examples/sec; 0.088 sec/batch)
2017-03-09 10:23:23.006292: step 900, loss = 1.9278 (569.8 examples/sec; 0.088 sec/batch)
2017-03-09 10:23:31.998452: step 1000, loss = 1.7752 (571.4 examples/sec; 0.087 sec/batch)
2017-03-09 1

In [6]:
# `info` contains every piece of information related to the trained model.
# We're interested in the stats only, so we extract just info["stats"]
# and display it as a one-row table. Let's use a Pandas DataFrame for that.
df = pd.DataFrame.from_records(info["stats"], index=[0])
df

Unnamed: 0,dataset,model,test,train,validation
0,CIFAR-10,VGG,0.3509,0.35594,0.3509


In [7]:
# 2: train it again for another epoch
# Note that the `force_restart` parameter has been removed.
# `epochs` is the TOTAL number of epochs for the trained model.
# Since we already trained it for a single epoch,
# we set "epochs": 2 in order to train it for one more epoch

In [8]:
# Continue the CIFAR-10 training: "epochs" is raised to 2 and
# `force_restart` is not passed. All other settings match the first run.
regularization_cfg = {
    "l2": 1e-5,
    "augmentation": {
        "name": "FlipLR",
        "fn": tf.image.random_flip_left_right,
    },
}
optimizer_cfg = {
    "optimizer": tf.train.AdamOptimizer,
    "args": {
        "learning_rate": 1e-3,
        "beta1": 0.9,
        "beta2": 0.99,
        "epsilon": 1e-8,
    },
}

with tf.device(device):
    info = train(
        model=vgg,
        dataset=cifar10,
        hyperparameters={
            "epochs": 2,
            "batch_size": 50,
            "regularizations": regularization_cfg,
            "gd": optimizer_cfg,
        })

2017-03-09 10:24:30.244383: step 1100, loss = 1.6141 (569.0 examples/sec; 0.088 sec/batch)
2017-03-09 10:24:39.355711: step 1200, loss = 1.5945 (568.4 examples/sec; 0.088 sec/batch)
2017-03-09 10:24:48.377192: step 1300, loss = 1.5742 (567.7 examples/sec; 0.088 sec/batch)
2017-03-09 10:24:57.380686: step 1400, loss = 1.6840 (566.8 examples/sec; 0.088 sec/batch)
2017-03-09 10:25:06.408212: step 1500, loss = 1.7028 (568.9 examples/sec; 0.088 sec/batch)
2017-03-09 10:25:15.413003: step 1600, loss = 1.3821 (564.5 examples/sec; 0.089 sec/batch)
2017-03-09 10:25:24.401205: step 1700, loss = 1.3084 (567.8 examples/sec; 0.088 sec/batch)
2017-03-09 10:25:33.378194: step 1800, loss = 1.5210 (566.6 examples/sec; 0.088 sec/batch)
2017-03-09 10:25:42.365851: step 1900, loss = 1.3080 (566.6 examples/sec; 0.088 sec/batch)
2017-03-09 10:25:51.353771: step 2000, loss = 0.9364 (566.4 examples/sec; 0.088 sec/batch)
2017-03-09 10:25:57.054625 (2): train accuracy = 0.500 validation accuracy = 0.559


In [9]:
# Display the stats of the run above (info["stats"]) as a one-row table.
# Let's use a Pandas DataFrame for that
df = pd.DataFrame.from_records(info["stats"], index=[0])
df

Unnamed: 0,dataset,model,test,train,validation
0,CIFAR-10,VGG,0.559,0.57408,0.559


In [10]:
# Save the info of the last trained model (VGG on CIFAR-10) under its own
# name: the transfer learning and fine tuning cells below read
# vggInfo["paths"]["best"] to locate the checkpoint to start from.
vggInfo = info

In [11]:
# 3: TRANSFER LEARNING
# Use the best model trained on CIFAR-10 to classify CIFAR-100 images.
# Only the softmax linear scope (which now has 100 neurons) is trained;
# every other previously trained layer is kept constant.
# DyTB saves the "best" model (w.r.t. a metric) in a separate folder, so the
# weights are loaded from vggInfo["paths"]["best"], the path of the best
# model trained so far.
cifar100 = Cifar100.Cifar100()

regularization_cfg = {
    "l2": 1e-5,
    "augmentation": {
        "name": "FlipLR",
        "fn": tf.image.random_flip_left_right,
    },
}
optimizer_cfg = {
    "optimizer": tf.train.AdamOptimizer,
    "args": {
        "learning_rate": 1e-3,
        "beta1": 0.9,
        "beta2": 0.99,
        "epsilon": 1e-8,
    },
}

# The same scope is both excluded from the checkpoint and marked as the
# only trainable one.
softmax_scope = "VGG/softmax_linear"
surgery_cfg = {
    "checkpoint_path": vggInfo["paths"]["best"],
    "exclude_scopes": softmax_scope,
    "trainable_scopes": softmax_scope,
}

with tf.device(device):
    transferInfo = train(
        model=vgg,
        dataset=cifar100,
        hyperparameters={
            "epochs": 1,
            "batch_size": 50,
            "regularizations": regularization_cfg,
            "gd": optimizer_cfg,
        },
        force_restart=True,
        surgery=surgery_cfg)

2017-03-09 10:26:40.740317: step 0, loss = 4.7881 (18.2 examples/sec; 2.751 sec/batch)
2017-03-09 10:26:49.798616: step 100, loss = 4.6482 (571.8 examples/sec; 0.087 sec/batch)
2017-03-09 10:26:58.722593: step 200, loss = 4.6415 (570.5 examples/sec; 0.088 sec/batch)
2017-03-09 10:27:07.663329: step 300, loss = 4.6426 (567.4 examples/sec; 0.088 sec/batch)
2017-03-09 10:27:16.674381: step 400, loss = 4.6374 (572.5 examples/sec; 0.087 sec/batch)
2017-03-09 10:27:25.641717: step 500, loss = 4.6342 (568.5 examples/sec; 0.088 sec/batch)
2017-03-09 10:27:34.601394: step 600, loss = 4.6304 (570.7 examples/sec; 0.088 sec/batch)
2017-03-09 10:27:43.630895: step 700, loss = 4.6243 (567.8 examples/sec; 0.088 sec/batch)
2017-03-09 10:27:52.641419: step 800, loss = 4.6219 (570.7 examples/sec; 0.088 sec/batch)
2017-03-09 10:28:01.675475: step 900, loss = 4.6192 (569.8 examples/sec; 0.088 sec/batch)
2017-03-09 10:28:10.648537: step 1000, loss = 4.6115 (567.3 examples/sec; 0.088 sec/batch)
2017-03-09 1

In [12]:
# 4: FINE TUNING:
# Start from the model pointed to by vggInfo and tune the whole network
# on CIFAR-100, retraining it end-to-end from the learned weights.
# Compared with transfer learning, the surgery parameter simply has no
# "trainable_scopes" entry.
regularization_cfg = {
    "l2": 1e-5,
    "augmentation": {
        "name": "FlipLR",
        "fn": tf.image.random_flip_left_right,
    },
}
optimizer_cfg = {
    "optimizer": tf.train.AdamOptimizer,
    "args": {
        "learning_rate": 1e-3,
        "beta1": 0.9,
        "beta2": 0.99,
        "epsilon": 1e-8,
    },
}
surgery_cfg = {
    "checkpoint_path": vggInfo["paths"]["best"],
    "exclude_scopes": "VGG/softmax_linear",
}

with tf.device(device):
    fineTuningInfo = train(
        model=vgg,
        dataset=cifar100,
        hyperparameters={
            "epochs": 1,
            "batch_size": 50,
            "regularizations": regularization_cfg,
            "gd": optimizer_cfg,
        },
        force_restart=True,
        surgery=surgery_cfg)


2017-03-09 10:29:00.201507: step 0, loss = 4.6236 (18.2 examples/sec; 2.743 sec/batch)
2017-03-09 10:29:09.263967: step 100, loss = 4.6435 (570.5 examples/sec; 0.088 sec/batch)
2017-03-09 10:29:18.263172: step 200, loss = 4.6449 (571.9 examples/sec; 0.087 sec/batch)
2017-03-09 10:29:27.198371: step 300, loss = 4.6407 (573.9 examples/sec; 0.087 sec/batch)
2017-03-09 10:29:36.164529: step 400, loss = 4.6365 (572.2 examples/sec; 0.087 sec/batch)
2017-03-09 10:29:45.133434: step 500, loss = 4.6321 (569.9 examples/sec; 0.088 sec/batch)
2017-03-09 10:29:54.071067: step 600, loss = 4.6356 (570.5 examples/sec; 0.088 sec/batch)
2017-03-09 10:30:03.000208: step 700, loss = 4.6269 (574.1 examples/sec; 0.087 sec/batch)
2017-03-09 10:30:11.989766: step 800, loss = 4.6260 (571.1 examples/sec; 0.088 sec/batch)
2017-03-09 10:30:20.974848: step 900, loss = 4.6198 (568.8 examples/sec; 0.088 sec/batch)
2017-03-09 10:30:30.015285: step 1000, loss = 4.6232 (567.9 examples/sec; 0.088 sec/batch)
2017-03-09 1

In [13]:
# Compare the performance of Transfer learning and Fine Tuning.
# This cell shows the transfer learning stats (transferInfo["stats"]).
# NOTE(review): in the recorded run both CIFAR-100 tables report an accuracy
# of about 0.01 and the logged loss stays near 4.6. With 100 output neurons
# that looks like chance level, i.e. neither run appears to have learned.
# Confirm before drawing conclusions from the comparison.
df = pd.DataFrame.from_records(transferInfo["stats"], index=[0])
df

Unnamed: 0,dataset,model,test,train,validation
0,CIFAR-100,VGG,0.01,0.01024,0.01


In [14]:
# Fine tuning stats (fineTuningInfo["stats"]), shown as a one-row table.
df = pd.DataFrame.from_records(fineTuningInfo["stats"], index=[0])
df

Unnamed: 0,dataset,model,test,train,validation
0,CIFAR-100,VGG,0.01,0.01038,0.01


In [15]:
# For completeness, let's see what an info object contains.
# `info` is still the result of the second CIFAR-10 run: the CIFAR-100 runs
# stored their results in transferInfo and fineTuningInfo instead.
pprint.pprint(info, indent=4)

{   'args': {   'batch_size': 50,
                'checkpoint_path': '',
                'comment': '',
                'dataset': <dytb.inputs.Cifar10.Cifar10 object at 0x7f896c19a1d0>,
                'epochs': 2,
                'exclude_scopes': '',
                'force_restart': False,
                'gd': {   'args': {   'beta1': 0.9,
                                      'beta2': 0.99,
                                      'epsilon': 1e-08,
                                      'learning_rate': 0.001},
                          'optimizer': <class 'tensorflow.python.training.adam.AdamOptimizer'>},
                'lr_decay': {'enabled': False, 'epochs': 25, 'factor': 0.1},
                'model': <dytb.models.VGG.VGG object at 0x7f896c19a128>,
                'regularizations': {   'augmentation': <function random_flip_left_right at 0x7f89109cb0d0>,
                                       'l2': 1e-05},
                'trainable_scopes': ''},
    'paths': {   'best': '/mnt/da