In [1]:
# Let's see how to use DTB to:
# 1: train a VGG-like network on CIFAR-10
# 2: continue training from the last iteration
# 3: do TRANSFER LEARNING from the trained model to another model that will be able to classify CIFAR-100
# 4: do FINE TUNING of the model trained on CIFAR-10 to solve the CIFAR-100 classification problem
# 5: compare the train/validation/test performance of the models

In [2]:
import tensorflow as tf
from dtb.inputs import Cifar10, Cifar100
from dtb.train import train
from dtb.models.VGG import VGG
import pandas as pd

In [3]:
# Instantiate the VGG model (class imported from dtb.models.VGG).
# This single instance is passed as `model` to every train() call in this notebook.
vgg = VGG()

In [4]:
# Instantiate the CIFAR-10 input source (Cifar10.Cifar10, imported from dtb.inputs).
# It is passed as `dataset` to the two CIFAR-10 train() calls below.
cifar10 = Cifar10.Cifar10()

In [5]:
# 1: Train VGG on CIFAR-10 for one epoch

# Device on which the training process is placed (GPU:0).
# The resume-training cell further down reads this variable too.
device = '/gpu:0'

# Regularization settings: an L2 coefficient plus a random left/right flip
# of the input image used as data augmentation.
first_run_regularizations = {
    "l2": 1e-5,
    "augmentation": lambda image: tf.image.random_flip_left_right(image)
}

# Gradient descent settings: the optimizer class and its arguments
# (presumably forwarded to the optimizer constructor by dtb -- TODO confirm).
first_run_gd = {
    "optimizer": tf.train.AdamOptimizer,
    "args": {
        "learning_rate": 1e-3,
        "beta1": 0.9,
        "beta2": 0.99,
        "epsilon": 1e-8
    }
}

first_run_hyperparameters = {
    "epochs": 1,
    "batch_size": 50,
    "regularizations": first_run_regularizations,
    "gd": first_run_gd
}

with tf.device(device):
    # force_restart=True presumably discards any previous run of this
    # model/dataset pair and starts from scratch -- TODO confirm in dtb.train.
    info = train(
        model=vgg,
        dataset=cifar10,
        hyperparameters=first_run_hyperparameters,
        force_restart=True)

2017-03-04 11:03:15.326962: step 0, loss = 2.6192 (16.6 examples/sec; 3.016 sec/batch)
2017-03-04 11:03:23.936923: step 100, loss = 2.2548 (608.9 examples/sec; 0.082 sec/batch)
2017-03-04 11:03:32.367810: step 200, loss = 1.9043 (606.1 examples/sec; 0.082 sec/batch)
2017-03-04 11:03:40.835439: step 300, loss = 1.7957 (608.4 examples/sec; 0.082 sec/batch)
2017-03-04 11:03:49.315331: step 400, loss = 1.9559 (610.3 examples/sec; 0.082 sec/batch)
2017-03-04 11:03:57.787514: step 500, loss = 2.0098 (611.3 examples/sec; 0.082 sec/batch)
2017-03-04 11:04:06.246194: step 600, loss = 1.6377 (605.0 examples/sec; 0.083 sec/batch)
2017-03-04 11:04:14.943923: step 700, loss = 1.7483 (578.4 examples/sec; 0.086 sec/batch)
2017-03-04 11:04:23.910537: step 800, loss = 1.4907 (572.0 examples/sec; 0.087 sec/batch)
2017-03-04 11:04:32.906995: step 900, loss = 1.6349 (569.5 examples/sec; 0.088 sec/batch)
2017-03-04 11:04:41.970235: step 1000, loss = 1.5109 (567.9 examples/sec; 0.088 sec/batch)
2017-03-04 1

In [7]:
# Show the results of the first run as a one-row table (pandas DataFrame).
# The checkpoint path is left out of the table.
hidden_fields = ["checkpoint_path"]
df = pd.DataFrame.from_records(
    info,
    index=[0],
    exclude=hidden_fields)
df

Unnamed: 0,dataset,model,test,train,validation
0,CIFAR-10,VGG,0.4149,0.42068,0.4149


In [8]:
# 2: train it again for another epoch
# Note that the `force_restart` parameter has been removed.
# `epochs` is the TOTAL number of epochs for the trained model.
# Thus, since we previously trained it for a single epoch,
# we set "epochs": 2 in order to train it for one more epoch

In [9]:
# Resume training on CIFAR-10: same settings as the first run, except that
# "epochs" is now 2 and `force_restart` is not passed.
resume_gd = {
    "optimizer": tf.train.AdamOptimizer,
    "args": {
        "learning_rate": 1e-3,
        "beta1": 0.9,
        "beta2": 0.99,
        "epsilon": 1e-8
    }
}
resume_hyperparameters = {
    "epochs": 2,
    "batch_size": 50,
    "regularizations": {
        "l2": 1e-5,
        "augmentation": lambda image: tf.image.random_flip_left_right(image)
    },
    "gd": resume_gd
}

with tf.device(device):
    info = train(
        model=vgg,
        dataset=cifar10,
        hyperparameters=resume_hyperparameters)

2017-03-04 11:06:38.600618: step 1100, loss = 1.1378 (606.9 examples/sec; 0.082 sec/batch)
2017-03-04 11:06:47.345128: step 1200, loss = 1.8463 (568.2 examples/sec; 0.088 sec/batch)
2017-03-04 11:06:56.400219: step 1300, loss = 1.3092 (566.9 examples/sec; 0.088 sec/batch)
2017-03-04 11:07:05.447712: step 1400, loss = 1.4720 (565.6 examples/sec; 0.088 sec/batch)
2017-03-04 11:07:14.512162: step 1500, loss = 1.1799 (567.3 examples/sec; 0.088 sec/batch)
2017-03-04 11:07:23.532545: step 1600, loss = 1.2896 (567.8 examples/sec; 0.088 sec/batch)
2017-03-04 11:07:32.564840: step 1700, loss = 1.1191 (566.9 examples/sec; 0.088 sec/batch)
2017-03-04 11:07:41.617256: step 1800, loss = 1.3339 (566.8 examples/sec; 0.088 sec/batch)
2017-03-04 11:07:50.660223: step 1900, loss = 0.8843 (569.4 examples/sec; 0.088 sec/batch)
2017-03-04 11:07:59.701727: step 2000, loss = 0.9021 (565.3 examples/sec; 0.088 sec/batch)
2017-03-04 11:08:10.669972 (2): train accuracy = 0.640 validation accuracy = 0.623


In [10]:
# Show the results after the second epoch as a one-row pandas DataFrame,
# omitting the checkpoint path column.
columns_to_skip = ["checkpoint_path"]
df = pd.DataFrame.from_records(info, exclude=columns_to_skip, index=[0])
df

Unnamed: 0,dataset,model,test,train,validation
0,CIFAR-10,VGG,0.6233,0.63194,0.6233


In [11]:
# Keep a reference to the info returned by the last CIFAR-10 training run.
# The transfer learning and fine tuning cells below read
# vggInfo["checkpoint_path"] to locate the trained weights.
vggInfo = info

In [13]:
# 3: TRANSFER LEARNING
# Use the best model trained on CIFAR-10 to classify CIFAR-100 images.
# Thus we train ONLY the softmax linear scope (that has 100 neurons, now),
# keeping constant any other previously trained layer.
#
# NOTE(review): in the recorded run of this cell the loss stays around 4.6
# (about ln(100), i.e. a uniform prediction over 100 classes) and the
# reported accuracy is 0.01, which is chance level. The new layer does not
# appear to learn anything in one epoch -- verify the surgery settings.
cifar100 = Cifar100.Cifar100()

# Reuse the `device` variable defined in the first training cell instead of
# repeating the "/gpu:0" literal, consistently with the resume-training cell.
with tf.device(device):
    transferInfo = train(
        model=vgg,
        dataset=cifar100,
        hyperparameters={
            "epochs": 1,
            "batch_size": 50,
            "regularizations": {
                "l2": 1e-5,
                "augmentation": lambda image: tf.image.random_flip_left_right(image)
            },
            "gd": {
                "optimizer": tf.train.AdamOptimizer,
                "args": {
                    "learning_rate": 1e-3,
                    "beta1": 0.9,
                    "beta2": 0.99,
                    "epsilon": 1e-8
                }
            }
        },
        force_restart=True,
        surgery={
            # Checkpoint produced by the CIFAR-10 training above
            "checkpoint_path": vggInfo["checkpoint_path"],
            # Presumably the scopes NOT restored from the checkpoint (the
            # output layer now has a different size) -- TODO confirm in dtb
            "exclude_scopes": "VGG/softmax_linear",
            # Only this scope is trained; every other layer is kept constant
            "trainable_scopes": "VGG/softmax_linear"
        })

2017-03-04 11:10:41.172741: step 0, loss = 4.7992 (24.9 examples/sec; 2.008 sec/batch)
2017-03-04 11:10:49.769400: step 100, loss = 4.6473 (610.4 examples/sec; 0.082 sec/batch)
2017-03-04 11:10:58.212552: step 200, loss = 4.6428 (610.5 examples/sec; 0.082 sec/batch)
2017-03-04 11:11:06.706536: step 300, loss = 4.6416 (602.7 examples/sec; 0.083 sec/batch)
2017-03-04 11:11:15.596490: step 400, loss = 4.6367 (569.7 examples/sec; 0.088 sec/batch)
2017-03-04 11:11:24.631536: step 500, loss = 4.6333 (571.7 examples/sec; 0.087 sec/batch)
2017-03-04 11:11:33.612108: step 600, loss = 4.6270 (572.3 examples/sec; 0.087 sec/batch)
2017-03-04 11:11:42.590059: step 700, loss = 4.6290 (571.0 examples/sec; 0.088 sec/batch)
2017-03-04 11:11:51.569099: step 800, loss = 4.6209 (574.1 examples/sec; 0.087 sec/batch)
2017-03-04 11:12:00.594690: step 900, loss = 4.6149 (568.0 examples/sec; 0.088 sec/batch)
2017-03-04 11:12:09.587846: step 1000, loss = 4.6160 (569.8 examples/sec; 0.088 sec/batch)
2017-03-04 1

In [15]:
# 4: FINE TUNING:
# Use the model pointed to by vggInfo to fine tune the whole network
# on CIFAR-100.
# Let's retrain the whole network end-to-end, starting from the learned weights.
# Compared with the transfer learning cell, just remove the
# "trainable_scopes" entry from the surgery parameter.

# Reuse the `device` variable defined in the first training cell instead of
# repeating the "/gpu:0" literal, consistently with the resume-training cell.
with tf.device(device):
    fineTuningInfo = train(
        model=vgg,
        dataset=cifar100,
        hyperparameters={
            "epochs": 1,
            "batch_size": 50,
            "regularizations": {
                "l2": 1e-5,
                "augmentation": lambda image: tf.image.random_flip_left_right(image)
            },
            "gd": {
                "optimizer": tf.train.AdamOptimizer,
                "args": {
                    "learning_rate": 1e-3,
                    "beta1": 0.9,
                    "beta2": 0.99,
                    "epsilon": 1e-8
                }
            }
        },
        force_restart=True,
        surgery={
            # Checkpoint produced by the CIFAR-10 training above
            "checkpoint_path": vggInfo["checkpoint_path"],
            # Presumably the scopes NOT restored from the checkpoint (the
            # output layer now has a different size) -- TODO confirm in dtb
            "exclude_scopes": "VGG/softmax_linear"
        })


2017-03-04 11:16:28.748857: step 0, loss = 4.6900 (24.9 examples/sec; 2.005 sec/batch)
2017-03-04 11:16:37.754521: step 100, loss = 4.6454 (568.3 examples/sec; 0.088 sec/batch)
2017-03-04 11:16:46.741190: step 200, loss = 4.5441 (573.5 examples/sec; 0.087 sec/batch)
2017-03-04 11:16:55.690840: step 300, loss = 4.5330 (572.5 examples/sec; 0.087 sec/batch)
2017-03-04 11:17:04.721462: step 400, loss = 4.2281 (573.6 examples/sec; 0.087 sec/batch)
2017-03-04 11:17:13.713107: step 500, loss = 4.2387 (573.5 examples/sec; 0.087 sec/batch)
2017-03-04 11:17:22.674802: step 600, loss = 4.3677 (569.6 examples/sec; 0.088 sec/batch)
2017-03-04 11:17:31.670357: step 700, loss = 4.2438 (572.0 examples/sec; 0.087 sec/batch)
2017-03-04 11:17:40.686201: step 800, loss = 4.2133 (573.3 examples/sec; 0.087 sec/batch)
2017-03-04 11:17:49.674840: step 900, loss = 4.2656 (573.1 examples/sec; 0.087 sec/batch)
2017-03-04 11:17:58.704426: step 1000, loss = 4.0167 (572.1 examples/sec; 0.087 sec/batch)
2017-03-04 1

In [17]:
# Compare the performance of Transfer Learning and Fine Tuning.
# This cell shows the transfer learning results as a one-row table,
# leaving out the checkpoint path.
transfer_hidden_fields = ["checkpoint_path"]
df = pd.DataFrame.from_records(
    transferInfo,
    index=[0],
    exclude=transfer_hidden_fields)
df

Unnamed: 0,dataset,model,test,train,validation
0,CIFAR-100,VGG,0.01,0.00992,0.01


In [18]:
# Fine tuning results as a one-row table, leaving out the checkpoint path.
fine_tuning_hidden_fields = ["checkpoint_path"]
df = pd.DataFrame.from_records(
    fineTuningInfo,
    index=[0],
    exclude=fine_tuning_hidden_fields)
df

Unnamed: 0,dataset,model,test,train,validation
0,CIFAR-100,VGG,0.0438,0.04466,0.0438
