In [1]:
"""
Example of how to use an HDF5 dataset with TFLearn. HDF5 is a data model,
library, and file format for storing and managing data. It can handle large
datasets that do not fit entirely in RAM. Note that this example only gives
a quick compatibility demonstration. In practice, there is no real need to
use HDF5 for a small dataset such as CIFAR-10.
"""

from __future__ import division, print_function, absolute_import

import tflearn
from tflearn.layers.core import *
from tflearn.layers.conv import *
from tflearn.data_utils import *
from tflearn.layers.normalization import *
from tflearn.layers.estimator import regression

import h5py

Scipy not supported!


In [2]:
# CIFAR-10 dataset: load the train/test splits and one-hot encode the labels
# into 10 classes.
from tflearn.datasets import cifar10
(X, Y), (X_test, Y_test) = cifar10.load_data()
Y = to_categorical(Y, 10)
Y_test = to_categorical(Y_test, 10)

# Create an HDF5 file from the CIFAR-10 arrays.
# The `with` block closes the file even if one of the writes raises, so a
# failed run does not leave an open handle on 'data.h5'.
import h5py
with h5py.File('data.h5', 'w') as h5f:
    h5f.create_dataset('cifar10_X', data=X)
    h5f.create_dataset('cifar10_Y', data=Y)
    h5f.create_dataset('cifar10_X_test', data=X_test)
    h5f.create_dataset('cifar10_Y_test', data=Y_test)

In [3]:
# Re-open the HDF5 file read-only and look up the four CIFAR-10 datasets.
# The handle is intentionally not closed in this cell: the objects bound below
# are passed to model.fit() in the HDF5 training cell, which closes h5f.
h5f = h5py.File('data.h5', 'r')
X_h, Y_h, X_test_h, Y_test_h = (
    h5f[key]
    for key in ('cifar10_X', 'cifar10_Y', 'cifar10_X_test', 'cifar10_Y_test')
)

In [4]:
# Build network: two convolutional stages followed by a fully connected
# classifier with 10 softmax outputs.
# NOTE(review): `tf` is not imported explicitly in the visible cells;
# presumably it arrives through one of the tflearn wildcard imports - confirm.
network = input_data(shape=[None, 32, 32, 3], dtype=tf.float32)

# Stage 1: one convolution (32 filters, filter size 3), then max pooling.
network = conv_2d(network, nb_filter=32, filter_size=3, activation='relu')
network = max_pool_2d(network, kernel_size=2)

# Stage 2: two stacked convolutions (64 filters each), then max pooling.
network = conv_2d(network, nb_filter=64, filter_size=3, activation='relu')
network = conv_2d(network, nb_filter=64, filter_size=3, activation='relu')
network = max_pool_2d(network, kernel_size=2)

# Classifier: 512-unit hidden layer, dropout, 10-way softmax.
network = fully_connected(network, n_units=512, activation='relu')
network = dropout(network, keep_prob=0.5)
network = fully_connected(network, n_units=10, activation='softmax')

# Training op: Adam on categorical cross-entropy.
network = regression(
    network,
    loss='categorical_crossentropy',
    optimizer='adam',
    learning_rate=0.001,
)

In [12]:
import time

class Timer(object):
    """Context manager that measures how long its `with` block takes.

    On exit it stores `end`, `secs` (elapsed seconds, from `time.time()`)
    and `msecs` (the same duration in milliseconds) on the instance. If
    `verbose` is true, the millisecond figure is also printed.
    """

    def __init__(self, verbose=False):
        # verbose: print the elapsed time when the block exits.
        self.verbose = verbose

    def __enter__(self):
        # Record the start time and hand back the timer so callers can write
        # `with Timer() as t:` and read `t.secs` afterwards.
        self.start = time.time()
        return self

    def __exit__(self, *exc_info):
        # Exception details are ignored; returning None lets any exception
        # raised inside the block propagate.
        self.end = time.time()
        self.secs = self.end - self.start
        self.msecs = 1000 * self.secs  # millisecs
        if not self.verbose:
            return
        print('elapsed time: %f ms' % self.msecs)

In [17]:
# Training using H5py data.
# Only model construction and fit() are timed. The HDF5 file is closed in a
# `finally` clause outside the timed region, so the close is not counted in
# the measurement and still happens if training raises.
try:
    with Timer() as t:
        model = tflearn.DNN(network, tensorboard_verbose=0)
        model.fit(X_h, Y_h, n_epoch=2, shuffle=True,
                  validation_set=(X_test_h, Y_test_h),
                  show_metric=True, batch_size=96, run_id='cifar10_cnn_h5')
finally:
    # X_h / Y_h / X_test_h / Y_test_h were read from h5f and should not be
    # used after this point; re-run the loading cell to open the file again.
    h5f.close()
print("=> elasped lpush: %s s" % t.secs)

Training Step: 1042  | total loss: [1m[32m0.99687[0m[0m
| Adam | epoch: 002 | loss: 0.99687 - acc: 0.6543 | val_loss: 0.96694 - val_acc: 0.6569 -- iter: 50000/50000
Training Step: 1042  | total loss: [1m[32m0.99687[0m[0m
| Adam | epoch: 002 | loss: 0.99687 - acc: 0.6543 | val_loss: 0.96694 - val_acc: 0.6569 -- iter: 50000/50000
--
=> elasped lpush: 212.398781061 s


In [14]:
# Training using the arrays returned by cifar10.load_data() (X, Y, X_test,
# Y_test), timed the same way as the HDF5 run for comparison.
with Timer() as t:
    model = tflearn.DNN(network, tensorboard_verbose=0)
    model.fit(
        X, Y,
        n_epoch=2,
        shuffle=True,
        validation_set=(X_test, Y_test),
        show_metric=True,
        batch_size=96,
        run_id='cifar10_cnn',
    )
# t.secs is set by Timer.__exit__ once the block above has finished.
print("=> elasped lpush: %s s" % t.secs)

Training Step: 1042  | total loss: [1m[32m0.95795[0m[0m
| Adam | epoch: 002 | loss: 0.95795 - acc: 0.6690 | val_loss: 0.98746 - val_acc: 0.6522 -- iter: 50000/50000
Training Step: 1042  | total loss: [1m[32m0.95795[0m[0m
| Adam | epoch: 002 | loss: 0.95795 - acc: 0.6690 | val_loss: 0.98746 - val_acc: 0.6522 -- iter: 50000/50000
--
=> elasped lpush: 159.402976036 s
