In [1]:
DATASET_PATH="/data/edu/projects/learn/ml/deeplearning/data/caltech101"

### Split the dataset into training, validation and test sets

In [3]:
def copy_files(src, dst):
    import os, shutil
    
    if not os.path.exists(dst):
        os.makedirs(dst)
    
    for file in src:
        shutil.copy2(file, dst)

def split_data(dataset, ratio=(0.6,0.2,0.2)):
    import os, shutil
    from random import shuffle
    
    data = {}
    data['labels'] = [x for x in os.listdir(os.path.join(dataset,'raw')) if os.path.isdir(os.path.join(dataset, 'raw', x))]
    data['training'], data['validation'], data['test'] = {}, {}, {}
    
    for label in data['labels']:
        images = [x for x in os.listdir(os.path.join(dataset, 'raw', label))]
        
        shuffle(images)
        
        train_idx = int(len(images) * ratio[0])
        val_idx = train_idx + int(len(images) * ratio[1])
        training, validation, test = images[slice(0,train_idx)], images[slice(train_idx,val_idx)], images[slice(val_idx,len(images))]
        
        data['training'][label] = [os.path.join(dataset,'raw',label,image) for image in training]
        data['validation'][label] = [os.path.join(dataset,'raw',label,image) for image in validation]
        data['test'][label] = [os.path.join(dataset,'raw',label,image) for image in test]
        
        copy_files(data['training'][label], os.path.join(dataset, 'run', 'training', label))
        copy_files(data['validation'][label], os.path.join(dataset, 'run', 'validation', label))
        copy_files(data['test'][label], os.path.join(dataset, 'run', 'test', label))
    
    return data

data = split_data(DATASET_PATH)

### Create One Hot encoding for labels

In [4]:
from sklearn.preprocessing import LabelBinarizer
labels_onehot = LabelBinarizer().fit_transform(data['labels'])

### Setup Preprocessing for images

In [5]:
import os
import keras
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator

train_idg = ImageDataGenerator(
      samplewise_center            = True
    , samplewise_std_normalization = True
    , fill_mode                    = 'nearest'
    , rescale                      = 1./255
)

train_idg_generator = train_idg.flow_from_directory(
      directory   = os.path.join(DATASET_PATH,'run','training')
    , target_size = (224,224)
    , color_mode  = 'rgb'
    , class_mode  = 'categorical'
    , batch_size  = 32
)

val_idg = ImageDataGenerator(
      rescale                      = 1./255
)

val_idg_generator = val_idg.flow_from_directory(
      directory   = os.path.join(DATASET_PATH,'run','validation')
    , target_size = ()
    , color_mode  = 'rgb'
    , class_mode  = 'categorical'
)

Using TensorFlow backend.


Found 5171 images belonging to 101 classes.
Found 1695 images belonging to 101 classes.


### Build VGG19 Network

In [6]:
from keras.models import Sequential
from keras.layers import Conv2D, BatchNormalization, Activation, MaxPooling2D, Flatten, Dense, Dropout
from keras.initializers import he_normal

vgg19 = [
    
    ## Block 1
      Conv2D(filters=64, kernel_size=(3,3), padding='same', kernel_regularizer=keras.regularizers.l2(0.01), name='conv1_1', input_shape=(224,224,3))
    , BatchNormalization()
    , Activation('relu')
    , Conv2D(filters=64, kernel_size=(3,3), padding='same', kernel_initializer=he_normal(), name='conv1_2')
    , BatchNormalization()
    , Activation('relu')
    , MaxPooling2D((2,2), strides=(2,2), name='conv1_pool')
    
    ## Block 2
    , Conv2D(filters=128, kernel_size=(3,3), padding='same', kernel_initializer=he_normal(), name='conv2_1')
    , BatchNormalization()
    , Activation('relu')
    , Conv2D(filters=128, kernel_size=(3,3), padding='same', kernel_initializer=he_normal(), name='conv2_2')
    , BatchNormalization()
    , Activation('relu')
    , MaxPooling2D((2,2), strides=(2,2), name='conv2_pool')
    
    ## Block 3
    , Conv2D(filters=256, kernel_size=(3,3), padding='same', kernel_initializer=he_normal(), name='conv3_1')
    , BatchNormalization()
    , Activation('relu')
    , Conv2D(filters=256, kernel_size=(3,3), padding='same', kernel_initializer=he_normal(), name='conv3_2')
    , BatchNormalization()
    , Activation('relu')
    , Conv2D(filters=256, kernel_size=(3,3), padding='same', kernel_initializer=he_normal(), name='conv3_3')
    , BatchNormalization()
    , Activation('relu')
    , Conv2D(filters=256, kernel_size=(3,3), padding='same', kernel_initializer=he_normal(), name='conv3_4')
    , BatchNormalization()
    , Activation('relu')
    , MaxPooling2D((2,2), strides=(2,2), name='conv3_pool')
    
    ## Block 4
    , Conv2D(filters=512, kernel_size=(3,3), padding='same', kernel_initializer=he_normal(), name='conv4_1')
    , BatchNormalization()
    , Activation('relu')
    , Conv2D(filters=512, kernel_size=(3,3), padding='same', kernel_initializer=he_normal(), name='conv4_2')
    , BatchNormalization()
    , Activation('relu')
    , Conv2D(filters=512, kernel_size=(3,3), padding='same', kernel_initializer=he_normal(), name='conv4_3')
    , BatchNormalization()
    , Activation('relu')
    , Conv2D(filters=512, kernel_size=(3,3), padding='same', kernel_initializer=he_normal(), name='conv4_4')
    , BatchNormalization()
    , Activation('relu')
    , MaxPooling2D((2,2), strides=(2,2), name='conv4_pool')
    
    ## Block 5
    , Conv2D(filters=1024, kernel_size=(3,3), padding='same', kernel_initializer=he_normal(), name='conv5_1')
    , BatchNormalization()
    , Activation('relu')
    , Conv2D(filters=1024, kernel_size=(3,3), padding='same', kernel_initializer=he_normal(), name='conv5_2')
    , BatchNormalization()
    , Activation('relu')
    , Conv2D(filters=1024, kernel_size=(3,3), padding='same', kernel_initializer=he_normal(), name='conv5_3')
    , BatchNormalization()
    , Activation('relu')
    , Conv2D(filters=1024, kernel_size=(3,3), padding='same', kernel_initializer=he_normal(), name='conv5_4')
    , BatchNormalization()
    , Activation('relu')
    , MaxPooling2D((2,2), strides=(2,2), name='conv5_pool')
    
    ## Block 6
    , Flatten(name='flatten')
    , Dense(units=4096, use_bias=True, kernel_initializer=he_normal(), name='fc1')
    , BatchNormalization()
    , Activation('relu')
    , Dropout(0.4)
    , Dense(units=4096, use_bias=True, kernel_initializer=he_normal(), name='fc2')
    , BatchNormalization()
    , Activation('relu')
    , Dropout(0.4)
    , Dense(units=101, kernel_initializer=he_normal(), name='predictions')
    , BatchNormalization()
    , Activation('softmax')
]

model = Sequential(vgg19)

### Compile the mode

In [7]:
from keras.optimizers import Adam

model.compile(
      optimizer = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, decay=1e-6)
    , loss      = 'categorical_crossentropy'
    , metrics   = ['acc']
)

### Train the model

In [8]:
from keras.callbacks import TensorBoard

model.fit_generator(
      generator        = train_idg_generator
    , validation_data  = val_idg_generator
    , epochs           = 50
    , steps_per_epoch  = 5171/32
    , validation_steps = 1695/32
    , max_queue_size   = 100
    , callbacks        = [TensorBoard(
          log_dir='/data/tmp/trash/caltech101'
    )]
)

ResourceExhaustedError: OOM when allocating tensor with shape[50176,4096] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[node training/Adam/Variable_64/Assign (defined at /opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:402)  = Assign[T=DT_FLOAT, _grappler_relax_allocator_constraints=true, use_locking=true, validate_shape=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](training/Adam/Variable_64, training/Adam/zeros_140)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.


Caused by op 'training/Adam/Variable_64/Assign', defined at:
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 505, in start
    self.io_loop.start()
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 132, in start
    self.asyncio_loop.run_forever()
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/asyncio/base_events.py", line 427, in run_forever
    self._run_once()
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/asyncio/base_events.py", line 1440, in _run_once
    handle._run()
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/tornado/ioloop.py", line 758, in _run_callback
    ret = callback()
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/tornado/gen.py", line 1233, in inner
    self.run()
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/tornado/gen.py", line 1147, in run
    yielded = self.gen.send(value)
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 357, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 267, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 534, in execute_request
    user_expressions, allow_stdin,
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2819, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2845, in _run_cell
    return runner(coro)
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/IPython/core/async_helpers.py", line 67, in _pseudo_sync_runner
    coro.send(None)
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3020, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3191, in run_ast_nodes
    if (yield from self.run_code(code, result)):
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3267, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-8-208c2136301e>", line 11, in <module>
    log_dir='/data/tmp/trash/caltech101'
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/keras/legacy/interfaces.py", line 91, in wrapper
    return func(*args, **kwargs)
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/keras/engine/training.py", line 1418, in fit_generator
    initial_epoch=initial_epoch)
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/keras/engine/training_generator.py", line 40, in fit_generator
    model._make_train_function()
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/keras/engine/training.py", line 509, in _make_train_function
    loss=self.total_loss)
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/keras/legacy/interfaces.py", line 91, in wrapper
    return func(*args, **kwargs)
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/keras/optimizers.py", line 487, in get_updates
    ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/keras/optimizers.py", line 487, in <listcomp>
    ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 704, in zeros
    return variable(v, dtype=dtype, name=name)
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 402, in variable
    v = tf.Variable(value, dtype=tf.as_dtype(dtype), name=name)
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/ops/variables.py", line 183, in __call__
    return cls._variable_v1_call(*args, **kwargs)
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/ops/variables.py", line 146, in _variable_v1_call
    aggregation=aggregation)
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/ops/variables.py", line 125, in <lambda>
    previous_getter = lambda **kwargs: default_variable_creator(None, **kwargs)
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 2444, in default_variable_creator
    expected_shape=expected_shape, import_scope=import_scope)
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/ops/variables.py", line 187, in __call__
    return super(VariableMetaclass, cls).__call__(*args, **kwargs)
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/ops/variables.py", line 1329, in __init__
    constraint=constraint)
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/ops/variables.py", line 1481, in _init_from_args
    validate_shape=validate_shape).op
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/ops/state_ops.py", line 221, in assign
    validate_shape=validate_shape)
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/ops/gen_state_ops.py", line 61, in assign
    use_locking=use_locking, name=name)
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 488, in new_func
    return func(*args, **kwargs)
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3274, in create_op
    op_def=op_def)
  File "/opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1770, in __init__
    self._traceback = tf_stack.extract_stack()

ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[50176,4096] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[node training/Adam/Variable_64/Assign (defined at /opt/anaconda2/envs/tensorflow/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:402)  = Assign[T=DT_FLOAT, _grappler_relax_allocator_constraints=true, use_locking=true, validate_shape=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](training/Adam/Variable_64, training/Adam/zeros_140)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

