# Bottlenecking

## 1. Setup and configuration

In [1]:
# Necessary imports
import numpy  as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras.layers.normalization import BatchNormalization
from keras.utils.data_utils import get_file
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from scipy.misc import imresize
from keras import optimizers
import matplotlib.pyplot as plt

Using TensorFlow backend.


In [2]:
%matplotlib inline

In [3]:
# Seed NumPy's global random generator for reproducibility.
# NOTE(review): only NumPy is seeded here; the Keras backend (TensorFlow,
# per the import output above) keeps its own RNG state — confirm whether
# that also needs seeding for repeatable runs.
np.random.seed(0)

In [4]:
# Local data paths, relative to the notebook's working directory.
# TRAIN_PATH / VALID_PATH: CSV files read with pandas below
#   (presumably fold 1 of the "gender" folds, judging by the file names).
# IMG_PATH: "%s" template that is filled with each row's "full_path" value
#   to obtain a path to the image file.
TRAIN_PATH = "../../../../data/wiki_folds/gender/train_1.csv"
VALID_PATH = "../../../../data/wiki_folds/gender/valid_1.csv"
IMG_PATH  = "../../../../data/wiki/%s"

In [5]:
# Input image dimensions (images are resized to IMG_DIMS before being fed
# to the network)
IMG_WIDTH  = 224
IMG_HEIGHT = 224
IMG_DIMS = (IMG_WIDTH,IMG_HEIGHT)

# Pre-trained VGG16 weights: download URL and local cache file name.
# NOTE(review): other comments in this notebook call these "VGG16-Face"
# weights, but the URL points at the VGG16 release of
# fchollet/deep-learning-models — confirm which weights are intended.
TF_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels.h5'
# Plain file name with no embedded quote characters, so the cached file on
# disk gets a clean name.
VGG16_LOCAL_W   = "vgg16_weights_tf_dim_ordering_tf_kernels.h5"

In [6]:
# Experiment config
# NUM_EPOCHS:     number of epochs passed to fit_generator
# BATCH_SIZE:     rows drawn per generator batch; also used as the number of
#                 training / validation samples per epoch
# TARGET_FEATURE: name of the CSV column used as the training label
NUM_EPOCHS     = 1000
BATCH_SIZE     = 32
TARGET_FEATURE = "age"

## 2. Loading the data

In [7]:
# Load the training and validation splits of this fold, keeping only the
# columns used further down (image path plus the two label columns);
# every other, unnecessary column is dropped.
train = pd.read_csv(TRAIN_PATH).loc[:, ["full_path", "gender", "age"]]
valid = pd.read_csv(VALID_PATH).loc[:, ["full_path", "gender", "age"]]

In [8]:
# Setting data path to be relative to the current directory
def true_path(x):
    """Expand a stored image path `x` through the IMG_PATH template.

    A value that already starts with the template's prefix is returned
    unchanged, so re-running this cell does not prefix the paths twice.
    """
    prefix = IMG_PATH % ""
    # str() mirrors what "%s" formatting does, so non-string cells are
    # handled the same way as by the template itself.
    return x if str(x).startswith(prefix) else IMG_PATH % x

train["full_path"] = train["full_path"].apply(true_path)
valid["full_path"] = valid["full_path"].apply(true_path)

In [9]:
# Preview training set
train.head()

Unnamed: 0,full_path,gender,age
0,../../../../data/wiki/79/3781079_1888-02-15_19...,1.0,93
1,../../../../data/wiki/72/8801772_1950-11-09_20...,1.0,62
2,../../../../data/wiki/10/33256210_1979-04-06_2...,1.0,33
3,../../../../data/wiki/07/14853007_1984-05-07_2...,0.0,23
4,../../../../data/wiki/27/11914827_1986-05-21_2...,1.0,28


In [10]:
# Preview validation set
valid.head()

Unnamed: 0,full_path,gender,age
0,../../../../data/wiki/17/8740317_1961-08-29_20...,1.0,48
1,../../../../data/wiki/40/31819740_1984-10-18_2...,1.0,30
2,../../../../data/wiki/81/32702381_1929-02-17_1...,1.0,27
3,../../../../data/wiki/69/43906569_1930-04-24_1...,1.0,22
4,../../../../data/wiki/69/16116069_1942-04-20_2...,0.0,69


## 3. Declaring the network

In [11]:
# VGG16 network: convolutional base, built block by block.
# Each tuple is (number of filters, number of 3x3 conv layers) for one block.
# Every conv layer is preceded by 1-pixel zero padding and every block ends
# with a 2x2 max-pool of stride 2. Layers are named conv<block>_<index>.
model = Sequential()
for block_no, (n_filters, n_convs) in enumerate(
        [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)], 1):
    for conv_no in range(1, n_convs + 1):
        if block_no == 1 and conv_no == 1:
            # Only the very first layer declares the network's input shape
            model.add(ZeroPadding2D((1, 1), input_shape=(IMG_WIDTH, IMG_HEIGHT,3)))
        else:
            model.add(ZeroPadding2D((1, 1)))
        model.add(Convolution2D(n_filters, 3, 3, activation='relu',
                                name='conv%d_%d' % (block_no, conv_no)))
    model.add(MaxPooling2D((2, 2), strides=(2, 2), dim_ordering="tf"))

In [12]:
# Classifier head: flatten the convolutional features, then two 4096-unit
# ReLU layers followed by a 1000-way softmax.
# NOTE(review): the model is later compiled with a mean_squared_error loss
# against a single TARGET_FEATURE column, while this last layer emits 1000
# softmax values — confirm whether the head was meant to be replaced once
# the pre-trained weights are loaded.
# NOTE(review): "fc_w" looks like a typo for "fc_2" — confirm before renaming.
model.add(Flatten())
model.add(Dense(4096, activation='relu', name="fc_1"))
model.add(Dense(4096, activation='relu', name="fc_w"))
model.add(Dense(1000, activation='softmax',  name="predictions"))

In [13]:
# Obtain the pre-trained weights file: get_file is given the local file name,
# the download URL and the 'models' cache sub-directory, and its return value
# is kept as the path to load from.
# NOTE(review): this was labelled "VGG16-Face", but TF_WEIGHTS_PATH points at
# the VGG16 release of fchollet/deep-learning-models — confirm.
weights_path = get_file(VGG16_LOCAL_W,TF_WEIGHTS_PATH, cache_subdir='models')

In [14]:
# Load the downloaded pre-trained weights into the network declared above.
# NOTE(review): the saved output of this cell is a ResourceExhaustedError
# (OOM while allocating the [25088, 4096] weight tensor on gpu:0), and the
# cells after it carry no execution count — the rest of the notebook appears
# not to have run in the saved session.
model.load_weights(weights_path)

ResourceExhaustedError: OOM when allocating tensor with shape[25088,4096]
	 [[Node: random_uniform_13/RandomUniform = RandomUniform[T=DT_INT32, dtype=DT_FLOAT, seed=87654321, seed2=204159575, _device="/job:localhost/replica:0/task:0/gpu:0"](random_uniform_13/shape)]]

Caused by op u'random_uniform_13/RandomUniform', defined at:
  File "/usr/lib/python2.7/runpy.py", line 162, in _run_module_as_main
    "__main__", fname, loader, pkg_name)
  File "/usr/lib/python2.7/runpy.py", line 72, in _run_code
    exec code in run_globals
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python2.7/dist-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelapp.py", line 474, in start
    ioloop.IOLoop.instance().start()
  File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/usr/local/lib/python2.7/dist-packages/tornado/ioloop.py", line 887, in start
    handler_func(fd_obj, events)
  File "/usr/local/lib/python2.7/dist-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py", line 276, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py", line 228, in dispatch_shell
    handler(stream, idents, msg)
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py", line 390, in execute_request
    user_expressions, allow_stdin)
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/usr/local/lib/python2.7/dist-packages/ipykernel/zmqshell.py", line 501, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 2717, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 2821, in run_ast_nodes
    if self.run_code(code, result):
  File "/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 2881, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-12-29a1bbc65e50>", line 2, in <module>
    model.add(Dense(4096, activation='relu', name="fc_1"))
  File "/usr/local/lib/python2.7/dist-packages/Keras-1.2.0-py2.7.egg/keras/models.py", line 327, in add
    output_tensor = layer(self.outputs[0])
  File "/usr/local/lib/python2.7/dist-packages/Keras-1.2.0-py2.7.egg/keras/engine/topology.py", line 543, in __call__
    self.build(input_shapes[0])
  File "/usr/local/lib/python2.7/dist-packages/Keras-1.2.0-py2.7.egg/keras/layers/core.py", line 752, in build
    constraint=self.W_constraint)
  File "/usr/local/lib/python2.7/dist-packages/Keras-1.2.0-py2.7.egg/keras/engine/topology.py", line 415, in add_weight
    weight = initializer(shape, name=name)
  File "/usr/local/lib/python2.7/dist-packages/Keras-1.2.0-py2.7.egg/keras/initializations.py", line 60, in glorot_uniform
    return uniform(shape, s, name=name)
  File "/usr/local/lib/python2.7/dist-packages/Keras-1.2.0-py2.7.egg/keras/initializations.py", line 33, in uniform
    return K.random_uniform_variable(shape, -scale, scale, name=name)
  File "/usr/local/lib/python2.7/dist-packages/Keras-1.2.0-py2.7.egg/keras/backend/tensorflow_backend.py", line 619, in random_uniform_variable
    low, high, dtype=tf_dtype, seed=seed)(shape)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/init_ops.py", line 171, in _initializer
    return random_ops.random_uniform(shape, minval, maxval, dtype, seed=seed)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/random_ops.py", line 245, in random_uniform
    seed2=seed2)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_random_ops.py", line 220, in _random_uniform
    seed=seed, seed2=seed2, name=name)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 759, in apply_op
    op_def=op_def)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2240, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1128, in __init__
    self._traceback = _extract_stack()

ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[25088,4096]
	 [[Node: random_uniform_13/RandomUniform = RandomUniform[T=DT_INT32, dtype=DT_FLOAT, seed=87654321, seed2=204159575, _device="/job:localhost/replica:0/task:0/gpu:0"](random_uniform_13/shape)]]


In [None]:
# Compile the network: mean squared error is both the loss and the reported
# metric, optimised with Nesterov-momentum SGD.
model.compile(
    loss='mean_squared_error',
    optimizer=optimizers.SGD(
        lr=0.0091,
        decay=1e-6,
        momentum=0.5,
        nesterov=True
    ),
    metrics=['mse']
)

## 4. Setting up the training pipeline

In [None]:
# Handy utility to read the image raw data from its path
def get_image(x):
    """Load the image at path `x`, resize it to IMG_DIMS and return it as an array.

    Parameters
    ----------
    x : path of the image file, passed straight to `load_img`.

    Returns
    -------
    The result of `img_to_array` on the resized image, or the string "ERR"
    when loading, resizing or converting the image raises an exception.
    Callers filter on that sentinel.
    """
    try:
        val = img_to_array(imresize(load_img(x),IMG_DIMS))
    except Exception:
        # Best-effort loading: an unreadable image is reported through the
        # sentinel instead of aborting the batch. Catching Exception rather
        # than using a bare `except` lets KeyboardInterrupt / SystemExit
        # propagate, so a long run can still be interrupted.
        val = "ERR"
    return val

In [None]:
# Data generator
def generate_data(df, batch_size, target_feature):
    """Endlessly yield random (X, Y) batches drawn from `df`.

    Parameters
    ----------
    df : DataFrame with a "full_path" column of image paths and a
        `target_feature` column of labels.
    batch_size : maximum number of rows drawn for one batch.
    target_feature : name of the label column.

    Yields
    ------
    (X, Y) where X stacks the loaded images along a new first axis and Y is
    the array of matching labels. Rows whose image could not be loaded
    (`get_image` returned the "ERR" sentinel) are dropped, so a batch may
    hold fewer than `batch_size` samples.

    Raises
    ------
    ValueError : when not a single image of the drawn batch could be loaded
        (this includes an empty `df`).
    """
    while True:
        # Draw one random batch directly instead of reshuffling the whole
        # frame and slicing off its head.
        sample = df.sample(n=min(batch_size, len(df))).reset_index(drop=True)

        images = sample["full_path"].apply(get_image)
        # Failed loads come back as a string sentinel. Test the type rather
        # than comparing arrays with `!=`, which is ambiguous for ndarrays.
        good_rows = images.apply(lambda img: not isinstance(img, str))
        if not good_rows.any():
            raise ValueError(
                "generate_data: no image of the current batch could be loaded")

        # Each image is (H, W, C); stacking adds the leading batch axis.
        X = np.stack(images[good_rows].tolist())
        #X = model.predict(X, X.shape[0])

        # `.values` works across pandas versions (`.as_matrix()` was removed).
        Y = sample.loc[good_rows, target_feature].values
        yield (X,Y)

In [None]:
# Endless batch generators over the training and validation frames; both
# draw BATCH_SIZE rows at a time and use TARGET_FEATURE as the label column.
train_gen = generate_data(train, BATCH_SIZE, TARGET_FEATURE)
valid_gen = generate_data(valid, BATCH_SIZE, TARGET_FEATURE)

In [None]:
# Freeze the first 25 layers of the network; the remaining layers stay
# trainable.
# NOTE(review): with the layer order declared above, index 25 appears to fall
# right after the first zero-padding layer of block 5 — confirm the cut-off.
# NOTE(review): this runs after model.compile(); some Keras versions only
# honour `trainable` changes after a re-compile — confirm for the version in use.
for layer in model.layers[:25]:
    layer.trainable = False

In [None]:
# Train
# samples_per_epoch and nb_val_samples are both BATCH_SIZE, i.e. each "epoch"
# asks for one generator batch of training data and one of validation data,
# repeated NUM_EPOCHS times. The returned history is used for plotting and
# saved to disk further down.
# NOTE(review): the generator drops rows whose image failed to load, so a
# batch can be smaller than BATCH_SIZE — confirm how fit_generator copes.
history = model.fit_generator(train_gen, 
                              validation_data=valid_gen,
                              nb_val_samples=BATCH_SIZE,
                              samples_per_epoch=BATCH_SIZE, 
                              nb_epoch=NUM_EPOCHS)

In [None]:
# Plot training and validation MSE across epochs
# One curve per recorded metric: the training MSE and the validation MSE
# stored in the fit history, one point per epoch.
plt.figure(figsize=(20,10))
plt.plot(history.history['mean_squared_error'])
plt.plot(history.history['val_mean_squared_error'])
plt.title('Training and Validation MSE')
plt.ylabel('MSE')
plt.xlabel('epoch')
# The second curve is the validation metric, so it is labelled as such
# (consistent with the title and with the history key plotted).
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

In [None]:
# Persist the trained model in two files written to the working directory:
# the architecture as JSON ("model_ba.json") and the weights as HDF5
# ("model_ba.h5").
# serialize model to JSON
model_json = model.to_json()
with open("model_ba.json", "w") as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
model.save_weights("model_ba.h5")
print("Saved model to disk")

In [None]:
# Persist the per-epoch MSE curves from the fit history:
# the training MSE as a binary NumPy file ("yaya.npy") and the validation
# MSE as plain text ("yaya.txt").
np.save("yaya.npy", history.history["mean_squared_error"])
np.savetxt("yaya.txt", history.history["val_mean_squared_error"])
