In [1]:
import os
import sys
import random
sys.path.append('../..')

import keras
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.models import Sequential
from keras.layers import Dense, Flatten, Dropout
from keras.utils import to_categorical
import numpy as np
import pandas as pd
import tensorflow as tf
from keras import backend as K

import cr_interface as cri
import keras_utils as ku

Using TensorFlow backend.


In [2]:
SEED = 37


def reset_random():
    """Re-seed the notebook's random sources and install a fresh Keras session.

    Seeds NumPy, Python's ``random`` module and TensorFlow with ``SEED``,
    writes ``PYTHONHASHSEED`` into ``os.environ``, and registers a new
    ``tf.Session`` on the default graph as the Keras backend session.
    """
    # NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up,
    # so setting it here likely only affects child processes -- confirm.
    os.environ['PYTHONHASHSEED'] = '0'
    for seed_fn in (np.random.seed, random.seed):
        seed_fn(SEED)

    # Restrict TensorFlow to one thread for intra- and inter-op parallelism.
    single_thread_conf = tf.ConfigProto(
        intra_op_parallelism_threads=1,
        inter_op_parallelism_threads=1)
    tf.set_random_seed(SEED)
    K.set_session(
        tf.Session(graph=tf.get_default_graph(), config=single_thread_conf))

In [3]:
ku.applications

{'mobilenet': <keras_utils.Application at 0x12ba006d8>,
 'mobilenetv2': <keras_utils.Application at 0x12c403cc0>,
 'inceptionresnetv2': <keras_utils.Application at 0x12c403c18>,
 'inceptionv3': <keras_utils.Application at 0x12c403c50>,
 'nasnet': <keras_utils.Application at 0x12c403d68>,
 'resnet50': <keras_utils.Application at 0x12c403da0>,
 'vgg16': <keras_utils.Application at 0x12c403dd8>,
 'vgg19': <keras_utils.Application at 0x12c403e10>,
 'xception': <keras_utils.Application at 0x12c403e48>}

In [44]:
# Pretrained application used by the rest of the notebook; any key of
# ku.applications can be substituted here.
application = ku.applications['vgg19']

In [5]:
# Names of the data splits, taken from the keys of cri.DATA_DIRS
# (presumably a dict mapping split name -> image directory; verify in cr_interface).
splits = cri.DATA_DIRS.keys()

In [6]:
def get_generators(zoom=None):
    """Build one ``ImageDataGenerator`` per data split.

    Every split other than ``'test'`` gets ``rotation_range=45`` with
    ``fill_mode='nearest'``; the ``'test'`` generator gets no augmentation
    arguments.

    Args:
        zoom: Optional fixed zoom factor (e.g. ``0.6``). When given, every
            generator (including ``'test'``) receives a preprocessing
            function that calls ``ImageDataGenerator.apply_transform`` with
            ``zx == zy == zoom``. The default ``None`` attaches no
            preprocessing function.

    Returns:
        dict mapping each name in the module-level ``splits`` to its
        ``ImageDataGenerator``.
    """
    shared_kwargs = dict()
    if zoom is not None:
        zoom_parameters = {'zx': zoom, 'zy': zoom}
        # A bare generator is only needed as a handle on apply_transform.
        zoom_gen = ImageDataGenerator()
        shared_kwargs['preprocessing_function'] = (
            lambda x: zoom_gen.apply_transform(x, zoom_parameters))

    generators = dict()
    for split in splits:
        if split == 'test':
            augment_kwargs = dict()
        else:
            augment_kwargs = dict(
                rotation_range=45,
                fill_mode='nearest'
            )
        augment_kwargs.update(shared_kwargs)
        generators[split] = ImageDataGenerator(**augment_kwargs)

    return generators

In [7]:
def get_iterators():
    """Create a directory iterator for every split.

    Each generator returned by ``get_generators()`` is pointed at the
    matching directory in ``cri.DATA_DIRS``. All iterators share the same
    options: images resized to ``application.image_size``, batches of 32,
    categorical labels, shuffling enabled and seeded with ``SEED``.

    Returns:
        dict mapping split name to the iterator returned by
        ``flow_from_directory``.
    """
    generators = get_generators()

    flow_options = {
        'target_size': application.image_size,
        'batch_size': 32,
        'class_mode': 'categorical',
        'shuffle': True,
        'seed': SEED,
    }

    return {
        split: gen.flow_from_directory(
            directory=cri.DATA_DIRS[split], **flow_options)
        for split, gen in generators.items()
    }

# Controlling Randomness
- A new Sequential model built from a fixed seed is reproducible
- Save/load/attach the top model (with the same, reproducible results)
- Save/load/attach a randomly trained top model (testing)

## Bottleneck

In [8]:
def get_labels(iterator, multiplier=1):
    """Collect the labels produced by ``iterator`` into a single array.

    The iterator's ``total_batches_seen`` and ``batch_index`` counters are
    reset to zero, then exactly ``int(len(iterator) * multiplier)`` batches
    are drawn and their labels (element 1 of each batch) are concatenated
    along axis 0.

    Args:
        iterator: Batch iterator supporting ``len()`` and iteration, whose
            batches are indexable with the labels at position 1.
        multiplier: How many batches to draw, as a multiple of
            ``len(iterator)``.

    Returns:
        ``numpy.ndarray`` with the stacked labels, or ``None`` when no batch
        was drawn (non-positive batch count or an empty iterator).
    """
    # reset seed parameters
    # note that you need to use the same iterator to reproduce order
    iterator.total_batches_seen = 0
    iterator.batch_index = 0

    n_batches = int(len(iterator) * multiplier)
    if n_batches <= 0:
        return None

    # zip() asks range() first, so it stops after exactly n_batches without
    # pulling an extra batch; fetching one more would advance the iterator
    # past the requested point and leave it mid-epoch.
    collected = [np.array(batch[1]) for _, batch in zip(range(n_batches), iterator)]
    if not collected:
        return None

    # Single concatenate instead of repeated np.append (which copies the
    # growing array on every batch).
    return np.concatenate(collected, axis=0)

In [9]:
LOADED = False

In [45]:
# create bottlenecks (they are written to disk in the following cell)
# Each split is run once through the application's model; the predictions
# ("bottlenecks") and the matching labels are kept per split.
iterators = get_iterators()
bottlenecks = dict()
labels = dict()
# presumably drops any cached model so get_model() reloads it -- verify in keras_utils
application.free_model()

# Options forwarded to predict_generator.
kwargs = dict(
    verbose=1,
    workers=8,
    use_multiprocessing=True)

for split, it in iterators.items():
    bottlenecks[split] = application.get_model().predict_generator(it, steps=len(it), **kwargs)
    # NOTE(review): the iterators shuffle; get_labels resets the iterator's
    # counters so the labels are presumably replayed in the same order as the
    # predictions above -- confirm, especially with multiprocessing workers.
    labels[split] = get_labels(it)
    
LOADED = True

Found 251 images belonging to 3 classes.
Found 472 images belonging to 3 classes.
Found 1682 images belonging to 3 classes.
loading vgg19 model


In [None]:
# Save Bottlenecks
# A file object (not a path) is passed to np.save so the names stay exactly
# 'b_<split>' / 'l_<split>' with no '.npy' suffix appended; the with-blocks
# close every handle as soon as its array has been written.
for split, data in bottlenecks.items():
    with open('b_{}'.format(split), 'wb') as f:
        np.save(f, data)

for split, data in labels.items():
    with open('l_{}'.format(split), 'wb') as f:
        np.save(f, data)

In [None]:
# load bottlenecks
# Read back the per-split arrays written by the save cell. Each file is
# opened in a with-block so its handle is closed once the array is loaded.
bottlenecks = dict()
labels = dict()
for split in splits:
    with open('b_{}'.format(split), 'rb') as f:
        bottlenecks[split] = np.load(f)
    with open('l_{}'.format(split), 'rb') as f:
        labels[split] = np.load(f)

In [12]:
bottlenecks.keys()

dict_keys(['test', 'validation', 'train'])

In [13]:
labels.keys()

dict_keys(['test', 'validation', 'train'])

#### No problem with bottleneck save / load

In [54]:
def compile_model(model):
    """Compile ``model`` in place with the notebook's standard training setup.

    Uses categorical cross-entropy loss, the accuracy metric, and an SGD
    optimizer with Nesterov momentum (lr=1e-4, decay=1e-6, momentum=0.9).

    Args:
        model: Object exposing a Keras-style ``compile`` method.
    """
    optimizer = optimizers.SGD(
        lr=1.0e-4,
        decay=1e-6,
        momentum=0.9,
        nesterov=True)
    compile_options = {
        'loss': 'categorical_crossentropy',
        'optimizer': optimizer,
        'metrics': ['accuracy'],
    }
    model.compile(**compile_options)

In [51]:
def load_model(compiled=True):
    """Build a new top classifier for the application's output features.

    Despite the name, nothing is read from disk: a ``Sequential`` model is
    assembled from Flatten -> Dense(1024, relu) -> Dropout(0.5) ->
    Dense(3, softmax). The Dense kernels use ``glorot_uniform`` initializers
    seeded with ``SEED`` and the dropout layer is seeded with ``SEED`` too.

    Args:
        compiled: When true, the model is passed through ``compile_model``
            before being returned.

    Returns:
        The assembled ``Sequential`` model.
    """
    def seeded_glorot():
        return keras.initializers.glorot_uniform(seed=SEED)

    model = Sequential()
    feature_shape = application.get_model().output_shape[1:]
    model.add(Flatten(input_shape=feature_shape))
    model.add(Dense(1024, activation='relu', kernel_initializer=seeded_glorot()))
    model.add(Dropout(0.5, seed=SEED))
    model.add(Dense(3, activation='softmax', kernel_initializer=seeded_glorot()))

    if not compiled:
        return model

    compile_model(model)
    return model

In [None]:
# Predict on the test bottlenecks with a freshly seeded top model that has
# not been trained yet.
reset_random()
top_model = load_model()
bottle_predictions = top_model.predict(bottlenecks['test'], verbose=1)

In [None]:
# Show both lengths as one tuple: only a cell's last expression is
# displayed, so two separate len(...) lines would hide the first value.
len(bottlenecks['validation']), len(labels['validation'])

In [52]:
bottlenecks['validation'][0].shape

(5, 5, 512)

In [55]:
# Train a new top model on the train bottlenecks for 10 epochs, validating
# on the validation bottlenecks. Seeding via reset_random() is disabled here.
#reset_random()
top_model = load_model()
top_model.fit(bottlenecks['train'], labels['train'],
              validation_data=(bottlenecks['validation'], labels['validation']),
              shuffle=True,
              batch_size=32,
              epochs=10)
#top_model.save_weights('temp_inc.hdf5')

Train on 1682 samples, validate on 472 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x167f834a8>

In [39]:

# Number of test samples, then the share of them whose label vector has a 1
# in position 0 (presumably the majority-class baseline for the accuracy
# reported below -- confirm).
test_labels = np.asarray(labels['test'])
print(len(test_labels))
# Vectorised count instead of a Python loop over the rows.
count = int(np.count_nonzero(test_labels[:, 0] == 1))
print(count / len(test_labels))

251
0.7330677290836654


In [56]:
top_model.evaluate(bottlenecks['test'], labels['test'])



[2.0578135408728246, 0.8167330667792089]

In [57]:
top_model.evaluate(bottlenecks['validation'], labels['validation'])



[2.2996570053747143, 0.783898306094994]

In [None]:
for element in top_model.get_weights():
    print(element)

In [None]:
# Re-seed, train a new top model for a single epoch and write its weights to
# 'temp.hdf5' so that later cells can reload them.
reset_random()
top_model = load_model()
top_model.fit(bottlenecks['train'], labels['train'],
              validation_data=(bottlenecks['validation'], labels['validation']),
              shuffle=True,
              batch_size=32,
              epochs=1)
top_model.save_weights('temp.hdf5')

In [None]:
p1 = top_model.predict(bottlenecks['test'])

In [None]:
p1

In [None]:
# Rebuild the top model uncompiled, restore the weights saved in 'temp.hdf5',
# and only then compile it.
top_model = load_model(compiled=False)
top_model.load_weights('temp.hdf5')
compile_model(top_model)

In [None]:
p2 = top_model.predict(bottlenecks['test'])

In [None]:
p2

In [None]:
# Stack the frozen application model and the saved top model into a single
# model and predict from raw test images (p3), presumably for comparison
# with the bottleneck-based predictions p1 / p2.
# Note: the iterators are NOT reused here; fresh ones are created below.
reset_random()
model = Sequential()
application.free_model()
pre_model = application.get_model()
# Freeze every layer of the pretrained model.
for layer in pre_model.layers:
    layer.trainable = False
model.add(pre_model)
top_model = load_model(compiled=False)
top_model.load_weights('temp.hdf5')
model.add(top_model)
compile_model(model)

# NOTE(review): get_iterators() enables shuffling, so the row order of p3
# may not match the order of bottlenecks['test'] -- confirm before comparing.
iterators = get_iterators()
p3 = model.predict_generator(iterators['test'], verbose=1)
p3

## Raw Image

In [None]:
# Stack the frozen application model and a new (untrained) top model, then
# predict directly from the raw training images.
#iterators = get_iterators() reuse
model = Sequential()
application.free_model()
pre_model = application.get_model()
# Freeze every layer of the pretrained model.
for layer in pre_model.layers:
    layer.trainable = False
model.add(pre_model)
top_model = load_model()
model.add(top_model)

# Use the notebook's shared helper rather than repeating the SGD / loss /
# metric configuration inline.
compile_model(model)

image_predictions = model.predict_generator(iterators['train'], verbose=1)

# Training on raw images is intentionally disabled.
if False:
    model.fit_generator(iterators['train'],
                               validation_data=iterators['validation'],
                               shuffle=True,
                               epochs=1)

In [None]:
image_predictions

In [None]:
# Same stacked model as before, but built on whatever model
# application.get_model() currently returns (free_model() is not called
# first), with a summary printed for inspection.
#iterators = get_iterators() reuse
model = Sequential()
pre_model = application.get_model()
# Freeze every layer of the pretrained model.
for layer in pre_model.layers:
    layer.trainable = False
model.add(pre_model)
top_model = load_model()
model.add(top_model)
model.summary()

# Use the notebook's shared helper rather than repeating the SGD / loss /
# metric configuration inline.
compile_model(model)

image_predictions = model.predict_generator(iterators['train'], verbose=1)

# Training on raw images is intentionally disabled.
if False:
    model.fit_generator(iterators['train'],
                               validation_data=iterators['validation'],
                               shuffle=True,
                               epochs=1)

In [None]:
# Wrap only the frozen application model (no top model) and predict on the
# raw test images.
model = Sequential()
pre_model = application.get_model()
for layer in pre_model.layers:
    layer.trainable = False
model.add(pre_model)
# Shared helper instead of a duplicated inline SGD / compile block.
compile_model(model)

image_predictions = model.predict_generator(iterators['test'], verbose=1)
image_predictions

In [None]:
# Repeat of the frozen application-model prediction on the test images,
# stored under a separate name (image_predictions0).
model = Sequential()
pre_model = application.get_model()
for layer in pre_model.layers:
    layer.trainable = False
model.add(pre_model)
# Shared helper instead of a duplicated inline SGD / compile block.
compile_model(model)

image_predictions0 = model.predict_generator(iterators['test'], verbose=1)
image_predictions0

In [None]:
# Application-model prediction on the test images WITHOUT touching the
# layers' trainable flags in this cell, stored as image_predictions1.
model = Sequential()
pre_model = application.get_model()
model.add(pre_model)
# Shared helper instead of a duplicated inline SGD / compile block.
compile_model(model)

image_predictions1 = model.predict_generator(iterators['test'], verbose=1)
image_predictions1

In [None]:
image_predictions

In [None]:
#iterators = get_iterators() reuse
# Rewind every iterator before reusing it. Iterate over the dict's VALUES:
# looping over the dict itself yields the split names (str), and assigning
# attributes on a str raises AttributeError.
for it in iterators.values():
    it.batch_index = 0
    it.total_batches_seen = 0
model = Sequential()
pre_model = application.get_model()
# Freeze every layer of the pretrained model.
for layer in pre_model.layers:
    layer.trainable = False
model.add(pre_model)
top_model = load_model(compiled=False)
model.add(top_model)
model.summary()
# Print the trainable flag of each top-level layer of the stacked model.
for layer in model.layers:
    print(layer)
    print(layer.trainable)

# Training on raw images is intentionally disabled.
if False:
    # Shared helper instead of a duplicated inline SGD / compile block.
    compile_model(model)

    model.fit_generator(iterators['train'],
                               validation_data=iterators['validation'],
                               shuffle=True,
                               epochs=1)

In [None]:
# NOTE(review): when the cells run top to bottom, top_model was last rebound
# via load_model(compiled=False), i.e. an untrained and uncompiled model, so
# this call presumably expects the trained, compiled top model -- confirm.
top_model.evaluate(bottlenecks['test'], labels['test'])