In [None]:
#### Keras supports multi-GPU training with TensorFlow 1.4, but it relies on one method
#### that is missing on the session object and therefore causes conflicts: list_devices().
#### Define a stand-in for it here and attach it to the Keras session below.
from tensorflow.python.client import device_lib
from keras import backend as K

def get_local_devices():
    """Stand-in for the session's ``list_devices()`` method.

    Asks ``device_lib`` for the local devices and rewrites every device name,
    replacing each '/' with 'device:'. The device objects are renamed in place
    and the same list that ``device_lib`` produced is returned.
    """
    devices = device_lib.list_local_devices()
    for entry in devices:
        original_name = entry.name
        entry.name = original_name.replace('/', 'device:')
    return devices

# Fetch the session Keras is currently using and attach the stand-in to it as an
# instance attribute, so that calling sess.list_devices() runs get_local_devices().
sess = K.get_session()
sess.list_devices = get_local_devices

In [None]:
%matplotlib inline
import functools
import os
import threading

import numpy as np
import sklearn
import tensorflow as tf
from matplotlib import pyplot as plt

from keras.applications.vgg16 import VGG16
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Input, Maximum
from keras.models import Model
from keras.optimizers import SGD
from keras.utils import multi_gpu_model

import exploratory_model.tsahelper as tsa

In [None]:
# PARAMETERS
# Directory holding the preprocessed subject files. Later cells list this
# directory with os.listdir() and load each entry with np.load(). The path is
# relative, so it is resolved against the notebook's working directory.
DATASET_PATH = "../../dataset/preprocessed-a3daps/"

In [None]:
class threadsafe_iter:
    """Wrap an iterator/generator so that only one consumer advances it at a time.

    Each call to ``__next__`` takes a lock, delegates to the wrapped object's
    own ``__next__`` and releases the lock again, whether a value was returned
    or an exception (including ``StopIteration``) was raised.
    """
    def __init__(self, it):
        self.lock = threading.Lock()
        self.it = it

    def __iter__(self):
        # The wrapper is its own iterator.
        return self

    def __next__(self):
        self.lock.acquire()
        try:
            return self.it.__next__()
        finally:
            self.lock.release()


def threadsafe_generator(f):
    """Decorator that makes the generators produced by ``f`` thread-safe.

    Calling the decorated function calls ``f`` with the same arguments and
    returns the result wrapped in ``threadsafe_iter``. The wrapper keeps the
    name and docstring of ``f`` (via ``functools.wraps``), so the decorated
    function still identifies itself correctly in tracebacks and ``help()``.

    A message is printed once when the decorator is applied and again every
    time the decorated function is called.
    """
    print('decorating')

    @functools.wraps(f)
    def g(*a, **kw):
        print('thread safing')
        return threadsafe_iter(f(*a, **kw))
    return g

In [None]:
# List every entry under DATASET_PATH and shuffle that list in place.
# NOTE(review): no random seed is set here, so the order differs on every run,
# and a later cell assigns `sublect_list` again from a fresh os.listdir() call.
sublect_list = os.listdir(DATASET_PATH)
np.random.shuffle(sublect_list)

In [None]:
FREEZE_LAYERS = 18
INPUT_SHAPE = (660, 660, 3)


def _clone_conv2d(source_layer):
    """Return a new, unconnected Conv2D configured like ``source_layer``.

    Filters, kernel size, strides, padding and activation are copied, and the
    source layer's current weights are used as the initial weights. Copying
    ``padding`` matters: Conv2D defaults to 'valid', whereas the VGG16
    convolutions use 'same', so leaving it out silently changes the network.
    """
    return Conv2D(
        filters=source_layer.filters,
        kernel_size=source_layer.kernel_size,
        strides=source_layer.strides,
        padding=source_layer.padding,
        activation=source_layer.activation,
        weights=source_layer.get_weights(),
    )


# Build the template model on the CPU; the multi-GPU wrapper is applied later.
with tf.device('/cpu:0'):
    # pooling=None (not the string 'None'): no global pooling after the last block.
    base_model = VGG16(include_top=False, input_shape=INPUT_SHAPE, pooling=None, weights='imagenet')
    layer_1 = base_model.layers[1]
    block1_conv1_weight = layer_1.get_weights()

    # Four inputs, each with its own copy of VGG16's first convolution. The
    # copies start from the same pretrained weights but are separate layers.
    inputs = []
    new_layer_1 = []
    for i in range(4):
        x = Input(shape=INPUT_SHAPE, name=('input_' + str(i+1)))
        inputs.append(x)
        new_layer_1.append(_clone_conv2d(layer_1)(x))

    # Merge the four branches with an element-wise maximum.
    x = Maximum()(new_layer_1)

    # Re-create the rest of the VGG16 stack on top of the merged tensor.
    # Layers that are neither pooling nor convolution are skipped.
    for layer in base_model.layers[2:]:
        if isinstance(layer, MaxPooling2D):
            x = MaxPooling2D(
                    pool_size=layer.pool_size,
                    strides=layer.strides,
                    padding=layer.padding,
                    data_format=layer.data_format
                )(x)
        elif isinstance(layer, Conv2D):
            x = _clone_conv2d(layer)(x)

    x = Flatten()(x)

    # Two fully-connected layers on top of the convolutional features.
    x = Dense(4096, activation='relu', kernel_initializer='truncated_normal', bias_initializer='random_uniform')(x)
    x = Dense(4096, activation='relu', kernel_initializer='truncated_normal', bias_initializer='random_uniform')(x)

    # Output layer: sigmoid activation since this is multi-label classification.
    predictions = Dense(17, activation='sigmoid', kernel_initializer='truncated_normal',
                        bias_initializer='random_uniform', name='output')(x)

    # This is the model we will train.
    model = Model(inputs=inputs, outputs=predictions)

    # Freeze the first FREEZE_LAYERS layers of the model.
    for layer in model.layers[:FREEZE_LAYERS]:
        layer.trainable = False

In [None]:
# Wrap the template model for data-parallel training on 4 GPUs.
# NOTE: this rebinds `model`, so re-running this cell without rebuilding the
# template model first would wrap the already-wrapped model again.
model = multi_gpu_model(model, gpus=4)

# Fine-tuning: the learning rate is 0.001, i.e. 1/10 of the rate used for the
# original VGG16 training. Passing the string 'sgd' instead would fall back to
# Keras's default SGD settings and ignore these values.
sgd = SGD(lr=1e-3, decay=1e-6, momentum=0.9, nesterov=True)

# binary_crossentropy is the matching loss for independent sigmoid output units.
model.compile(optimizer=sgd, loss='binary_crossentropy')

In [None]:
def augment(image):
    """Turn one 2-D scan slice into four rotated 660x660x3 network inputs.

    Steps:
      1. Negative values are clamped to 0 (on a copy; ``image`` is not modified).
      2. The second axis is padded by 74 columns on each side, repeating the
         edge values.
      3. The result is reshaped to (660, 660, 1) and the single channel is
         repeated three times.
      4. The image is returned at 0, 90, 180 and 270 degrees
         (``np.rot90`` applied 0 to 3 times).

    Args:
        image: 2-D numpy array. The hard-coded padding and reshape mean it is
            expected to be (660, 512); an array whose padded size does not hold
            660*660 elements raises ValueError at the reshape.

    Returns:
        A list of four arrays of shape (660, 660, 3), in rotation order.
    """
    # np.maximum returns a new array, so the caller's data (typically a view
    # into the full image stack of a subject) is left untouched.
    clipped = np.maximum(image, 0)
    padded = np.pad(clipped, ((0, 0), (74, 74)), 'edge')
    rgb = np.repeat(padded.reshape((660, 660, 1)), 3, axis=2)
    return [np.rot90(rgb, quarter_turns) for quarter_turns in range(4)]

In [None]:
# Fraction of the subject files that goes into the training set.
TRAIN_TEST_SPLIT = 0.8
# Seed for the shuffle that decides which subjects land in which set.
SPLIT_SEED = 0

# os.listdir() returns entries in arbitrary order, so sort first and then
# shuffle with a private, seeded generator: the split is random with respect to
# file names, identical on every run, and leaves numpy's global random state alone.
# (Re-listing the directory here without shuffling would throw away any earlier shuffle.)
sublect_list = sorted(os.listdir(DATASET_PATH))
np.random.RandomState(SPLIT_SEED).shuffle(sublect_list)
file_list = sublect_list

# First TRAIN_TEST_SPLIT of the shuffled files train the model, the rest are held out.
number_of_train = int(len(file_list)*TRAIN_TEST_SPLIT)
TRAIN_SET_FILE_LIST = file_list[:number_of_train]
TEST_SET_FILE_LIST = file_list[number_of_train:]

def get_io_tensors(in_size):
    """Create empty batch accumulators.

    Returns a pair ``(input_buckets, output_bucket)``: ``input_buckets`` is a
    list of ``in_size`` independent empty lists (one per model input) and
    ``output_bucket`` is a single empty list for the labels.
    """
    input_buckets = []
    for _ in range(in_size):
        input_buckets.append([])
    output_bucket = []
    return input_buckets, output_bucket

@threadsafe_generator
def data_generator(subject_file_list, in_size, batch_size):
    """Yield training batches built from the subject files, in list order.

    For every file, element 0 of the loaded array is taken as the image stack
    and element 1 as a sequence of pairs whose second item is the label value.
    Each of the 16 slice indices ``i`` produces four samples: images ``i``,
    ``i + 16``, ``i + 32`` and ``i + 48`` are each passed through ``augment``,
    and sample ``j`` feeds rotation ``j`` of the k-th image to model input ``k``.
    The label vector of the subject is attached to every sample.

    Args:
        subject_file_list: file names relative to DATASET_PATH.
        in_size: number of model inputs to fill; at most 4, since only four
            images are augmented per slice index (larger values raise IndexError).
        batch_size: number of samples per yielded batch.

    Yields:
        ``(inputs, labels)`` where ``inputs`` is a list of ``in_size`` arrays,
        each stacking ``batch_size`` augmented images, and ``labels`` is an
        array stacking the matching label vectors.

    The generator makes a single pass over ``subject_file_list`` and then
    stops; samples left over that do not fill a whole batch are not yielded.
    """
    print("Generator Initiated")
    in_tensor, out_tensor = get_io_tensors(in_size)
    counter = 0
    for subject_image in subject_file_list:
        # allow_pickle=True: the indexing below suggests the files hold an
        # object array (images and labels stored together), which NumPy
        # >= 1.16.3 refuses to load by default.
        # NOTE(review): unpickling executes code from the file, so only load
        # files produced by this project's own preprocessing.
        npset = np.load(os.path.join(DATASET_PATH, subject_image), allow_pickle=True)
        images = np.array(npset[0], dtype=np.float32)
        labels = np.array([i[1] for i in npset[1]], dtype=np.float32)

        for i in range(16):
            # Four images per slice index, each expanded into its 4 rotations.
            input_augments = [augment(images[i + j*16]) for j in range(4)]

            for j in range(4):
                for in_i in range(in_size):
                    in_tensor[in_i].append(input_augments[in_i][j])
                out_tensor.append(labels)

                counter += 1
                if counter >= batch_size:
                    for in_i in range(in_size):
                        in_tensor[in_i] = np.array(in_tensor[in_i])
                    out_tensor = np.array(out_tensor)
                    yield(in_tensor, out_tensor)
                    # Start a fresh batch.
                    counter = counter%batch_size
                    in_tensor, out_tensor = get_io_tensors(in_size)

In [None]:
# Unit Test

# t = data_generator(TRAIN_SET_FILE_LIST, 4, 64)

# fig, ax = plt.subplots(4,4,figsize=(16,16))
# row = 0
# col = 0

# for in_tensors, out_tensors in t:
#     print('out', out_tensors.shape)
#     print(in_tensors[0].shape)

#     col = 0
#     for tensor in in_tensors:
#         ax[row, col].imshow(tensor[row])
#         col += 1
#     row += 1
#     if row == 4:
#         break

In [None]:
# Show the Keras summary of `model` (by this point the compiled multi-GPU wrapper).
model.summary()

In [None]:
# Each subject file yields 16 slice indices x 4 rotations = 64 samples
# (see data_generator).
SAMPLES_PER_SUBJECT = 16 * 4
batch_size = 4
in_size = len(inputs)

# Derive the schedule from the actual training set instead of a hard-coded
# subject count. data_generator makes a single pass over its file list, so
# asking for more batches than the training subset contains would exhaust the
# generator before the last epoch.
total_images = len(TRAIN_SET_FILE_LIST) * SAMPLES_PER_SUBJECT
step_per_epoch = SAMPLES_PER_SUBJECT // batch_size  # one "epoch" == one subject
epochs = len(TRAIN_SET_FILE_LIST)

# NOTE(review): per the Keras docs `shuffle` is only honoured for Sequence
# inputs, so it presumably has no effect on this plain generator - confirm.
model.fit_generator(
    data_generator(TRAIN_SET_FILE_LIST, in_size, batch_size),
    steps_per_epoch=step_per_epoch, epochs=epochs, verbose=2,
    use_multiprocessing=True, shuffle=True, initial_epoch=0, max_queue_size=4)

In [None]:
# evaluate_generator(self, generator, steps, max_queue_size=10, workers=1, use_multiprocessing=False)
# model.fit(X_train, y_train, epochs=5, batch_size=2000)

# preds = model.predict(X_test)
# preds[preds>=0.5] = 1
# preds[preds<0.5] = 0
# score = compare preds and y_test