In [None]:
%matplotlib inline
import os

import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import keras.backend as K
from keras.layers import Layer
from keras.layers import Conv2D, MaxPooling2D, Activation, Dropout, Flatten, Dense, Input, Dense, concatenate
from keras.layers import add
from keras.models import Model
from keras.models import Sequential
from keras.optimizers import Optimizer
from keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator

# OPTIMIZER

In [None]:
from keras.optimizers import Optimizer

class AdamIFT6135(Optimizer):
    """Adam-style optimizer whose two moment estimates can be enabled independently.

    Depending on the flags, the update rule is:
        use_momentum only ... SGD with bias-corrected momentum
        use_rms only ........ RMSProp with bias correction
        both ................ Adam

    # Arguments
        lr: float >= 0. Learning rate.
        beta_1: float. Decay rate of the moving average of the gradients.
        beta_2: float. Decay rate of the moving average of the squared gradients.
        decay: float >= 0. Learning-rate decay applied at every update.
        use_momentum: bool. Track the moving average of the gradients.
        use_rms: bool. Track the moving average of the squared gradients.

    # Raises
        ValueError: if both `use_momentum` and `use_rms` are False.
    """

    def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999,
                 decay=0., use_momentum=True, use_rms=True, **kwargs):
        # Fail before any backend variable is created: with neither average
        # enabled there is no update rule, and returning early from __init__
        # would only hand back a half-initialised optimizer.
        if not (use_momentum or use_rms):
            raise ValueError("You must choose at least momentum or rms")

        # The parent class provides self.updates and self.weights (both lists),
        # accepts 'clipnorm' and 'clipvalue', and supplies get_gradients,
        # set_weights and get_weights. get_updates must be overridden.
        super(AdamIFT6135, self).__init__(**kwargs)

        # Same name-scoping convention as the optimizers shipped with Keras.
        # https://stackoverflow.com/questions/42708989/why-do-we-use-tf-name-scope
        with K.name_scope(self.__class__.__name__):
            # Iteration counter, needed for the bias correction and lr decay.
            self.iterations = K.variable(0, dtype='int64', name='iterations')
            self.lr = K.variable(lr, name='lr')
            self.beta_1 = K.variable(beta_1, name='beta_1')
            self.beta_2 = K.variable(beta_2, name='beta_2')
            self.decay = K.variable(decay, name='decay')

        self.initial_decay = decay
        self.epsilon = K.epsilon()
        self.use_momentum = use_momentum
        self.use_rms = use_rms

    def get_updates(self, loss, params):
        """Build the list of update ops for one optimization step.

        https://github.com/keras-team/keras/issues/4746
        https://stackoverflow.com/questions/41787873/how-adagrad-wroks-in-keras-what-does-self-weights-mean-in-keras-optimizer

        # Arguments
            loss: loss tensor to minimise.
            params: list of trainable variables.

        # Returns
            The list of update ops (also stored in `self.updates`).
        """
        grads = self.get_gradients(loss, params)

        # The first update increments the iteration counter.
        self.updates = [K.update_add(self.iterations, 1)]

        # Apply the time-based learning-rate decay that the constructor accepts.
        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                      K.dtype(self.decay))))

        # t for the bias correction (1-based step index), cast to float.
        t = K.cast(self.iterations, K.floatx()) + 1

        # The accumulators depend on the parameter shapes, which are unknown in
        # the constructor, so they are created here, initialised with zeros.
        # Only the accumulators required by the selected mode are allocated.
        # https://www.coursera.org/learn/deep-neural-network/lecture/w9VCZ/adam-optimization-algorithm
        shapes = [K.int_shape(p) for p in params]
        v_dws = [K.zeros(shape) for shape in shapes] if self.use_momentum else []
        s_dws = [K.zeros(shape) for shape in shapes] if self.use_rms else []
        # Same layout in every mode: the counter first, then the accumulators.
        self.weights = [self.iterations] + v_dws + s_dws

        for index, (param, grad) in enumerate(zip(params, grads)):
            # Numerator of the step: the raw gradient unless momentum is on.
            direction = grad
            if self.use_momentum:
                v_dw = v_dws[index]
                v_dw_new = (self.beta_1 * v_dw) + ((1. - self.beta_1) * grad)
                # Store the uncorrected average; the bias-corrected value is
                # only used to compute this step.
                self.updates.append(K.update(v_dw, v_dw_new))
                direction = v_dw_new / (1. - K.pow(self.beta_1, t))

            step = lr * direction
            if self.use_rms:
                s_dw = s_dws[index]
                s_dw_new = (self.beta_2 * s_dw) + ((1. - self.beta_2) * K.square(grad))
                self.updates.append(K.update(s_dw, s_dw_new))
                s_dw_corrected = s_dw_new / (1. - K.pow(self.beta_2, t))
                step = step / (K.sqrt(s_dw_corrected) + self.epsilon)

            new_param = param - step
            # Honour per-weight constraints when the variable carries one.
            if getattr(param, 'constraint', None) is not None:
                new_param = param.constraint(new_param)
            self.updates.append(K.update(param, new_param))

        return self.updates

    def get_config(self):
        """Return the constructor arguments needed to rebuild this optimizer."""
        config = {'lr': float(K.get_value(self.lr)),
                  'beta_1': float(K.get_value(self.beta_1)),
                  'beta_2': float(K.get_value(self.beta_2)),
                  'decay': float(K.get_value(self.decay)),
                  'use_momentum': self.use_momentum,
                  'use_rms': self.use_rms}
        base_config = super(AdamIFT6135, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

In [None]:
def simple_inception_layer(x, n_filters):
    """
    Simplified inception module: four parallel branches whose outputs are
    concatenated along axis 3.

        branch 1: 1x1 conv
        branch 2: 1x1 conv --> 3x3 conv
        branch 3: 1x1 conv --> 5x5 conv
        branch 4: 3x3 maxpool (stride 1) --> 3x3 conv

    x: input tensor
    n_filters: number of filters used by every convolution, for simplicity
    """

    def relu_conv(kernel_size, tensor):
        # Every convolution in the module shares these settings.
        return Conv2D(n_filters, kernel_size, padding='same', activation='relu')(tensor)

    branch_1x1 = relu_conv((1, 1), x)
    branch_3x3 = relu_conv((3, 3), relu_conv((1, 1), x))
    branch_5x5 = relu_conv((5, 5), relu_conv((1, 1), x))

    pooled = MaxPooling2D(pool_size=(3, 3), strides=(1, 1), padding='same')(x)
    branch_pool = relu_conv((3, 3), pooled)

    return concatenate([branch_1x1, branch_3x3, branch_5x5, branch_pool], axis=3)

# The following layers were not used in the final model; they are included for reference only

In [None]:
class SimpleDropout(Layer):
    """
    Minimal (inverted) dropout layer.

    This layer was not used, only included for reference.

    During training each input element is zeroed with probability `rate` and
    the surviving elements are scaled by 1 / (1 - rate), so the expected
    activation is the same in training and at inference. Outside training the
    inputs pass through unchanged.

    rate: fraction of units to drop, clipped to [0, 1]. The layer is a no-op
          when the clipped rate is exactly 0 or 1.
    """
    def __init__(self, rate, **kwargs):
        super(SimpleDropout, self).__init__(**kwargs)
        self.rate = min(1., max(0., rate))
        self.supports_masking = True

    def call(self, inputs, training=None):
        if 0. < self.rate < 1.:
            keep_prob = 1. - self.rate

            def dropped_inputs():
                # Build the mask in the inputs' own dtype so the product also
                # works when the backend float type is not float32.
                keep_mask = K.cast(K.random_uniform(K.shape(inputs)) >= self.rate,
                                   K.dtype(inputs))
                # Rescale the kept units to compensate for the dropped ones.
                return inputs * keep_mask / keep_prob

            #in_train_phase returns the first argument in training, the second otherwise
            return K.in_train_phase(dropped_inputs,
                                    inputs,
                                    training=training)
        return inputs

    def get_config(self):
        """Return the layer configuration, including the (clipped) rate."""
        config = {'rate': self.rate}
        base_config = super(SimpleDropout, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

In [None]:
#https://towardsdatascience.com/understanding-residual-networks-9add4b664b03
def simple_res_double_ception(x, n_filters):
    """
    Residual block whose main branch is two stacked simple inception layers.

    x: input tensor (channels-last, as simple_inception_layer concatenates on axis 3)
    n_filters: filters per inception branch; the main branch therefore
               outputs 4 * n_filters channels

    Returns the element-wise sum of the shortcut and the main branch.
    """
    incept = simple_inception_layer(x, n_filters)
    incept = simple_inception_layer(incept, n_filters)

    shortcut = x
    out_channels = K.int_shape(incept)[-1]
    # The sum needs matching shapes: project the shortcut with a linear 1x1
    # convolution whenever the input does not already have out_channels channels.
    if K.int_shape(shortcut)[-1] != out_channels:
        shortcut = Conv2D(out_channels, (1, 1), padding='same', activation=None)(shortcut)

    return add([shortcut, incept])

In [None]:
from keras.layers import Input, Add, Dense, Activation

def res_block(x, n_filters):
    """
    Residual block: two stacked 3x3 convolutions plus a 1x1-projected shortcut.

    x: input tensor
    n_filters: number of filters of every convolution in the block

    Returns relu(main_branch + shortcut).
    """
    # Linear 1x1 projection so the shortcut has n_filters channels like the main branch.
    shortcut = Conv2D(n_filters, (1, 1), padding='same', activation=None)(x)

    main_flow = Conv2D(n_filters, (3, 3), padding='same', activation='relu')(x)
    # The second convolution consumes the output of the first one; the
    # non-linearity is applied after the addition.
    main_flow = Conv2D(n_filters, (3, 3), padding='same', activation=None)(main_flow)

    addition = Add()([main_flow, shortcut])
    return Activation('relu')(addition)

# DATA LOADER

In [None]:
# Size of the images fed to the network.
img_height = 64
img_width = 64
n_channels = 3
input_shape = (img_height, img_width, n_channels)

# Root folder of the training images.
train_data_dir = './trainset/'

# Number of images per batch.
batch_size = 256

In [None]:
#https://stackoverflow.com/questions/42443936/keras-split-train-test-set-when-using-imagedatagenerator
# Fraction of the images in train_data_dir held out for validation. Both
# generators must use the same value so the two subsets stay complementary.
validation_split = 0.2

# Training images are rescaled and randomly augmented.
train_datagen = ImageDataGenerator(rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=validation_split)

# Validation images are only rescaled: augmenting them would make the
# validation metrics noisy and not comparable between epochs.
validation_datagen = ImageDataGenerator(rescale=1./255,
    validation_split=validation_split)

train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary',
    subset='training') # set as training data

validation_generator = validation_datagen.flow_from_directory(
    train_data_dir, # same directory as training data
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary',
    shuffle=False, # keep batches in the order of validation_generator.filenames
    subset='validation') # set as validation data

In [None]:
#8x%
# VGG-style stem (pairs of 3x3 convolutions followed by 2x2 max pooling)
# interleaved with simple inception layers, ending in a dense binary classifier.
# `padding='same'` is the Keras 2 spelling of the old `border_mode='same'`,
# matching the keyword already used in simple_inception_layer.
input_img = Input(shape=input_shape)

initial_path = Conv2D(32, (3, 3), padding='same', activation='relu')(input_img)
initial_path = Conv2D(32, (3, 3), padding='same', activation='relu')(initial_path)
initial_path = MaxPooling2D(pool_size=(2, 2))(initial_path)

initial_path = Conv2D(64, (3, 3), padding='same', activation='relu')(initial_path)
initial_path = Conv2D(64, (3, 3), padding='same', activation='relu')(initial_path)
initial_path = MaxPooling2D(pool_size=(2, 2))(initial_path) #16x16x64

# Each inception layer concatenates 4 branches of 64 filters.
initial_path  = simple_inception_layer(initial_path, 64)

initial_path = Conv2D(128, (3, 3), padding='same', activation='relu')(initial_path)
initial_path = Conv2D(128, (3, 3), padding='same', activation='relu')(initial_path)
initial_path = MaxPooling2D(pool_size=(2, 2))(initial_path)

initial_path  = simple_inception_layer(initial_path, 64)

initial_path = Conv2D(256, (3, 3), padding='same', activation='relu')(initial_path)
initial_path = Conv2D(256, (3, 3), padding='same', activation='relu')(initial_path)
initial_path = MaxPooling2D(pool_size=(2, 2))(initial_path)

initial_path  = simple_inception_layer(initial_path, 64)
#initial_path  = simple_inception_layer(initial_path, 16)

# Classifier head: two hidden dense layers and a single sigmoid unit.
x = Flatten()(initial_path)

x = Dense(256, activation='relu')(x)
x = Dense(256, activation='relu')(x)
out = Dense(1, activation='sigmoid')(x)

inception_model = Model(input_img, out)

In [None]:
# Display the layer-by-layer summary of inception_model.
inception_model.summary()

In [None]:
# Custom optimizer with only the RMS average enabled (momentum disabled).
optim = AdamIFT6135(
    lr=1e-5,
    beta_1=0.9,
    beta_2=0.999,
    decay=0.,
    use_momentum=False,
    use_rms=True,
)

# Binary classification: cross-entropy loss, accuracy reported as metric.
inception_model.compile(
    optimizer=optim,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Training
# len(generator) is the integer number of batches needed to cover the subset
# once, i.e. ceil(samples / batch_size) without the float returned by np.ceil.
hist_inception = inception_model.fit_generator(
    train_generator,
    steps_per_epoch=len(train_generator),
    validation_data=validation_generator,
    validation_steps=len(validation_generator),
    epochs=80,
    verbose=1)

In [None]:
# Change these to continue the training with different settings.
# lr and beta_2 are backend variables, so they can be changed in place:
#K.set_value(inception_model.optimizer.lr, 0.00001)
#K.set_value(inception_model.optimizer.beta_2, 0.9)
# use_momentum / use_rms are plain Python flags of the optimizer, so
# K.set_value cannot change them; build a new optimizer and recompile instead:
#optim = AdamIFT6135( lr=0.00001, beta_1=0.9, beta_2=0.9, decay=0., use_momentum=False, use_rms=True)

# len(generator) is the integer number of batches needed to cover the subset once.
hist_inception2 = inception_model.fit_generator(
    train_generator,
    steps_per_epoch=len(train_generator),
    validation_data=validation_generator,
    validation_steps=len(validation_generator),
    epochs=8,
    verbose=1)

# Concatenate the history of different runs

In [None]:
# Histories of the training runs performed in this notebook, in run order.
# Only two fit_generator runs exist here, so only those two are listed.
hist = [hist_inception.history, hist_inception2.history]

In [None]:
def concat_histories(histories):
    """
    Merge several history dicts (metric name -> list of values) into one dict
    whose lists are the per-run lists joined in the given order.
    """
    merged = {}
    for history in histories:
        for metric, values in history.items():
            merged.setdefault(metric, []).extend(values)
    return merged


total_h = concat_histories(hist)

In [None]:
def plot_history(h):
    """
    Plot every series of a history dict on one set of axes.

    h: dict mapping a series name to its list of values. Each series is drawn
       against its own 1-based index, so series of different lengths (or an
       empty dict) do not raise.
    """
    fig, ax = plt.subplots(figsize=(10, 5))

    for name, values in h.items():
        ax.plot(range(1, len(values) + 1), values, label=name)

    ax.set_xlabel('epoch')

    # Shrink the axes to 80% of their width to leave room for the legend.
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])

    # Put a legend to the right of the current axis
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    plt.show()

# Prediction

In [None]:
# Root folder of the test images.
test_path = './testset/'

In [None]:
# Test-time generator: pixel values are only rescaled by 1/255, no augmentation.
test_datagen = ImageDataGenerator(rescale=1. / 255)

In [None]:
# One image per batch, no labels, and no shuffling so that the predictions
# come out in the same order as test_generator.filenames.
test_generator = test_datagen.flow_from_directory(
    directory=test_path,
    target_size=(img_height, img_width),  # must match the model's input size
    color_mode="rgb",
    batch_size=1,
    class_mode=None,
    shuffle=False
)

In [None]:
# Start from the first batch, then predict exactly one pass over the test
# images: len(test_generator) is the number of batches the generator holds.
test_generator.reset()
preds = inception_model.predict_generator(test_generator, steps=len(test_generator))

In [None]:
# One record per test image. The id is the file name without its folder and
# extension; the label is "Dog" when the predicted probability is >= 0.5.
# preds is flattened so every prob is a scalar rather than a 1-element array.
preds_file = [
    {"id": os.path.splitext(os.path.basename(fname))[0],
     "label": "Dog" if prob >= 0.5 else "Cat",
     "prob": float(prob)}
    for fname, prob in zip(test_generator.filenames, preds.ravel())
]

In [None]:
# Tabulate the per-image prediction records.
preds_df = pd.DataFrame(preds_file)

In [None]:
# Convert the ids to integers so that sorting on them is numeric. No sorting
# happens here: assigning a sorted Series back to the column re-aligns it on
# the index, so the rows keep their order.
preds_df['id'] = preds_df['id'].astype(int)

In [None]:
# Order the rows by id and renumber the index from 0.
preds_df = preds_df.sort_values('id').reset_index(drop=True)

In [None]:
# Write only the id and label columns to submission.csv, without the index column.
preds_df[['id','label']].to_csv('submission.csv', index=False)

# Visualizing Results

We used the code for visualizing the filters from https://fairyonice.github.io/Visualization%20of%20Filters%20with%20Keras.html

## Getting images with different confidence

In [None]:
def show_imgs(list_imgs, n_rows=2, n_cols=5):
    """
    Show the images of `list_imgs` on an n_rows x n_cols grid, row by row.

    list_imgs: list of image paths
    n_rows, n_cols: grid dimensions

    If there are fewer images than grid cells the remaining cells are left
    blank; images beyond n_rows x n_cols are not shown.
    """
    # squeeze=False always returns a 2-D array of axes, also for one row/column.
    f, axs = plt.subplots(n_rows, n_cols, squeeze=False)
    grid_cells = axs.ravel()  # row-major order: left to right, top to bottom

    for cell, img_path in zip(grid_cells, list_imgs):
        cell.imshow(mpimg.imread(img_path))

    # Hide the frame and ticks of the cells that received no image.
    for cell in grid_cells[len(list_imgs):]:
        cell.axis('off')
In [None]:
# Get predictions for the validation subset (one full pass over it).
# NOTE: pairing these predictions with validation_generator.filenames is only
# meaningful if the generator yields the images in file order (shuffle=False).
val_preds = inception_model.predict_generator(validation_generator,
                                              steps=len(validation_generator))

In [None]:
# One record per validation image. "actual" is the first three characters of
# the relative file name (presumably the class folder prefix, verify against
# the directory layout). val_preds is flattened so every prob is a scalar
# rather than a 1-element array.
preds_file = [
    {"id": fname,
     "actual": fname[:3],
     "prediction": "Dog" if prob >= 0.5 else "Cat",
     "prob": float(prob)}
    for fname, prob in zip(validation_generator.filenames, val_preds.ravel())
]

In [None]:
import pandas as pd

# Tabulate the validation records, ordered by file name, with a fresh index.
preds_df = (
    pd.DataFrame(preds_file)
    .sort_values('id')
    .reset_index(drop=True)
)
preds_df.head()

In [None]:
# Example of getting low confidence images: predictions close to the 0.5
# decision threshold.
low_confidence = preds_df[(preds_df['prob'] > 0.45) & (preds_df['prob'] < 0.55)]
# Shuffle the rows and keep up to 10 of them (fewer if not enough qualify).
low_confidence = low_confidence.sample(frac=1).head(10)
x = [os.path.join(train_data_dir, img) for img in low_confidence['id']]

In [None]:
# Display the selected images on a 2 x 5 grid.
show_imgs(x, n_rows=2, n_cols=5)