In [64]:
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization, Conv2D, MaxPooling2D
from keras.layers.advanced_activations import LeakyReLU
from keras.optimizers import Adam as Adam

In [65]:
# Load the 'Category' column of the training labels. Wrapping the one-column
# DataFrame in np.array gives one row per sample, so the first element of each
# row is that sample's label.
train_y_list = np.array(pd.read_csv('train_labels.csv', usecols=['Category']))
train_y = [row[0] for row in train_y_list]

# Count how many times each label occurs, keyed in first-seen order.
# NOTE: this mapping used to be bound to the name `dict`, which shadowed the
# builtin and made every later `dict(...)` call in the kernel fail with
# "TypeError: 'dict' object is not callable". It is now `label_counts`.
label_counts = {}
for label in train_y:
    label_counts[label] = label_counts.get(label, 0) + 1

# Distinct labels and the number of classes (read by cnn_model as a default).
labels = label_counts.keys()
num_class = len(labels)

In [None]:
def cnn_model(num_classes=None, input_shape=(30, 30, 1)):
    """Build, summarise and compile the convolutional classifier.

    Layer stack, in order:
      1. Conv2D(32, 3x3, 'same') -> LeakyReLU -> BatchNormalization
      2. Conv2D(32, 3x3, 'same') -> LeakyReLU -> MaxPooling2D(2x2) -> BatchNormalization
      3. Conv2D(32, 3x3, 'same') -> LeakyReLU -> MaxPooling2D(2x2)
      4. Conv2D(32, 3x3, 'same') -> LeakyReLU -> MaxPooling2D(2x2)
      5. Flatten -> BatchNormalization
      6. Dense(1024) -> LeakyReLU -> BatchNormalization -> Dropout(0.4)
      7. Dense(num_classes, softmax)

    Parameters
    ----------
    num_classes : int, optional
        Number of units in the softmax output layer. When omitted, the
        notebook-level ``num_class`` variable is used, so existing
        ``cnn_model()`` calls keep working.
    input_shape : tuple of int, optional
        Shape of one input sample, passed to the first Conv2D layer.
        Defaults to ``(30, 30, 1)``. With Keras' default ``channels_last``
        data format this is (rows, cols, channels) -- confirm if the
        backend is configured differently.

    Returns
    -------
    keras.models.Sequential
        The model compiled with a default-configured Adam optimizer,
        categorical cross-entropy loss and the accuracy metric.

    Side effects
    ------------
    Prints the layer summary via ``model.summary()``.
    """
    if num_classes is None:
        # Backward-compatible default: fall back to the global computed by
        # the label-counting cell.
        num_classes = num_class

    num_filter = 32       # number of convolution filters per conv layer
    kernel_size = (3, 3)  # (rows, cols) of each convolution kernel
    leaky_slope = 0.3     # LeakyReLU slope applied to negative inputs

    model = Sequential()

    # Input layer.
    # Each filter is a small 3x3 weight matrix that slides (convolves) over
    # the image one unit at a time (strides=1); 'same' padding keeps the
    # spatial size unchanged.
    #
    # No activation is set on the Conv2D itself. It used to carry
    # activation='relu', which clamps negatives to 0 *before* the LeakyReLU
    # below, turning that LeakyReLU into a no-op and making this layer
    # behave differently from the other three conv layers.
    model.add(Conv2D(num_filter,
                     kernel_size=kernel_size,
                     input_shape=input_shape,
                     strides=1,
                     padding='same'))

    # Non-linearity after the (linear) convolution. LeakyReLU passes positive
    # values through unchanged and scales negative values by `alpha` instead
    # of zeroing them the way plain ReLU (max(0, x)) does.
    model.add(LeakyReLU(alpha=leaky_slope))

    # Normalize the previous layer's activations per batch along the last
    # axis, keeping the mean close to 0 and the standard deviation close to 1.
    model.add(BatchNormalization(axis=-1))

    # 1st hidden layer: conv -> activation -> 2x2 max-pool -> batch norm
    model.add(Conv2D(num_filter, kernel_size=kernel_size, strides=1, padding='same'))
    model.add(LeakyReLU(alpha=leaky_slope))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=2))
    model.add(BatchNormalization(axis=-1))

    # 2nd hidden layer: conv -> activation -> 2x2 max-pool
    model.add(Conv2D(num_filter, kernel_size=kernel_size, strides=1, padding='same'))
    model.add(LeakyReLU(alpha=leaky_slope))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=2))

    # 3rd hidden layer: conv -> activation -> 2x2 max-pool
    model.add(Conv2D(num_filter, kernel_size=kernel_size, strides=1, padding='same'))
    model.add(LeakyReLU(alpha=leaky_slope))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=2))

    # Flatten the feature maps into a vector for the dense head.
    model.add(Flatten())
    model.add(BatchNormalization(axis=-1))

    # Fully connected layer with dropout for regularisation.
    model.add(Dense(1024))
    model.add(LeakyReLU(alpha=leaky_slope))
    model.add(BatchNormalization(axis=-1))
    model.add(Dropout(0.4))

    # Output layer: one softmax unit per class.
    model.add(Dense(num_classes, activation='softmax'))

    model.summary()

    model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

    return model


In [64]:
# Build a freshly compiled network.
model = cnn_model()

# Training hyper-parameters.
epoch = 1000
batch_size = 512

# Train on (train_x, train_y) and evaluate on (test_x, test_y) after each
# epoch. These arrays are expected to come from cells not shown here.
model.fit(
    train_x,
    train_y,
    batch_size=batch_size,
    epochs=epoch,
    shuffle=True,
    validation_data=(test_x, test_y),
    verbose=2,
)

# Persist the learned weights.
model.save_weights('my_model_weights.h5')

# Model prediction on testing data: one score per class for every sample,
# reduced to the index of the highest-scoring class.
class_scores = model.predict(test, batch_size=batch_size)
predicted_indices = np.argmax(class_scores, axis=1).tolist()


# Translate a one-hot index back to its original label via `mapping`
# (defined outside this cell).
def remap(x):
    return mapping[x]


best = list(map(remap, predicted_indices))

# Write the predictions to a CSV file with 1-based row IDs.
pred = pd.DataFrame(data=best)
pred.index = pred.index + 1
pred.to_csv(
    "cnn_KERAS_1000.csv",
    sep=',',
    header=['Label'],
    index=True,
    index_label='ID',
    encoding='utf-8',
)