In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load
import time
import warnings
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split

import keras
import tensorflow as tf
from keras import backend as K
from keras.models import Sequential
from keras.applications.inception_v3 import InceptionV3
from keras.layers import Dense, Flatten, Dropout, Conv2D, MaxPooling2D, BatchNormalization, Activation, AveragePooling2D, Input
from keras.optimizers import RMSprop, Adam, SGD
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import Callback, ModelCheckpoint, LearningRateScheduler, ReduceLROnPlateau, TensorBoard, EarlyStopping
from keras.utils.np_utils import to_categorical
from keras.regularizers import l2
from keras.models import Model, load_model

from os.path import join, exists, expanduser
from os import listdir, makedirs

warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.filterwarnings(action='ignore', category=UserWarning)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
kaggle_input_files = (
    os.path.join(folder, name)
    for folder, _subfolders, names in os.walk('/kaggle/input')
    for name in names
)
for file_path in kaggle_input_files:
    print(file_path)


In [None]:
# Locations of the competition CSV files, all inside the same input folder
input_path = '../input/Kannada-MNIST/'

train_path, test_path, dig_path, sample_path = (
    input_path + csv_name
    for csv_name in ('train.csv', 'test.csv', 'Dig-MNIST.csv', 'sample_submission.csv')
)

# Prefix joined in front of saved artefacts; '' leaves their file names relative
save_path = ''

In [None]:
# Read each CSV file into its own pandas DataFrame
train_data, test_data, dig_data, sample_submission_data = (
    pd.read_csv(csv_path)
    for csv_path in (train_path, test_path, dig_path, sample_path)
)

### Data Pre-processing

In [None]:
# Data pre-processing
h = w = 28
input_shape = (h, w, 1)
num_classes = 10

num_train = train_data.shape[0]
num_test = test_data.shape[0]
num_dig = dig_data.shape[0]


def _to_image_tensor(frame, non_pixel_column):
    '''
    Turn a DataFrame of flattened pixel rows into a scaled image tensor.

    Arguments:
    frame -- DataFrame with one row per image; apart from `non_pixel_column` it must hold
             exactly h * w pixel columns, otherwise the reshape below raises
    non_pixel_column -- name of the single column to drop before reshaping ('label' or 'id')

    Return:
    images -- float32 array of shape (rows, h, w, 1) with every value divided by 255
    '''
    pixels = frame.drop([non_pixel_column], axis = 1).to_numpy()
    return pixels.reshape(frame.shape[0], h, w, 1).astype('float32') / 255.0


## Labels
# One-hot encoding of the training labels (instead of 0, 1, 2, 3, etc.)
y_train = to_categorical(train_data['label'], num_classes=num_classes)
# Dig-MNIST labels stay as integers: they are only compared against argmax predictions
y_dig = dig_data['label']

## Images from shape of 784 to (28, 28, 1), normalised to [0, 1]
X_train = _to_image_tensor(train_data, 'label')

id_test = test_data['id']
X_test = _to_image_tensor(test_data, 'id')

# Dig-MNIST must be scaled exactly like the training data; previously it was only
# cast to float32, so the trained model was evaluated on 0-255 inputs.
X_dig = _to_image_tensor(dig_data, 'label')

# All three image sets have to share the input scale the network is trained on
assert max(X_train.max(), X_test.max(), X_dig.max()) <= 1.0

# Create validation data
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.15, random_state=42)

# Report the number of classes (not the label array shape) next to "number of labels"
print("Train data set shape: {}, number of labels: {}".format(X_train.shape, y_train.shape[1]))
print("Dig data set shape: {}, number of labels: {}".format(X_dig.shape, y_dig.nunique()))
print("Validation data set shape: {}, number of labels: {}".format(X_val.shape, y_val.shape[1]))
print("Test data set shape: {}".format(X_test.shape))

In [None]:
# Check couple of images
# Draw the indices first so that every image can be titled with its own label;
# indexing y_train with the subplot position would show labels of unrelated samples.
rand_indices = np.random.choice(X_train.shape[0], size=(10,))
rand_images = X_train[rand_indices]

fig, ax = plt.subplots(2,5, figsize = (15,5))
ax = ax.ravel()
print("Couple of images to check....")
for i, sample_index in enumerate(rand_indices):
    ax[i].imshow(rand_images[i][:,:,0], cmap='gray')
    # y_train is one-hot encoded, so argmax recovers the class number
    y_train_ = np.argmax(y_train[sample_index])
    ax[i].set_title('Class number is: {}'.format(y_train_))
plt.show()


In [None]:
class Metrics(Callback):
    '''
    Keras callback that computes the micro-averaged F1 score on a fixed validation
    set at the end of every epoch and stores the history in `self.val_f1s`.

    Arguments:
    val_data -- indexable pair (inputs, targets); targets may be one-hot rows or integer labels
    '''
    def __init__(self, val_data):
        super().__init__()
        self.validation_data = val_data

    def on_train_begin(self, logs = None):
        # `logs` defaults to None instead of a shared mutable dict
        self.val_f1s = []

    def on_epoch_end(self, epoch, logs = None):
        val_inputs, val_target = self.validation_data[0], self.validation_data[1]

        # Pick the most probable class per sample. Rounding the softmax output instead
        # can leave rows with no predicted class whenever no probability reaches 0.5.
        val_scores = np.asarray(self.model.predict(val_inputs))
        val_predict = np.argmax(val_scores, axis = 1)

        # Bring one-hot targets to the same integer-label form as the predictions
        val_target = np.asarray(val_target)
        if val_target.ndim > 1:
            val_target = np.argmax(val_target, axis = 1)

        _val_f1_score = f1_score(val_target, val_predict, average="micro")
        self.val_f1s.append(_val_f1_score)
        

### Convolutional Neural Network

In [None]:
class convModel():

    '''
    Builder for the VGG-style Convolutional Neural Network used in this notebook.
    The object only stores the input shape and the number of classes; `CNN` assembles
    the Keras model and `conv_optimizer` compiles it.
    '''

    def __init__(self, input_shape, num_classes):

        '''
        Instantiate the convModel object with arguments below.

        Arguments:
        input_shape -- shape of a single input image, handed to the first Conv layer
        num_classes -- number of units of the final (softmax) Dense layer, int
        '''
        self.input_shape = input_shape
        self.num_classes = num_classes


    # CONV MODEL
    def Conv(self, filters, kernel_size, activation = 'relu', input_shape = None):
        '''
        This method is to create a single Conv layer. Only the first layer of a model needs
        `input_shape`; for every other Conv layer it is left out so Keras infers it from the
        previous layer.

        Arguments:
        filters -- number of filters, int
        kernel_size -- dimension of kernel (filter), int or tuple of ints
        activation -- type of activation, default = 'relu'
        input_shape -- shape of one input image; pass it for the first layer only

        Return:
        Conv2D layer -- a Keras Conv layer with 'same' padding
        '''
        layer_kwargs = dict(filters = filters,
                            kernel_size = kernel_size,
                            activation = activation,
                            kernel_initializer = 'he_normal',
                            bias_initializer= 'glorot_normal',
                            padding = 'same')

        # Forward input_shape only when it was actually supplied
        if input_shape:
            layer_kwargs['input_shape'] = input_shape

        return Conv2D(**layer_kwargs)

    # SANITY CHECK MODEL
    def CNN(self, dense_activation = 'relu'):
        '''
        This method is to create a CNN architecture as follows:
        [Conv --> BatchNorm] x 3 --> MaxPooling --> Dropout      (64 filters)
        [Conv --> BatchNorm] x 3 --> MaxPooling --> Dropout      (128 filters)
        [Conv --> BatchNorm] x 3 --> MaxPooling --> Dropout      (256 filters)
        [Conv --> BatchNorm] x 2 --> MaxPooling --> Dropout      (512 filters)
        --> Flatten --> [Dense --> BatchNorm --> Activation --> Dropout] x 2
        --> Dense --> Activation (softmax prediction)

        Remark: Instead of using Activation layer separately, 'relu' is applied directly when
        creating each Conv layer.

        Arguments:
        dense_activation -- activation applied after each hidden Dense + BatchNorm pair,
                            default = 'relu'. Pass None to leave the hidden Dense layers
                            linear, which is how they were stacked before this argument existed.

        Returns:
        model -- a Keras CNN model (not compiled)
        '''
        model = Sequential()

        # (number of filters, number of Conv layers) of each convolutional stage
        conv_stages = [(64, 3), (128, 3), (256, 3), (512, 2)]

        is_first_layer = True
        for filters, depth in conv_stages:
            for _ in range(depth):
                if is_first_layer:
                    # Only the very first layer declares the input shape
                    model.add(self.Conv(filters, (3,3), input_shape=self.input_shape))
                    is_first_layer = False
                else:
                    model.add(self.Conv(filters, (3,3)))
                model.add(BatchNormalization())
            model.add(MaxPooling2D(pool_size=(2,2)))
            model.add(Dropout(0.3))

        model.add(Flatten())
        for _ in range(2):
            model.add(Dense(1024,kernel_regularizer=l2(0.02)))
            model.add(BatchNormalization(momentum=0.9, epsilon=1e-5, gamma_initializer="uniform"))
            # Without a non-linearity here the two hidden Dense layers are plain linear maps
            if dense_activation is not None:
                model.add(Activation(dense_activation))
            model.add(Dropout(0.3))

        model.add(Dense(self.num_classes))
        model.add(Activation('softmax'))

        return model

    def conv_optimizer(self,
                       model,
                       optimizer):
        '''
        This method is to compile the model for training.

        Arguments:
        model -- a Keras model, e.g. the one returned by CNN()
        optimizer -- optimizer handed to model.compile

        Returns:
        model -- the same model object, compiled with categorical cross-entropy loss and
                 the accuracy metric
        '''

        model.compile(optimizer = optimizer ,
                      loss = "categorical_crossentropy",
                      metrics=["accuracy"])

        return model

In [None]:
# Hyperparameters
num_epochs, batch_size = 75, 50

# Optimizers -- all three start from the same learning rate
initial_lr = 0.001
adam_optimizer = Adam(lr = initial_lr)
sgd_optimizer = SGD(lr = initial_lr, decay = 0, momentum = 0.9, nesterov = True)
rmsprop_optimizer = RMSprop(lr = initial_lr)

### Train Network

In [None]:
# CNN Model: assemble the architecture, then compile it with the Adam optimizer
cnn_model = convModel(input_shape, num_classes = num_classes)
cnn_network = cnn_model.CNN()
cnn_built_model = cnn_model.conv_optimizer(model = cnn_network, optimizer = adam_optimizer)

# CALLBACKS
## Checkpoint: keep only the model of the epoch with the best validation accuracy
model_name = 'cnn_model_checkpoints.h5'
filepath = os.path.join(save_path, model_name)
checkpoint = ModelCheckpoint(filepath = filepath,
                             monitor = 'val_accuracy',
                             mode = 'max',
                             verbose = 1,
                             save_best_only = True)

## Early stopping: give up after 50 epochs without a better validation loss
earlystopping = EarlyStopping(monitor = 'val_loss',
                              mode = 'min',
                              verbose = 1,
                              patience = 50,
                              restore_best_weights = True)

## Learning rate: multiply by 0.75 after 3 epochs without a better validation accuracy
learning_rate_reducer = ReduceLROnPlateau(monitor = 'val_accuracy',
                                          factor = 0.75,
                                          cooldown = 0,
                                          patience = 3,
                                          verbose = 1,
                                          mode = 'max',
                                          min_lr = 0.5e-6)

callbacks = [checkpoint, learning_rate_reducer, earlystopping]

# Image augmentation on training data set (digits are never mirrored)
augmentation_settings = dict(rotation_range = 10,
                             width_shift_range = 0.25,
                             height_shift_range = 0.25,
                             shear_range = 0.1,
                             zoom_range = 0.2,
                             horizontal_flip = False)
train_datagen = ImageDataGenerator(rescale = 1.0, **augmentation_settings)

# Validation images are passed through unchanged (rescale by 1.0 only)
val_datagen = ImageDataGenerator(rescale = 1.0)

# Image generators
train_generator = train_datagen.flow(X_train, y_train, batch_size = batch_size)
val_generator = val_datagen.flow(X_val, y_val, batch_size = batch_size)

# Number of full batches per pass over each data set
steps_per_epoch = X_train.shape[0] // batch_size
validation_steps = X_val.shape[0] // batch_size

# Train the compiled model on the augmented batches
history = cnn_built_model.fit_generator(train_generator,
                                        epochs = num_epochs,
                                        validation_data = val_generator,
                                        steps_per_epoch = steps_per_epoch,
                                        validation_steps = validation_steps,
                                        verbose = 1,
                                        callbacks = callbacks)


### Load Model

In [None]:
# Load model
# Reload from the exact path the checkpoint callback wrote to, so that changing
# save_path or model_name cannot leave this cell reading a stale or missing file.
cnn_trained_network = load_model(filepath)

# Check predictions on Dig_MNIST data set: most probable class per image
y_dig_pred = np.argmax(cnn_trained_network.predict(X_dig), axis = 1)

# accuracy_score takes (y_true, y_pred)
dig_acc = accuracy_score(y_dig, y_dig_pred)
print("Dig-MNIST data set accuracy is: {}".format(dig_acc))

In [None]:
# Predict the most probable class for every test image and write the submission file
test_probabilities = cnn_trained_network.predict(X_test)
y_pred = np.argmax(test_probabilities, axis = 1)

# Fill the label column of the sample submission in place, then save it without the index
sample_submission_data['label'] = y_pred
sample_submission_data.to_csv('submission.csv', index = False)