In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns


# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os

# Notebook-wide run switches.
#   CommitFlag  - True for an unattended submission run; the exploratory
#                 cells below are guarded by `if not CommitFlag`.
#   enable_pca  - gates the optional PCA step in the data-loading cell.
CommitFlag, enable_pca = True, False

# Seed NumPy's global RNG so NumPy-driven randomness is repeatable between runs.
seed = 7
np.random.seed(seed)

# Interactive runs only: list the files available in the input directory.
if not CommitFlag:
    print(os.listdir("../input"))

# Any results you write to the current directory are saved as output.

In [None]:
# Load the Kaggle CSVs. In train.csv the first column is used as the label and
# the remaining columns as the pixel features; test.csv is used as features only.
dataset = pd.read_csv('../input/train.csv')
X = dataset.iloc[:, 1:].values
y = dataset.iloc[:, :1].values  # sliced with :1 so y stays 2-D, shape (m, 1)

test_dataset = pd.read_csv('../input/test.csv')
test = test_dataset.iloc[:,:].values

# One label per training image (42000 according to the original note).
print("The shape of y: {}".format(y.shape))
m = y.shape[0]
print("The number of images: m = {}".format(m))
input_count = X.shape[1]
print("The size of input: input_count = {}".format(input_count))


# feature scaling (left disabled, as in the original notebook)
#X = X/255.0
#test = test/255.0

# PCA
from sklearn.decomposition import PCA

if enable_pca:
    # PCA expects a 2-D (samples, features) matrix, so it has to be fitted on
    # the flat pixel rows -- i.e. before any reshape into image tensors.
    pca_component_count = 300
    pca = PCA(n_components=pca_component_count, whiten=True)
    pca.fit(X)
    X = pca.transform(X)
    test = pca.transform(test)
    # NOTE(review): the projected features are no longer 28x28 images, so they
    # cannot be fed to the Conv2D network defined later; this branch is only
    # usable with a dense model.
else:
    # Shape the flat pixel rows into (samples, 28, 28, 1) image tensors.
    X = X.reshape(-1,28,28,1)
    test = test.reshape(-1,28,28,1)

print(X.shape)
print(test.shape)

#print(y[:10])
if not CommitFlag:
    # Class balance of the labels; `x=` keeps the call valid on seaborn
    # versions where the first positional argument is `data`.
    sns.countplot(x=np.squeeze(y))


In [None]:
# Check whether any feature value is NaN.
# np.unique(mask)[0] would only return the *smallest* flag, which is False as
# soon as one value is valid, so it cannot reveal a partial NaN problem; .any()
# answers the actual question. The result is printed because a bare expression
# nested inside an `if` is not displayed by the notebook.
if not CommitFlag:
    print("NaN present in X: {}".format(np.isnan(X).any()))


In [None]:
# Check whether any target value is NaN.
# np.unique(mask)[0] would only return the *smallest* flag, which is False as
# soon as one value is valid, so it cannot reveal a partial NaN problem; .any()
# answers the actual question. The result is printed because a bare expression
# nested inside an `if` is not displayed by the notebook.
if not CommitFlag:
    print("NaN present in y: {}".format(np.isnan(y).any()))

In [None]:
# Encoding categorical data
from sklearn.preprocessing import OneHotEncoder
# One-hot encode the single label column: (m, 1) -> (m, number of distinct labels).
# The `categorical_features` argument was deprecated in scikit-learn 0.20 and
# later removed; y has exactly one column, so the default encoder (which encodes
# every column) produces the same result.
# The shape guard keeps the cell idempotent: re-running it must not encode the
# already-encoded indicator matrix a second time.
if y.ndim == 2 and y.shape[1] == 1:
    onehotencoder = OneHotEncoder()
    y = onehotencoder.fit_transform(y).toarray()

In [None]:
# Importing the Keras libraries and packages
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.constraints import maxnorm
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, BatchNormalization
from sklearn.model_selection import train_test_split
from keras.optimizers import SGD

# With data augmentation to prevent overfitting (accuracy 0.99286)
from keras.preprocessing.image import ImageDataGenerator
# Set a learning rate annealer
from keras.callbacks import ReduceLROnPlateau


In [None]:
# ---------------------------------------------------------------------------
# Ensemble training: `nets_count` CNNs with identical architecture are trained
# independently on augmented data; their predictions are combined later.
# ---------------------------------------------------------------------------
nets_count = 15       # number of networks in the ensemble
batch_size_vl = 64    # mini-batch size produced by the augmenting generator
# Submission ("commit") runs train on all data for longer; interactive runs
# hold out a validation split and train for fewer epochs.
epochs_count = 45 if CommitFlag else 25

# Halve the learning rate when the monitored training accuracy has not
# improved for 3 epochs, but never go below 1e-5.
# NOTE(review): 'acc' is the metric name logged by the Keras version this
# notebook targets; newer releases log 'accuracy' instead -- confirm on upgrade.
learning_rate_reduction = ReduceLROnPlateau(monitor='acc',
                                            patience=3,
                                            verbose=1,
                                            factor=0.5,
                                            min_lr=0.00001)


def _build_cnn(input_shape=(28, 28, 1), class_count=10):
    """Build and compile one ensemble member.

    Architecture: two convolutional stages (32, then 64 filters), each made of
    two 3x3 convolutions followed by a 5x5 stride-2 convolution, with batch
    normalisation after every convolution and Dropout(0.4) closing the stage;
    then a 128-filter 4x4 convolution, Flatten, Dropout(0.4) and a softmax
    output layer.

    Args:
        input_shape: shape of one input sample, without the batch dimension.
        class_count: number of output classes (units of the softmax layer).

    Returns:
        A Sequential model compiled with the Adam optimizer, categorical
        cross-entropy loss and the accuracy metric.
    """
    net = Sequential()

    net.add(Conv2D(32, kernel_size=3, activation='relu', input_shape=input_shape))
    net.add(BatchNormalization())
    net.add(Conv2D(32, kernel_size=3, activation='relu'))
    net.add(BatchNormalization())
    net.add(Conv2D(32, kernel_size=5, strides=2, padding='same', activation='relu'))
    net.add(BatchNormalization())
    net.add(Dropout(0.4))

    net.add(Conv2D(64, kernel_size=3, activation='relu'))
    net.add(BatchNormalization())
    net.add(Conv2D(64, kernel_size=3, activation='relu'))
    net.add(BatchNormalization())
    net.add(Conv2D(64, kernel_size=5, strides=2, padding='same', activation='relu'))
    net.add(BatchNormalization())
    net.add(Dropout(0.4))

    net.add(Conv2D(128, kernel_size=4, activation='relu'))
    net.add(BatchNormalization())
    net.add(Flatten())
    net.add(Dropout(0.4))
    net.add(Dense(class_count, activation='softmax'))

    # COMPILE WITH ADAM OPTIMIZER AND CROSS ENTROPY COST
    net.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
    return net


model = [0] * nets_count
history = [0] * nets_count

for j in range(nets_count):
    if CommitFlag:
        # Train on every sample. A plain shuffle is used instead of
        # train_test_split(test_size=0.0), which newer scikit-learn rejects.
        shuffled = np.random.permutation(X.shape[0])
        X_train, y_train = X[shuffled], y[shuffled]
        X_test, y_test = X[:0], y[:0]  # empty hold-out keeps the names defined
    else:
        # Fresh stratified 90/10 split for every ensemble member.
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, stratify=y)
    print(X_train.shape)

    output_count = y.shape[1]
    print("input_count: "+str(input_count))
    print("output_count: " + str(output_count))

    model[j] = _build_cnn(class_count=output_count)
    model[j].summary()

    # Light geometric augmentation; the dataset-statistics options stay off.
    datagen = ImageDataGenerator(
#            featurewise_center=True,  # set input mean to 0 over the dataset
#            samplewise_center=False,  # set each sample mean to 0
#            featurewise_std_normalization=True,  # divide inputs by std of the dataset
#            samplewise_std_normalization=False,  # divide each input by its std
#            zca_whitening=True,  # apply ZCA whitening
            rotation_range=10,  # randomly rotate images in the range (degrees, 0 to 180)
            zoom_range=0.1,  # randomly zoom image
            width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
            height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
            horizontal_flip=False,  # horizontal flips disabled
            vertical_flip=False)  # vertical flips disabled

    datagen.fit(X_train)

    # One epoch = one pass over the samples actually used for training. The
    # split size is used here (not the full dataset size `m`), otherwise the
    # epochs of a validation run would be longer than the training set.
    fit_options = dict(epochs=epochs_count,
                       steps_per_epoch=X_train.shape[0] // batch_size_vl,
                       callbacks=[learning_rate_reduction])
    if not CommitFlag:
        fit_options['validation_data'] = (X_test, y_test)

    history[j] = model[j].fit_generator(
        datagen.flow(X_train, y_train, batch_size=batch_size_vl),
        **fit_options)

    if not CommitFlag:
        print("CNN {0:d}: Epochs={1:d}, Train accuracy={2:.5f}, Validation accuracy={3:.5f}".format(
            j+1,
            epochs_count,
            max(history[j].history['acc']),
            max(history[j].history['val_acc'])))
        model_acc = model[j].evaluate(X_test, y_test)
        print(" Model Accuracy is : {0:.1f}%".format(model_acc[1]*100))

In [None]:
# Disabled earlier architecture, kept as a string literal: nothing in this cell
# executes, the string is merely the cell's value.
# The snippet describes a fully connected network: Flatten, then
# `hidden_layers_count` Dense+Dropout layers whose width `nh` is halved after
# each layer, then a 10-unit sigmoid output layer.
# To be re-enabled it needs `model[j]`, `nh`, `dropout_prob`,
# `hidden_layers_count`, `weight_max` and `input_count` to be in scope.
"""    model[j].add(MaxPool2D(pool_size=(1,1), input_shape = (28,28,1)))
    model[j].add(Flatten())
    for i in range(1,hidden_layers_count+1):
        print("nh: " + str(nh))
        print("dropout_prob: " + str(dropout_prob))
        model[j].add(Dense(units=nh,kernel_initializer='uniform',activation='relu',input_dim=input_count,kernel_constraint=maxnorm(weight_max)))
        model[j].add(Dropout(dropout_prob))
        nh = int(nh/2)
        #nh = int(nh - delta)
        #dropout_prob = dropout_prob / 2.0
        #dropout_prob = dropout_prob - pdelta
    #nh2 = int(nh1/2)
    #print("nh2: " + str(nh2))

    #Add the output layer, an analog digit value
    model[j].add(Dense(units=10,kernel_initializer='uniform',activation='sigmoid'))
"""

In [None]:
# ENSEMBLE PREDICTIONS AND SUBMIT
# Soft voting: add up the class probabilities predicted by every trained
# network and pick, per test image, the class with the largest total.
# The class count and the number of test rows are taken from the data instead
# of being hard-coded (10 classes / 28000 rows in the original).
member_probs = [model[j].predict(test) for j in range(nets_count)]
ensemble_probs = np.sum(member_probs, axis=0, dtype=np.float64)

results = pd.Series(np.argmax(ensemble_probs, axis=1), name="Label")
# ImageId is the 1-based row number of the test image.
image_ids = pd.Series(range(1, len(results) + 1), name="ImageId")
submission = pd.concat([image_ids, results], axis=1)
submission.to_csv("submission.csv", index=False)
submission.head()



In [None]:
# Disabled preview, kept as a string literal (nothing here executes): it would
# draw a 3x3 grid of randomly chosen test images, each titled with the label
# stored in `results` for that image.
# NOTE(review): np.random.randint(1, 28001) can return 28000 and never returns
# 0; if `test` has 28000 rows (the submission cell numbers ImageId 1..28000)
# that index is out of range -- confirm before re-enabling.
"""for i in range(1,10):
    index = np.random.randint(1,28001)
    plt.subplot(3,3,i)
    plt.imshow(test[index].reshape(28,28))
    plt.title("Predicted Label : {}".format(results[index]))
plt.subplots_adjust(hspace = 1.2, wspace = 1.2)
plt.show()"""

In [None]:
# Disabled diagnostics, kept as one string literal (nothing here executes):
#   * training / validation accuracy and loss curves,
#   * a confusion-matrix plot for the validation split,
#   * the six misclassified validation images with the largest gap between the
#     predicted-class probability and the true-class probability.
# NOTE(review): the snippet was written for a single model -- it calls
# `model.predict` and reads `history.history`, whereas the training cell stores
# `model` and `history` as lists, so it needs indexing (or an ensemble average)
# before it can be re-enabled.
"""
# Plot training & validation accuracy values
plt.plot(history.history['acc'])
plt.plot(history.history['val_acc'])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.show()

# Plot training & validation loss values
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.show()

# Look at confusion matrix 
from sklearn.metrics import confusion_matrix
import itertools

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    
   # This function prints and plots the confusion matrix.
   # Normalization can be applied by setting `normalize=True`.
   
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j],
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
if not CommitFlag:
    # Predict the values from the validation dataset
    Y_pred = model.predict(X_test)
    # Convert predictions classes to one hot vectors 
    Y_pred_classes = np.argmax(Y_pred,axis = 1) 
    # Convert validation observations to one hot vectors
    Y_true = np.argmax(y_test,axis = 1) 
    # compute the confusion matrix
    confusion_mtx = confusion_matrix(Y_true, Y_pred_classes) 
    # plot the confusion matrix
    plot_confusion_matrix(confusion_mtx, classes = range(10)) 
    
# Display some error results 

# Errors are difference between predicted labels and true labels
if not CommitFlag:

    errors = (Y_pred_classes - Y_true != 0)

    Y_pred_classes_errors = Y_pred_classes[errors]
    Y_pred_errors = Y_pred[errors]
    Y_true_errors = Y_true[errors]
    X_val_errors = X_test[errors]

def display_errors(errors_index,img_errors,pred_errors, obs_errors):
    # This function shows 6 images with their predicted and real labels
    n = 0
    nrows = 2
    ncols = 3
    fig, ax = plt.subplots(nrows,ncols,sharex=True,sharey=True)
    for row in range(nrows):
        for col in range(ncols):
            error = errors_index[n]
            ax[row,col].imshow((img_errors[error]).reshape((28,28)))
            ax[row,col].set_title("Predicted label :{}\nTrue label :{}".format(pred_errors[error],obs_errors[error]))
            n += 1

if not CommitFlag:
    # Probabilities of the wrong predicted numbers
    Y_pred_errors_prob = np.max(Y_pred_errors,axis = 1)

    # Predicted probabilities of the true values in the error set
    true_prob_errors = np.diagonal(np.take(Y_pred_errors, Y_true_errors, axis=1))

    # Difference between the probability of the predicted label and the true label
    delta_pred_true_errors = Y_pred_errors_prob - true_prob_errors

    # Sorted list of the delta prob errors
    sorted_dela_errors = np.argsort(delta_pred_true_errors)

    # Top 6 errors 
    most_important_errors = sorted_dela_errors[-6:]

    # Show the top 6 errors
    display_errors(most_important_errors, X_val_errors, Y_pred_classes_errors, Y_true_errors)
    """