# Digit Recognizer

The goal is to correctly identify digits from a dataset of tens of thousands of handwritten images.

The data files train.csv and test.csv contain gray-scale images of hand-drawn digits, from zero through nine. The test data set (test.csv) has the same format as the training set, except that it does not contain the "label" column.

#### Input Data

In [None]:
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Print the installed TensorFlow version so the notebook output records which release ran.
print(tf.__version__)

In [None]:
#Kaggle MNIST
import numpy as np
import pandas as pd

# Read the two Kaggle CSV files into DataFrames. The paths are relative to
# the notebook's working directory.
train_csv_path = "../input/train.csv"
test_csv_path = "../input/test.csv"

train_images = pd.read_csv(train_csv_path)
test_images = pd.read_csv(test_csv_path)

## Building the Neural Network



#### Processing Data:

In [None]:
# Separate the features from the target: every column except "label"
# becomes X_train, and the "label" column on its own becomes Y_train.
X_train = train_images.drop(columns=["label"])
Y_train = train_images["label"]

# Show how many samples exist for each label value.
Y_train.value_counts()

In [None]:
# Report the dimensions of the training features, then preview the first rows.
train_shape = X_train.shape
print("train from Kaggle: ", train_shape)
X_train.head()

In [None]:
# Report the dimensions of the test features, then preview the first rows.
test_shape = test_images.shape
print("test from Kaggle: ", test_shape)
test_images.head()

In [None]:
# Flag each training column that contains at least one missing value, then
# summarize those per-column flags.
train_columns_with_missing = X_train.isnull().any()
train_columns_with_missing.describe()

In [None]:
# Flag each test column that contains at least one missing value, then
# summarize those per-column flags.
test_columns_with_missing = test_images.isnull().any()
test_columns_with_missing.describe()

#### Normalize train & test data:

In [None]:
# Rescale both feature sets by the same constant so train and test stay
# comparable. Presumably the raw values are gray levels in 0-255, which
# would map them into [0, 1] - TODO confirm against the CSV contents.
PIXEL_SCALE = 255.0

X_train = X_train / PIXEL_SCALE
test_images = test_images / PIXEL_SCALE

In [None]:
# Turn every flat row into a 3-D image: height = 28 px, width = 28 px,
# channel = 1. The leading -1 lets NumPy infer the number of images.
IMAGE_BATCH_SHAPE = (-1, 28, 28, 1)

X_train = X_train.values.reshape(IMAGE_BATCH_SHAPE)
X_test = test_images.values.reshape(IMAGE_BATCH_SHAPE)

In [None]:
# Confirm that both arrays now have the 4-D (samples, 28, 28, 1) layout.
for caption, images in (("train from Kaggle: ", X_train),
                        ("test from Kaggle: ", X_test)):
    print(caption, images.shape)

## Build the Model

#### Setup the layers

In [None]:
# Convolutional classifier for 28x28x1 images, built layer by layer.
# The shape notes assume the Keras defaults that are not overridden here:
# 'valid' padding and stride 1 for convolutions, pooling stride = pool size.
model = Sequential()

# Stage 1: two 4x4 convolutions with 32 filters (28 -> 25 -> 22 per side).
model.add(Conv2D(32, kernel_size=(4, 4), activation='relu', input_shape=(28, 28, 1)))
model.add(Conv2D(32, kernel_size=(4, 4), activation='relu'))
# A 1x1 pool leaves the feature-map size unchanged (22 -> 22).
model.add(MaxPool2D(pool_size=(1, 1)))
model.add(Dropout(0.12))

# Stage 2: two 3x3 convolutions with 64 filters (22 -> 20 -> 18), then a
# 2x2 pool (18 -> 9).
model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(Dropout(0.24))

# Stage 3: 2x2 convolutions with 128 and then 512 filters (9 -> 8 -> 7),
# then a 3x3 pool (7 -> 2).
model.add(Conv2D(128, kernel_size=(2, 2), activation='relu'))
model.add(Conv2D(512, kernel_size=(2, 2), activation='relu'))
model.add(MaxPool2D(pool_size=(3, 3)))
model.add(Dropout(0.48))

# Classifier head: flatten (2 * 2 * 512 = 2048 values), one hidden dense
# layer, then a 10-way softmax with one output per digit class.
model.add(Flatten())
model.add(Dense(512, activation=tf.nn.relu))
model.add(Dropout(0.6))
model.add(Dense(10, activation=tf.nn.softmax))

#### Compile the Model

In [None]:
# Configure training.
# The optimizer is given by its Keras string identifier instead of
# tf.train.AdamOptimizer(). That class only exists in TensorFlow 1.x, so on
# TensorFlow 2.x the attribute lookup raises AttributeError. 'adam' resolves
# to the Keras Adam optimizer with its default hyper-parameters on both.
# 'sparse_categorical_crossentropy' takes integer class labels directly, so
# the label column needs no one-hot encoding.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

#### Train the model

In [None]:
# Training hyper-parameters: passes over the data and samples per gradient step.
epochs = 30
batch_size = 128

# Fit on X_train / Y_train. No validation data is passed, so only the
# training loss and accuracy are reported per epoch.
model.fit(x=X_train, y=Y_train, batch_size=batch_size, epochs=epochs)

## Data Plot and Evaluation

In [None]:
# Carve a labelled hold-out set out of the training data so accuracy can be
# measured against known labels (test.csv has no "label" column).
# test_size=0.66665 sends roughly two thirds of the rows to the hold-out
# part; random_state pins the shuffle so the split is reproducible.
# NOTE(review): this split runs after model.fit(...), so the model has
# already trained on these rows and the score computed from them is not a
# true generalization estimate.
# NOTE(review): X_test is rebound here and no longer holds the reshaped
# test.csv images; those remain available through test_images.
X_train, X_test, Y_train, Y_test = train_test_split(X_train, Y_train, test_size = 0.66665, random_state=42)

# The label names the data's real origin: a slice of train.csv, not test.csv.
print("hold-out split from train: ", X_test.shape)

#### Evaluate accuracy

In [None]:
# Score the trained model on (X_test, Y_test). evaluate() returns the loss
# followed by the metrics passed to compile(), which here is accuracy.
evaluation = model.evaluate(x=X_test, y=Y_test)
test_loss, test_acc = evaluation

print('Test accuracy:', test_acc)

## Final Report

#### Make predictions

In [None]:
# Predict a digit for every image in X_test.
# predict() returns one row of 10 softmax probabilities per image; argmax
# over the last axis picks the most likely class. This replaces
# Sequential.predict_classes(), which newer Keras/TensorFlow releases no
# longer provide.
class_probabilities = model.predict(X_test)
predicted_classes = np.argmax(class_probabilities, axis=-1)

# Ground truth is the label vector that belongs to X_test. The previous
# X_test[:, 0] sliced pixel rows out of the images instead of labels, so the
# comparisons below broadcast (N,) against (N, 28, 1).
# np.asarray drops the pandas index so the comparison is purely positional.
y_true = np.asarray(Y_test)

# Positions of correctly / incorrectly classified samples, for plotting.
correct = np.nonzero(predicted_classes == y_true)[0]
incorrect = np.nonzero(predicted_classes != y_true)[0]

In [None]:
# Build the Kaggle submission from predictions on the test.csv images.
# By this point X_test has been rebound by train_test_split to a slice of
# train.csv, so predictions made from it would describe training images.
# The competition images are rebuilt here from test_images, which is
# already scaled by 255, using the same (28, 28, 1) layout the model expects.
kaggle_images = test_images.values.reshape(-1, 28, 28, 1)
kaggle_labels = np.argmax(model.predict(kaggle_images), axis=-1)

# ImageId is the 1-based row number of each test image.
submissions = pd.DataFrame({"ImageId": list(range(1, len(kaggle_labels) + 1)),
                            "Label": kaggle_labels})
submissions.to_csv("digi-recogn.csv", index=False, header=True)