In [0]:
# Base directory of the project. Data files are located by plain string
# concatenation (e.g. PATH + 'data/train.csv'), so the trailing slash is required.
PATH = '/content/drive/My Drive/Colab Notebooks/Digit Recognizer/'

**TRAINING**

In [0]:
# CNN model for Digit Classification

# Importing libraries
import tensorflow as tf
import numpy as np
import pandas as pd

from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from keras.losses import SparseCategoricalCrossentropy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Conv2D, MaxPool2D

# Reading training data
df_train = pd.read_csv(PATH+'data/train.csv')

# Splitting dataframe into Features(X) and Labels(y):
# column 0 is used as the label, every remaining column as a feature
X = df_train.iloc[:, 1:]
y = df_train.iloc[:, 0]

# Splitting data into train and validation data (fixed seed for a repeatable split)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.33, random_state=42)

# Scaling features into range from 0 to 1.
# The scaler is fitted on the training split ONLY; the validation split is
# transformed with those same parameters. Re-fitting on the validation data
# would scale the two splits differently and leak validation statistics.
scaler = preprocessing.MinMaxScaler((0,1))
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Reshaping data as 28 x 28 single-channel images to feed in CNN
# (-1 lets numpy infer the number of samples)
train_images = X_train.reshape(-1, 28, 28, 1)
val_images = X_val.reshape(-1, 28, 28, 1)

# Defining the model: four convolution layers (the first two followed by
# max-pooling), then two fully connected layers and a 10-way softmax output.
# `kernel_initializer` / `units` are the Keras 2 argument names; the Keras 1
# spellings `init` / `output_dim` are deprecated and rejected by later releases.
model = Sequential([
    Conv2D(80, (3,3), kernel_initializer='he_uniform', activation='relu', input_shape=(28, 28, 1)),
    MaxPool2D(2,2),
    Conv2D(48, (3,3), kernel_initializer='he_uniform', activation='relu'),
    MaxPool2D(2,2),
    Conv2D(48, (3,3), kernel_initializer='he_uniform', activation='relu'),
    Conv2D(32, (3,3), kernel_initializer='he_uniform', activation='relu'),
    Flatten(),
    Dense(units=112, kernel_initializer='he_uniform', activation='relu'),
    Dense(units=112, kernel_initializer='he_uniform', activation='relu'),
    Dense(10, activation='softmax'),
])

# Labels are integer class ids, hence the sparse categorical cross-entropy loss
model.compile(optimizer='adam', loss=SparseCategoricalCrossentropy(), metrics=['accuracy'])

# Training Model
# Only `validation_data` is passed: when it is given, Keras ignores
# `validation_split`, so supplying both was misleading. Labels are converted to
# numpy arrays for both splits so Keras receives the same type for each.
model.fit(
    train_images,
    np.array(y_train),
    validation_data=(val_images, np.array(y_val)),
    epochs=10,
)



Train on 28140 samples, validate on 13860 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.callbacks.History at 0x7f452f6d86d8>

**TESTING**

In [0]:
# Reading testing data
df_test = pd.read_csv(PATH+'data/test.csv')

# Scaling features with the `scaler` object created in the training cell.
# Only `transform` is called: fitting a fresh scaler on the test set would scale
# the test features differently from the data the model was trained on.
X_test = scaler.transform(np.array(df_test))

# Reshaping data as 28 x 28 single-channel images to feed in CNN
# (-1 lets numpy infer the number of samples)
X_test = X_test.reshape(-1, 28, 28, 1)

# Predicting Test data output
# `y_test` holds the model's per-class scores for every test row (one row per
# sample); the predicted label is the index of the highest score in each row.
y_test = model.predict(X_test)
preds = np.argmax(y_test, axis=1).tolist()

# Creating and saving predicted data into a .csv file
# submission = pd.DataFrame({ 'ImageId': list(range(1, len(preds) + 1)), 'Label': preds })
# submission.to_csv("sample_submission.csv", index=False)