# License

MIT License

Copyright (c) 2024 cmacklin@gwu.edu

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

# Importing Required Packages

In [None]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import AveragePooling2D
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.optimizers import SGD
import tensorflow as tf
from sklearn.model_selection import train_test_split
!pip install scikeras
from scikeras.wrappers import KerasClassifier
from sklearn.metrics import roc_auc_score

# Load Data

In [2]:
# Competition CSVs are available at
# https://www.kaggle.com/competitions/digit-recognizer/data
# Both files are expected in the current working directory.
train, test = (pd.read_csv(csv_name) for csv_name in ('train.csv', 'test.csv'))

# Image Pre-processing

In [3]:
# Separate the target column from the pixel columns.
Y_train = train["label"]
X_train = train.drop(columns=["label"])

# Normalize by dividing by 255.0, then reshape each flat row into a
# 28x28 single-channel image (the leading -1 lets NumPy infer the row count).
X_train = (X_train / 255.0).values.reshape(-1, 28, 28, 1)
test = (test / 255.0).values.reshape(-1, 28, 28, 1)

# One-hot encode the labels into 10 classes.
Y_train = tf.one_hot(Y_train, depth=10)

In [4]:
# Objects exposing a .numpy() method (e.g. the tensor returned by tf.one_hot)
# are converted through it; anything else is passed through unchanged.
if hasattr(X_train, 'numpy'):
    X_train_np = X_train.numpy()
else:
    X_train_np = X_train

if hasattr(Y_train, 'numpy'):
    Y_train_np = Y_train.numpy()
else:
    Y_train_np = Y_train

In [5]:
# Split the labelled data 80/20 into training and validation sets.
# random_state is fixed so the split is the same on every run.
# Note: testX/testY are the *validation* split, not the Kaggle test set.
trainX, testX, trainY, testY = train_test_split(
    X_train_np,
    Y_train_np,
    test_size=0.2,
    random_state=42,
)

# Train CNN Model

In [14]:
def define_model():
    """Build and compile the CNN digit classifier.

    The network takes 28x28 single-channel images and stacks three
    convolutional stages, each followed by 2x2 max pooling and 10% dropout:

    * two 3x3 conv layers with 32 filters,
    * two 3x3 conv layers with 64 filters,
    * one 3x3 conv layer with 128 filters,

    then flattens the result into a 10-unit softmax output layer. All
    conv layers use ReLU activation and the ``he_uniform`` initializer.

    Returns:
        A compiled ``Sequential`` model using the ``rmsprop`` optimizer,
        ``categorical_crossentropy`` loss (labels are expected one-hot
        encoded) and the ``accuracy`` metric.
    """
    model = Sequential([
        # Declare the input explicitly instead of passing `input_shape=` to the
        # first Conv2D; recent Keras versions emit a UserWarning for the latter.
        tf.keras.Input(shape=(28, 28, 1)),
        # Stage 1: 32 filters
        Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform'),
        Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform'),
        MaxPooling2D((2, 2)),  # pooling layer to reduce dimensions
        Dropout(0.1),  # dropout layer for regularization
        # Stage 2: 64 filters
        Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform'),
        Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform'),
        MaxPooling2D((2, 2)),
        Dropout(0.1),
        # Stage 3: 128 filters
        Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_uniform'),
        MaxPooling2D((2, 2)),
        Dropout(0.1),
        # Classifier head: flatten, then one softmax output per digit
        Flatten(),
        Dense(10, activation='softmax'),
    ])
    model.compile(
        optimizer='rmsprop',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

# Loss is 'categorical_crossentropy' because label values are one-hot encoded.
# 'accuracy' is the test metric defined by Kaggle.

model = define_model()

# Hyperparameters: epochs=10, batch_size=64. The held-out split is passed as
# validation data; the fit history is kept in `history`.
history = model.fit(
    trainX,
    trainY,
    epochs=10,
    batch_size=64,
    validation_data=(testX, testY),
    verbose=0,
)

# evaluate() returns (loss, accuracy) for the compiled metrics; only the
# accuracy on the validation split is reported.
_, acc = model.evaluate(testX, testY, verbose=0)
print('Accuracy:', acc)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Accuracy: 0.9916666746139526


In [15]:
# Score the fitted model on the training split, for comparison with the
# validation accuracy reported above.
_, acc = model.evaluate(
    trainX,
    trainY,
    verbose=0,
)
print('Train Accuracy:', acc)

Train Accuracy: 0.9987202286720276


In [16]:
# Compute the ROC AUC on both the training and validation splits.
# Class probabilities are predicted first (validation, then training).
y_pred = model.predict(testX)
y_pred_train = model.predict(trainX)

# NOTE(review): the targets are one-hot encoded (2-D); scikit-learn may treat
# them as multilabel-indicator, in which case `multi_class` has no effect and
# the per-class AUCs are macro-averaged -- confirm against the sklearn docs.
auc = roc_auc_score(
    trainY,
    y_pred_train,
    multi_class='ovr',
)
print(f"Train Overall AUC: {auc}")

auc = roc_auc_score(
    testY,
    y_pred,
    multi_class='ovr',
)
print(f"Valid Overall AUC: {auc}")

263/263 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step
1050/1050 ━━━━━━━━━━━━━━━━━━━━ 1s 1ms/step
Train Overall AUC: 0.9999972711170475
Valid Overall AUC: 0.999967854146318


# Architecture Summary

In [17]:
# Print the Keras summary of the trained model's layers.
model.summary()

# Create and Export Submission File

In [18]:
# The predicted digit for each test image is the index of the largest
# softmax output.
predictions = model.predict(test).argmax(axis=1)

# Two columns: a 1-based ImageId and the predicted Label.
submission = pd.DataFrame({
    'ImageId': range(1, len(predictions) + 1),
    'Label': predictions,
})
submission.to_csv('submission.csv', index=False)
print("Submission file saved as 'submission.csv'")

[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Submission file saved as 'submission.csv'
