In [152]:
import pandas as pd
import os
import numpy as np
import keras
from keras import layers

# Confirm the files are in the correct location
assert os.path.exists('data/train.csv')
assert os.path.exists('data/test.csv')

# Load the training data csvs
train_csv = pd.read_csv('data/train.csv')
test_csv = pd.read_csv('data/test.csv')
test_csv.head()

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [153]:
# Model / data parameters - used for reshaping
num_classes = 10
input_shape = (28, 28, 1)

# Convert the training table into x/y sets
train_ydata = pd.DataFrame(train_csv["label"], columns=["label"])
train_xdata = train_csv.iloc[:, 1:].to_numpy().reshape((train_csv.shape[0], *input_shape))

# Create x set for test set (for submission)
test_xdata = test_csv.to_numpy().reshape((test_csv.shape[0], *input_shape))

# One hot encoding for outputs
train_ydata = keras.utils.to_categorical(train_ydata, num_classes)


In [154]:
from sklearn.model_selection import train_test_split

# Normalize the pixel arrays 
train_xdata = train_xdata.astype(np.float32) / 255.0
test_xdata = test_xdata.astype(np.float32) / 255.0

# Split data to train and validation sets
x_train, x_val, y_train, y_val = train_test_split(train_xdata, train_ydata, test_size=0.1, random_state=0)


In [155]:
# Configure Model Structure
model = keras.Sequential(
    [
        keras.Input(shape=input_shape),
        layers.Conv2D(32, kernel_size=(3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Flatten(),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation="softmax"),
    ]
)

model.summary()

Model: "sequential_20"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_40 (Conv2D)          (None, 26, 26, 32)        320       
                                                                 
 max_pooling2d_40 (MaxPoolin  (None, 13, 13, 32)       0         
 g2D)                                                            
                                                                 
 conv2d_41 (Conv2D)          (None, 11, 11, 64)        18496     
                                                                 
 max_pooling2d_41 (MaxPoolin  (None, 5, 5, 64)         0         
 g2D)                                                            
                                                                 
 flatten_20 (Flatten)        (None, 1600)              0         
                                                                 
 dropout_20 (Dropout)        (None, 1600)            

In [156]:
# Train the model and evaluate
batch_size = 128
epochs = 100

model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, 
validation_data=(x_val, y_val)
)

# score = model.evaluate(x_val, y_val, verbose=1)
# print("Validation loss:", score[0])
# print("Validation accuracy:", score[1])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x22dde4cd100>

In [158]:
# Evaluate the model over the submission dataset
y_test = model.predict(test_xdata)

# Save the data to a file
output_data = pd.DataFrame({"ImageId": np.arange(len(y_test))+1, "Label": y_test.argmax(axis=-1)})
output_data.to_csv("prediction.csv", index=False)


