# Digit Recognition Notebook

In [3]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [4]:
train_dataset = pd.read_csv('datasets/train.csv')

In [5]:
train_dataset.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [7]:
num_pixels = train_dataset.shape[1] - 1
num_labels = len(train_dataset.label.unique())
ys = tf.one_hot(train_dataset.label, depth=num_labels)
xs = train_dataset.iloc[:, 1:]
xs.head()

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### *Creating a model*

In [11]:
# Define Sequential model with 3 layers
model = keras.Sequential(
    [
        keras.Input(shape=(num_pixels,)),
        layers.Dense(50, activation="sigmoid", name="hidden_layer1"),
        layers.Dense(20, activation="sigmoid", name="hidden_layer2"),
        layers.Dense(10, activation="sigmoid", name="output_layer"),
    ]
)

model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 hidden_layer1 (Dense)       (None, 50)                39250     
                                                                 
 hidden_layer2 (Dense)       (None, 20)                1020      
                                                                 
 output_layer (Dense)        (None, 10)                210       
                                                                 
Total params: 40,480
Trainable params: 40,480
Non-trainable params: 0
_________________________________________________________________


In [13]:
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])

In [10]:
class StopOnPoint(tf.keras.callbacks.Callback):
    def __init__(self, point):
        super(StopOnPoint, self).__init__()
        self.point = point

    def on_epoch_end(self, epoch, logs=None): 
        accuracy = logs["accuracy"]
        self.epoch_end=epoch+1
        if accuracy >= self.point:
            self.model.stop_training = True
            

In [15]:
stop = StopOnPoint(0.95)
model.fit(
    x=xs,
    y=ys,
    epochs=10,
    validation_split=0.3,
    callbacks=[stop]
)
print(f'Training finished after {stop.epoch_end} epochs')

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Training finished after 25 epochs


In [16]:
test_dataset = pd.read_csv('datasets/test.csv')
test_dataset.head()

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [35]:
outfile = 'submission.csv'

res = model.predict(test_dataset)
results = []
for i, res in enumerate(res):
    r = {'ImageId': i + 1, 'Label': np.argmax(res)}
    results.append(r)
    
submission = pd.DataFrame(results, columns=['ImageId', 'Label'])
submission.head()

Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,7
4,5,3


In [37]:
submission.to_csv(outfile, index=False)