In [5]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Collect the full path of every file found under the Kaggle input directory,
# then print one path per line.
input_file_paths = [
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
]
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [6]:
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split

In [9]:
# Directory holding the competition CSV files.
base_path = '/kaggle/input/digit-recognizer/'

# Read the training and test CSVs from that directory, in this order.
df_train, df_test = (
    pd.read_csv(os.path.join(base_path, csv_name))
    for csv_name in ('train.csv', 'test.csv')
)

In [10]:
# Show the training DataFrame as this cell's output.
df_train

In [11]:
# Show the test DataFrame as this cell's output.
df_test

In [41]:
# Targets come from the 'label' column; every other column is a feature.
y = df_train['label']

# Turn the feature columns into an array with one row per sample.
# The reshape requires exactly 784 values per row.
X = np.array(df_train.drop(columns=['label'])).reshape((len(df_train), 784))

# Hold out 22% of the rows for validation; the fixed random_state keeps the
# split repeatable between runs.
x_train, x_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.22,
    random_state=42,
)

# Report the shapes of the four resulting pieces.
print(f'x_train: {x_train.shape}')
print(f'y_train: {y_train.shape}')
print(f'x_val: {x_val.shape}')
print(f'y_val: {y_val.shape}')

In [42]:
# Print the smallest and largest training feature value, one per line.
print(x_train.min(), x_train.max(), sep='\n')

In [43]:

# Scale the features by 1/255. The guard makes the cell safe to re-run:
# once scaled, the maximum is at most 1, so a second run will not divide
# the already-scaled values again.
if x_train.max() > 1:
    x_train = x_train / 255
    x_val = x_val / 255

# Number of distinct labels present in the full label column.
num_classes = len(y.unique())

# One-hot encode the labels. Before encoding they are 1-D; afterwards they are
# 2-D (samples x classes), so checking ndim keeps this from running twice.
if y_train.ndim == 1:
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_val = keras.utils.to_categorical(y_val, num_classes)

In [44]:
# Fully connected classifier built layer by layer:
# 784 inputs -> 512 ReLU -> 512 ReLU -> softmax over num_classes outputs.
model = Sequential()
model.add(Dense(512, activation='relu', input_shape=(784,)))
model.add(Dense(512, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))

# Print the layer-by-layer overview of the network.
model.summary()

In [57]:
# No optimizer was passed originally, so Keras used its compile() default,
# 'rmsprop'; it is named explicitly here so the choice is visible.
# categorical_crossentropy matches the one-hot encoded labels.
model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [58]:
# Number of passes over the training data; also reused later for the plot axis.
epochs = 12

# Train on the training split and score on the validation split after every
# epoch; the returned History object holds the per-epoch metrics.
history = model.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_val, y_val),
    epochs=epochs,
    verbose=1,
)

In [59]:
# Pull the per-epoch training/validation accuracy and loss out of the
# recorded history, in the order the names appear on the left.
acc, val_acc, loss, val_loss = (
    history.history[metric_key]
    for metric_key in ('accuracy', 'val_accuracy', 'loss', 'val_loss')
)

In [60]:
from matplotlib import pyplot as plt

# Draw both panels in ONE figure and call show() once. Calling plt.show()
# between two plt.subplot(2, 1, k) calls emits two separate figures, each
# using only half of its canvas.
epoch_axis = np.arange(1, epochs + 1)
fig, (ax_loss, ax_acc) = plt.subplots(2, 1)

# Top panel: loss per epoch ('r' = solid red line, 'b' = solid blue line).
ax_loss.plot(epoch_axis, loss, 'r', label='Training loss')
ax_loss.plot(epoch_axis, val_loss, 'b', label='Validation loss')
ax_loss.set_title('Training and validation loss')
ax_loss.set_xlabel('Epochs')
ax_loss.set_ylabel('Loss')
ax_loss.legend()

# Bottom panel: accuracy per epoch.
ax_acc.plot(epoch_axis, acc, 'r', label='Training acc')
ax_acc.plot(epoch_axis, val_acc, 'b', label='Validation acc')
ax_acc.set_title('Training and validation accuracy')
ax_acc.set_xlabel('Epochs')
ax_acc.set_ylabel('Accuracy')
ax_acc.legend(loc='lower right')

# Keep the top panel's x-label from colliding with the bottom panel's title.
fig.tight_layout()
plt.show()

In [61]:
# Score the model on the validation split. The targets must be passed too:
# without y_val there is nothing to compute the loss and accuracy against.
model.evaluate(x_val, y_val)

In [62]:
# Show the validation feature array as this cell's output.
x_val

In [63]:
# The model was trained on features divided by 255, so the test features must
# get the same scaling before prediction; feeding raw values would put the
# inputs on a different scale than anything seen during training.
test_features = df_test.to_numpy() / 255

# predict() returns one score per class for each row; argmax over the class
# axis picks the predicted class for each row.
class_scores = model.predict(test_features)
preds = np.argmax(class_scores, axis=1)
print(preds)

In [65]:
# Build the submission table directly: ImageId counts 1..N in row order,
# Label is the predicted class for that row.
output = pd.DataFrame({
    'ImageId': np.arange(1, len(preds) + 1),
    'Label': preds,
})
print(output)

In [66]:
# Write the submission table to submission.csv, leaving out the DataFrame index.
output.to_csv('submission.csv', index=False)