In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import StratifiedKFold

In [None]:
# Single seed shared by every source of randomness in this notebook
# (weight initialisation, dataset shuffling, the K-fold split).
SEED = 28

# Seeds Python's `random`, NumPy and TensorFlow in one call. Seeding only
# NumPy and TensorFlow leaves Python's `random` module unseeded, which can
# make otherwise identical runs differ.
tf.keras.utils.set_random_seed(SEED)

In [None]:
# Read both CSV tables from ./data: the training table (it carries a `label`
# column, split off further down) and the test table.
df_train, df_test = (
    pd.read_csv(csv_path)
    for csv_path in ('./data/train.csv', './data/test.csv')
)

In [None]:
# Peek at the last five rows of the training table.
df_train.iloc[-5:]

In [None]:
# Peek at the last five rows of the test table.
df_test.iloc[-5:]

In [None]:
# (rows, columns) of the training table followed by the test table.
print(*(frame.shape for frame in (df_train, df_test)))

In [None]:
# Distinct class labels in ascending order.
# `Series.unique()` is used instead of calling `pd.unique` on a plain Python
# list: list input to `pd.unique` is deprecated in recent pandas releases, and
# sorting the handful of unique values is cheaper than sorting every row first.
np.sort(df_train['label'].unique())

In [None]:
# Number of training rows per label, most frequent label first.
df_train.label.value_counts()

In [None]:
# Total count of missing cells in the training table:
# per-column counts first, then their grand total.
train_missing_per_column = df_train.isna().sum()
train_missing_per_column.sum()

In [None]:
# Total count of missing cells in the test table:
# per-column counts first, then their grand total.
test_missing_per_column = df_test.isna().sum()
test_missing_per_column.sum()

In [None]:
# Separate the target column from the features and switch to NumPy arrays:
# `y_train` holds the labels, `X_train` every remaining training column, and
# `X_test` the whole test table.
y_train = df_train['label'].to_numpy()
X_train = df_train.drop(columns='label').to_numpy()
X_test = df_test.to_numpy()
print(X_train.shape, y_train.shape, X_test.shape)

In [None]:
# Row used for the visual sanity checks (the same position is reused for the
# test sample further down).
INDEX = 1234

# Widen NumPy's print width so the 28-column pixel grid is easier to read.
# Note that this changes the print options for the rest of the session.
np.set_printoptions(linewidth=120)

# View the selected training row as a 28x28 grid and dump the raw values.
img_train = np.reshape(X_train[INDEX], (28, 28))
print(img_train)

In [None]:
# Same raw-pixel dump for the test row at position INDEX, viewed as 28x28.
img_test = np.reshape(X_test[INDEX], (28, 28))
print(img_test)

In [None]:
# Render the selected training sample; the title is its label.
fig, ax = plt.subplots()
ax.imshow(img_train)
ax.set_title(str(y_train[INDEX]))
plt.show()

In [None]:
# Render the selected test sample (no title: no label is available for it here).
fig, ax = plt.subplots()
ax.imshow(img_test)
plt.show()

In [None]:
# --- Cross-validation and input-pipeline settings ---------------------------
NUM_FOLD = 10                          # number of stratified folds
BATCH_SIZE = 32                        # samples per batch in every tf.data pipeline
SHUFFLE_SIZE = 1000                    # shuffle buffer size for the training pipeline
PREFETCH_SIZE = tf.data.AUTOTUNE       # let tf.data choose the prefetch depth

# Stratified splitter: shuffled, with a fixed seed so the folds are repeatable.
kf = StratifiedKFold(n_splits=NUM_FOLD, shuffle=True, random_state=SEED)

# Per-fold results collected by the training loop.
fold_acc_hist = []
fold_loss_hist = []
histories = []

# When True, the training loop prints one-off diagnostics on its first fold.
log_print = True

In [None]:
# Preprocessing applied to every batch before it reaches the classifier:
# flat 784-value rows are reshaped to 28x28x1 and multiplied by 1/255.
_preprocessing_layers = [
    tf.keras.Input(shape=(784,)),
    tf.keras.layers.Reshape(target_shape=(28, 28, 1)),
    tf.keras.layers.Rescaling(scale=1. / 255),
]
preprocessing_data = tf.keras.Sequential(_preprocessing_layers)

In [None]:
def build_digit_model(input_size= (28, 28, 1), num_classes= 10):
    """Create an uncompiled convolutional classifier.

    The network is: 3x3 convolution with 32 filters (ReLU) -> 2x2 max pooling
    -> flatten -> dense layer with 128 units (ReLU) -> dense output layer.

    Args:
        input_size: Shape of a single input sample, without the batch axis.
        num_classes: Number of units in the output layer.

    Returns:
        A `tf.keras.Model`. The output layer uses a linear activation, so the
        model emits raw scores rather than probabilities.
    """
    layers = tf.keras.layers
    image_in = tf.keras.Input(shape=input_size)

    layer_stack = [
        layers.Conv2D(32, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dense(num_classes, activation='linear'),
    ]

    # Thread the input tensor through the stack, one layer after another.
    tensor = image_in
    for layer in layer_stack:
        tensor = layer(tensor)

    return tf.keras.Model(image_in, tensor)

In [None]:
# Stratified K-fold cross-validation: each fold trains a freshly built model on
# its training split and records the validation loss and accuracy.

# Start from empty accumulators so that re-running this cell does not append a
# second set of folds to lists left over from an earlier run, which would skew
# the averages computed from them afterwards.
fold_acc_hist, fold_loss_hist = [], []
histories = []

for i, (train_idx, val_idx) in enumerate(kf.split(X_train, y_train)):
    # One-off diagnostics are limited to the first fold. The decision is derived
    # from the fold index instead of flipping the global `log_print` switch, so
    # the cell behaves the same way every time it is executed.
    show_details = log_print and i == 0

    X_train_fold, X_val_fold = X_train[train_idx], X_train[val_idx]
    y_train_fold, y_val_fold = y_train[train_idx], y_train[val_idx]

    train_ds = tf.data.Dataset.from_tensor_slices((X_train_fold, y_train_fold))
    val_ds = tf.data.Dataset.from_tensor_slices((X_val_fold, y_val_fold))
    if show_details:
        print('Train and Validation Datasets are created successfully!')

    # Only the training pipeline is shuffled; the validation pipeline keeps
    # its order.
    train_ds = (train_ds
                .cache()
                .shuffle(SHUFFLE_SIZE)
                .batch(BATCH_SIZE)
                .prefetch(PREFETCH_SIZE))
    val_ds = (val_ds
              .cache()
              .batch(BATCH_SIZE)
              .prefetch(PREFETCH_SIZE))

    # Run the shared preprocessing model on whole batches; labels pass through.
    processed_train_ds = train_ds.map(lambda x, y: (preprocessing_data(x), y))
    processed_val_ds = val_ds.map(lambda x, y: (preprocessing_data(x), y))
    if show_details:
        print('Processing datasets done!')

    # A new model per fold, so nothing learned in one fold leaks into the next.
    model = build_digit_model()
    if show_details:
        model.summary()
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
        # `from_logits=True` matches the linear output layer of the model.
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=['accuracy']
    )

    # Callbacks are recreated for every fold so their internal counters start
    # from scratch.
    early_stopping = tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)
    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(patience=3, factor=0.2, min_lr=1e-5)

    print(f"----- Fold {i+1}/{NUM_FOLD} -----")
    history = model.fit(processed_train_ds,
                        epochs=50,
                        validation_data=processed_val_ds,
                        callbacks=[early_stopping, reduce_lr],
                        verbose=2)

    histories.append(history.history)

    # Score the fold on its validation split and keep the numbers for the summary.
    val_loss, val_accuracy = model.evaluate(processed_val_ds, verbose=0)
    print(f'Fold {i+1} Loss: {val_loss:.4f}')
    print(f'Fold {i+1} Accuracy: {val_accuracy:.4f}')
    fold_acc_hist.append(val_accuracy)
    fold_loss_hist.append(val_loss)

In [None]:
# Cross-validation summary: mean of the per-fold validation metrics.
mean_val_accuracy = np.mean(fold_acc_hist)
mean_val_loss = np.mean(fold_loss_hist)

print(f'Average Validation Accuracy: {mean_val_accuracy:.4f}')
print(f'Average Validation Loss: {mean_val_loss:.4f}')

In [None]:
# Training curves of the most recently recorded fold: accuracy on the left,
# loss on the right, each with its train and validation series.
last_hist = histories[-1]
hist_df = pd.DataFrame(last_hist)

# (history key, axis/legend word, panel title, legend position) per panel.
panel_specs = (
    ('accuracy', 'Accuracy', 'Model Accuracy (Last Fold)', 'lower right'),
    ('loss', 'Loss', 'Model Loss (last Fold)', 'upper right'),
)

fig, axs = plt.subplots(1, 2, figsize=(12, 6))
for ax, (metric_key, metric_word, panel_title, legend_loc) in zip(axs, panel_specs):
    ax.plot(hist_df[metric_key], label=f'Train {metric_word}')
    ax.plot(hist_df[f'val_{metric_key}'], label=f'Validation {metric_word}')
    ax.set_title(panel_title)
    ax.set_xlabel('Epoch', size=10)
    ax.set_ylabel(metric_word, size=10)
    ax.legend(loc=legend_loc)

plt.show()

In [None]:
# Inference pipeline for the test features: batched and prefetched, with no
# shuffling, so predictions come out in the same order as the rows of X_test.
test_ds = (
    tf.data.Dataset.from_tensor_slices(X_test)
    .batch(BATCH_SIZE)
    .prefetch(PREFETCH_SIZE)
)

# Apply the same preprocessing model that the training batches went through.
processed_test_ds = test_ds.map(lambda pixels: preprocessing_data(pixels))

In [None]:
# `model` is whatever the cross-validation loop assigned last, i.e. the model
# trained in the final fold.
preds = model.predict(processed_test_ds)

# The model emits one score per class; the predicted label is the index of the
# highest score in each row.
preds_label = tf.math.argmax(preds, axis=1)
print(f'Sample Prediction: {preds_label[10]}')

In [None]:
# Assemble the submission table: one row per test image, in test-set order.
# `df_test` comes straight from `read_csv`, so its index is the default 0-based
# range, whereas the submission format numbers images starting from 1. The +1
# shift keeps every prediction attached to the image id it belongs to.
submission = pd.DataFrame(
    {'ImageId': df_test.index + 1,
     'Label': preds_label.numpy()}
)

In [None]:
# Write the predictions to disk without the DataFrame index, then preview them.
# NOTE(review): the target path looks like the competition's sample file name;
# confirm that overwriting it is intended.
submission.to_csv('./data/sample_submission.csv', index=False)
print('Submission CSV file created successfully!')
submission.head()