In [6]:
import kaggle as kg
import os
import pathlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.utils import to_categorical

from keras.layers import Input, Dense
from keras.models import Model
from keras.optimizers import RMSprop

In [None]:
os.environ["KAGGLE_USERNAME"] = "aimlrl"
os.environ["KAGGLE_KEY"] = "54d4150a6ca782d7b27af3f3754eddd8"

In [None]:
kg.api.authenticate()

In [None]:
kg.api.dataset_download_files(dataset="medahmedkrichen/devanagari-handwritten-character-datase",
                              path="dataset",unzip=True)

In [7]:
def train_test_df(path):

    img_path = list()
    img_label = list()

    for single_class_dir_path in pathlib.Path(path).glob("*"):

        for single_class_img_path in pathlib.Path(single_class_dir_path).glob("*.png"):

            img_path.append(str(single_class_img_path))
            #print(str(single_class_img_path).split("/")[-2].split("_")[-1])
            img_label.append(str(single_class_img_path).split("/")[-2].split("_")[-1])

    return pd.DataFrame(data={"img_path":img_path,"label":img_label})        

In [8]:
train_path = "dataset/DevanagariHandwrittenCharacterDataset/Train"
test_path = "dataset/DevanagariHandwrittenCharacterDataset/Test"

In [9]:
training_data = train_test_df(train_path)
testing_data = train_test_df(test_path)

In [10]:
character2int = dict(zip(training_data["label"].unique(),range(len(training_data["label"].unique()))))

In [11]:
training_data["label"].replace(to_replace=character2int.keys(),value=character2int.values(),
                               inplace=True)

testing_data.replace(to_replace=character2int.keys(),value=character2int.values(),
                     inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  training_data["label"].replace(to_replace=character2int.keys(),value=character2int.values(),
  training_data["label"].replace(to_replace=character2int.keys(),value=character2int.values(),
  testing_data.replace(to_replace=character2int.keys(),value=character2int.values(),


In [12]:
Y_true_train = to_categorical(y=training_data["label"],num_classes=46)
Y_true_test = to_categorical(y=testing_data["label"],num_classes=46)

In [13]:
def multiclass_dnn():

    input_to_dnn = Input(shape=(1024,))
    first_dense_out = Dense(units=1024,activation="relu") (input_to_dnn)
    output = Dense(units=46,activation="softmax") (first_dense_out)

    return Model(inputs=[input_to_dnn],outputs=[output])

In [14]:
def custom_data_generator(data_df, Y_true, mb_size):

    for time_step in range(data_df.shape[0]//mb_size):
        X_mb = list()

        for img_path in data_df.iloc[time_step*mb_size:(time_step+1)*mb_size,0]:

            img_np_array = plt.imread(img_path)
            reshaped_np_array = img_np_array.reshape(1024,)
            X_mb.append(reshaped_np_array)

        X_mb = np.array(X_mb)
        Y_true_mb = Y_true[time_step*mb_size:(time_step+1)*mb_size]

        yield X_mb, Y_true_mb

In [15]:
epochs = 10
training_data_mb_size = 782
testing_data_mb_size = 138
training_data_generator = custom_data_generator(training_data,Y_true_train,training_data_mb_size)
testing_data_generator = custom_data_generator(testing_data,Y_true_test,testing_data_mb_size)

In [24]:
model = multiclass_dnn()
model.summary()

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 1024)]            0         
                                                                 
 dense_4 (Dense)             (None, 1024)              1049600   
                                                                 
 dense_5 (Dense)             (None, 46)                47150     
                                                                 
Total params: 1096750 (4.18 MB)
Trainable params: 1096750 (4.18 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [23]:
def loss_fn(Y_true_mb,Y_pred_mb):

    return tf.reduce_mean(tf.keras.losses.categorical_crossentropy(y_true=Y_true_mb,
                                                                   y_pred=Y_pred_mb))

optimizer = RMSprop(learning_rate=0.0001)

In [18]:
@tf.function
def training_step(X_true_train_mb,Y_true_train_mb):

    with tf.GradientTape() as tape:
            
        Y_pred_train_mb = model(X_train_mb, training=True)
        training_loss = loss_fn(Y_true_train_mb, Y_pred_train_mb)

    grads = tape.gradient(training_loss, model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))

    train_acc_metric.update_state(Y_true_train_mb,Y_pred_train_mb)

    return training_loss

In [19]:
@tf.function
def testing_forward_pass(X_test_mb,Y_true_test_mb):

    Y_pred_test_mb = model(X_test_mb,training=False)
    testing_loss = loss_fn(Y_true_test_mb,Y_pred_test_mb)
    test_acc_metric.update_state(Y_true_test_mb,Y_pred_test_mb)

    return testing_loss

In [25]:
train_acc_metric = tf.keras.metrics.CategoricalAccuracy()
test_acc_metric = tf.keras.metrics.CategoricalAccuracy()

for epoch in range(epochs):

    training_data_generator = custom_data_generator(training_data,Y_true_train,782)

    for time_step, (X_train_mb, Y_true_train_mb) in enumerate(training_data_generator):
        training_loss = training_step(X_train_mb,Y_true_train_mb)

        #if (time_step+1) % 50 == 0:
        print("Epoch %d, Time Step %d, Training loss for one mini batch: %.4f"
            % (epoch+1, time_step+1, float(training_loss)))
            
    training_acc = train_acc_metric.result()    
    print("Epoch %d, Training Accuracy: %.2f" % (epoch+1,float(training_acc)))
    train_acc_metric.reset_state()

    for X_test_mb, Y_true_test_mb in testing_data_generator:
        testing_loss = testing_forward_pass(X_test_mb,Y_true_test_mb)

    print("\nEpoch %d, Testing Loss for last mini batch: %.4f" % (epoch+1,float(testing_loss)))
    testing_acc = test_acc_metric.result()
    print("Epoch %d, Testing Accuracy: %.2f" % (epoch+1,float(testing_acc)))
    test_acc_metric.reset_state()

    print("\n\n")

Epoch 1, Time Step 1, Training loss for one mini batch: 5.6247
Epoch 1, Time Step 2, Training loss for one mini batch: 4.1371
Epoch 1, Time Step 3, Training loss for one mini batch: 4.3814
Epoch 1, Time Step 4, Training loss for one mini batch: 3.2353
Epoch 1, Time Step 5, Training loss for one mini batch: 3.2523
Epoch 1, Time Step 6, Training loss for one mini batch: 2.7486
Epoch 1, Time Step 7, Training loss for one mini batch: 2.8350
Epoch 1, Time Step 8, Training loss for one mini batch: 3.3826
Epoch 1, Time Step 9, Training loss for one mini batch: 2.4872
Epoch 1, Time Step 10, Training loss for one mini batch: 2.8634
Epoch 1, Time Step 11, Training loss for one mini batch: 1.8471
Epoch 1, Time Step 12, Training loss for one mini batch: 3.9690
Epoch 1, Time Step 13, Training loss for one mini batch: 2.5065
Epoch 1, Time Step 14, Training loss for one mini batch: 4.1761
Epoch 1, Time Step 15, Training loss for one mini batch: 2.8203
Epoch 1, Time Step 16, Training loss for one mini