In [20]:
import os
import pathlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.utils import to_categorical
from keras.layers import Input, Dense, BatchNormalization
from keras.models import Model
from keras.optimizers import SGD

In [21]:
os.environ["KAGGLE_USERNAME"] = "dineshsingh09"
os.environ["KAGGLE_KEY"] = "d0c36def95197970dd807d8941622709"


In [22]:
import kaggle as kg
from kaggle.api.kaggle_api_extended import KaggleApi

In [23]:
api = KaggleApi()

In [24]:
kg.api.authenticate()

In [25]:
kg.api.dataset_download_files(dataset="medahmedkrichen/devanagari-handwritten-character-datase",
                              path="dataset",unzip=True)

Dataset URL: https://www.kaggle.com/datasets/medahmedkrichen/devanagari-handwritten-character-datase


In [26]:
def train_test_df(path):

    img_path = list()
    img_label = list()

    for single_class_dir_path in pathlib.Path(path).glob("*"):

        for single_class_img_path in pathlib.Path(single_class_dir_path).glob("*.png"):

            img_path.append(str(single_class_img_path))
            #print(str(single_class_img_path).split("/")[-2].split("_")[-1])
            img_label.append(str(single_class_img_path).split("/")[-2].split("_")[-1])

    return pd.DataFrame(data={"img_path":img_path,"label":img_label})        


In [27]:
train_path = "dataset/DevanagariHandwrittenCharacterDataset/Train"
test_path = "dataset/DevanagariHandwrittenCharacterDataset/Test"

In [28]:
training_data = train_test_df(train_path)
testing_data = train_test_df(test_path)

In [29]:
training_data

Unnamed: 0,img_path,label
0,dataset/DevanagariHandwrittenCharacterDataset/...,jha
1,dataset/DevanagariHandwrittenCharacterDataset/...,jha
2,dataset/DevanagariHandwrittenCharacterDataset/...,jha
3,dataset/DevanagariHandwrittenCharacterDataset/...,jha
4,dataset/DevanagariHandwrittenCharacterDataset/...,jha
...,...,...
78195,dataset/DevanagariHandwrittenCharacterDataset/...,pa
78196,dataset/DevanagariHandwrittenCharacterDataset/...,pa
78197,dataset/DevanagariHandwrittenCharacterDataset/...,pa
78198,dataset/DevanagariHandwrittenCharacterDataset/...,pa


In [30]:
testing_data

Unnamed: 0,img_path,label
0,dataset/DevanagariHandwrittenCharacterDataset/...,jha
1,dataset/DevanagariHandwrittenCharacterDataset/...,jha
2,dataset/DevanagariHandwrittenCharacterDataset/...,jha
3,dataset/DevanagariHandwrittenCharacterDataset/...,jha
4,dataset/DevanagariHandwrittenCharacterDataset/...,jha
...,...,...
13795,dataset/DevanagariHandwrittenCharacterDataset/...,pa
13796,dataset/DevanagariHandwrittenCharacterDataset/...,pa
13797,dataset/DevanagariHandwrittenCharacterDataset/...,pa
13798,dataset/DevanagariHandwrittenCharacterDataset/...,pa


In [31]:
character2int = dict(zip(training_data["label"].unique(),range(len(training_data["label"].unique()))))

In [32]:

character2int

{'jha': 0,
 'ka': 1,
 'da': 2,
 'yna': 3,
 'tha': 4,
 'daa': 5,
 'adna': 6,
 '8': 7,
 'yaw': 8,
 'kha': 9,
 'dhaa': 10,
 'petchiryakha': 11,
 'la': 12,
 'ba': 13,
 'bha': 14,
 'ga': 15,
 'chhya': 16,
 '4': 17,
 'ma': 18,
 'gha': 19,
 '9': 20,
 'chha': 21,
 'waw': 22,
 '2': 23,
 'tabala': 24,
 'cha': 25,
 '7': 26,
 'ha': 27,
 'pha': 28,
 'tra': 29,
 'ra': 30,
 'gya': 31,
 'na': 32,
 'dha': 33,
 'kna': 34,
 '1': 35,
 'motosaw': 36,
 '5': 37,
 '3': 38,
 'patalosaw': 39,
 '0': 40,
 'taamatar': 41,
 'ja': 42,
 'thaa': 43,
 '6': 44,
 'pa': 45}

In [33]:
training_data["label"].replace(to_replace=character2int.keys(),value=character2int.values(),
                               inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  training_data["label"].replace(to_replace=character2int.keys(),value=character2int.values(),
  training_data["label"].replace(to_replace=character2int.keys(),value=character2int.values(),


In [34]:

training_data

Unnamed: 0,img_path,label
0,dataset/DevanagariHandwrittenCharacterDataset/...,0
1,dataset/DevanagariHandwrittenCharacterDataset/...,0
2,dataset/DevanagariHandwrittenCharacterDataset/...,0
3,dataset/DevanagariHandwrittenCharacterDataset/...,0
4,dataset/DevanagariHandwrittenCharacterDataset/...,0
...,...,...
78195,dataset/DevanagariHandwrittenCharacterDataset/...,45
78196,dataset/DevanagariHandwrittenCharacterDataset/...,45
78197,dataset/DevanagariHandwrittenCharacterDataset/...,45
78198,dataset/DevanagariHandwrittenCharacterDataset/...,45


In [35]:
testing_data.replace(to_replace=character2int.keys(),value=character2int.values(),
                     inplace=True)

  testing_data.replace(to_replace=character2int.keys(),value=character2int.values(),


In [36]:

testing_data

Unnamed: 0,img_path,label
0,dataset/DevanagariHandwrittenCharacterDataset/...,0
1,dataset/DevanagariHandwrittenCharacterDataset/...,0
2,dataset/DevanagariHandwrittenCharacterDataset/...,0
3,dataset/DevanagariHandwrittenCharacterDataset/...,0
4,dataset/DevanagariHandwrittenCharacterDataset/...,0
...,...,...
13795,dataset/DevanagariHandwrittenCharacterDataset/...,45
13796,dataset/DevanagariHandwrittenCharacterDataset/...,45
13797,dataset/DevanagariHandwrittenCharacterDataset/...,45
13798,dataset/DevanagariHandwrittenCharacterDataset/...,45


In [37]:
Y_true_train = to_categorical(y=training_data["label"],num_classes=46)

In [38]:
Y_true_train

array([[1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.]], dtype=float32)

In [39]:
Y_true_test = to_categorical(y=testing_data["label"],num_classes=46)

In [40]:
Y_true_test

array([[1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.]], dtype=float32)

In [41]:
def multiclass_dnn():

    input_to_dnn = Input(shape=(1024,))
    first_dense_out = Dense(units=1024,activation="relu")(input_to_dnn)
    second_dense_out = Dense(units=1024,activation="relu")(first_dense_out)
    second_dense_out = BatchNormalization()(second_dense_out)
    output = Dense(units=46,activation="softmax")(second_dense_out)

    return Model(inputs=[input_to_dnn],outputs=[output])

In [42]:
def custom_data_generator(data_df, Y_true, mb_size):

    for time_step in range(data_df.shape[0]//mb_size):
        X_mb = list()

        for img_path in data_df.iloc[time_step*mb_size:(time_step+1)*mb_size,0]:

            img_np_array = plt.imread(img_path)
            reshaped_np_array = img_np_array.reshape(1024,)
            X_mb.append(reshaped_np_array)

        X_mb = np.array(X_mb)
        Y_true_mb = Y_true[time_step*mb_size:(time_step+1)*mb_size]

        yield X_mb, Y_true_mb

In [43]:
epochs = 50
training_data_mb_size = 782
testing_data_mb_size = 138

In [44]:
model = multiclass_dnn()
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 1024)]            0         
                                                                 
 dense (Dense)               (None, 1024)              1049600   
                                                                 
 dense_1 (Dense)             (None, 1024)              1049600   
                                                                 
 batch_normalization (Batch  (None, 1024)              4096      
 Normalization)                                                  
                                                                 
 dense_2 (Dense)             (None, 46)                47150     
                                                                 
Total params: 2150446 (8.20 MB)
Trainable params: 2148398 (8.20 MB)
Non-trainable params: 2048 (8.00 KB)
______________________

In [45]:
def loss_fn(Y_true_mb,Y_pred_mb):

    return tf.reduce_mean(tf.keras.losses.categorical_crossentropy(y_true=Y_true_mb,
                                                                          y_pred=Y_pred_mb))

optimizer = SGD()

In [46]:
@tf.function
def training_step(X_train_mb,Y_true_train_mb):

    with tf.GradientTape() as tape:
            
        Y_pred_train_mb = model(X_train_mb, training=True)
        training_loss = loss_fn(Y_true_train_mb, Y_pred_train_mb)

    grads = tape.gradient(training_loss, model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))

    train_acc_metric.update_state(Y_true_train_mb,Y_pred_train_mb)

    return training_loss


In [47]:
@tf.function
def testing_forward_pass(X_test_mb,Y_true_test_mb):

    Y_pred_test_mb = model(X_test_mb,training=False)
    testing_loss = loss_fn(Y_true_test_mb,Y_pred_test_mb)
    test_acc_metric.update_state(Y_true_test_mb,Y_pred_test_mb)

    return testing_loss

In [48]:
train_acc_metric = tf.keras.metrics.CategoricalAccuracy()
test_acc_metric = tf.keras.metrics.CategoricalAccuracy()

for epoch in range(epochs):

    training_data_generator = custom_data_generator(training_data,Y_true_train,782)

    for time_step, (X_train_mb, Y_true_train_mb) in enumerate(training_data_generator):
        training_loss = training_step(X_train_mb,Y_true_train_mb)

        if (time_step+1) % 50 == 0:
            print("Epoch %d, Time Step %d, Training loss for one mini batch: %.4f"
            % (epoch+1, time_step+1, float(training_loss)))
            
    training_acc = train_acc_metric.result()    
    print("Epoch %d, Training Accuracy: %.2f" % (epoch+1,float(training_acc)))
    train_acc_metric.reset_states()

    testing_data_generator = custom_data_generator(testing_data,Y_true_test,testing_data_mb_size)

    for X_test_mb, Y_true_test_mb in testing_data_generator:
        testing_loss = testing_forward_pass(X_test_mb,Y_true_test_mb)

    print("\nEpoch %d, Testing Loss for last mini batch: %.4f" % (epoch+1,float(testing_loss)))
    testing_acc = test_acc_metric.result()
    print("Epoch %d, Testing Accuracy: %.2f" % (epoch+1,float(testing_acc)))
    test_acc_metric.reset_states()

    print("\n\n")


Epoch 1, Time Step 50, Training loss for one mini batch: 4.3015
Epoch 1, Time Step 100, Training loss for one mini batch: 4.2170
Epoch 1, Training Accuracy: 0.03

Epoch 1, Testing Loss for last mini batch: 3.6345
Epoch 1, Testing Accuracy: 0.16



Epoch 2, Time Step 50, Training loss for one mini batch: 4.1595
Epoch 2, Time Step 100, Training loss for one mini batch: 4.1427
Epoch 2, Training Accuracy: 0.07

Epoch 2, Testing Loss for last mini batch: 3.5033
Epoch 2, Testing Accuracy: 0.24



