In [1]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import glob
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical

In [2]:
# Notebook-wide configuration
DATA_DIR = 'Udyam/'  # folder that holds the labelled .jpg images

# Shape every image is resized to before it reaches the network
H = 50   # height in pixels
W = 200  # width in pixels
C = 3    # colour channels

D = 6  # number of characters encoded in each image

# Label alphabet: lowercase letters, uppercase letters, then digits
characters = (
    'abcdefghijklmnopqrstuvwxyz'
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    '0123456789'
)
N_LABELS = len(characters)  # 62 distinct classes per character position

In [3]:
# Two-way lookup between label characters and their integer class ids.
# A character's id is its position in `characters`.
char_to_num = dict(zip(characters, range(len(characters))))
num_to_char = {class_id: symbol for symbol, class_id in char_to_num.items()}

In [4]:
# Derive an image's label from its file name
def parse_filepath(filepath):
    """Return the file name of ``filepath`` without directory or last extension.

    The remaining stem is used as the sample's label by the loading cell.
    If the path cannot be processed (for example it is not a string), the
    error is printed and ``None`` is returned instead of raising.
    """
    try:
        stem, _extension = os.path.splitext(os.path.basename(filepath))
    except Exception as e:
        print(f'Error parsing {filepath}: {e}')
        return None
    return stem

In [5]:
# Load and prepare the data
# glob returns matches in arbitrary, filesystem-dependent order. Sorting
# makes the row order stable, so the seeded train/valid/test split picks
# the same files on every machine.
files = sorted(glob.glob(os.path.join(DATA_DIR, "*.jpg")))
attributes = list(map(parse_filepath, files))

# Keep only samples the rest of the pipeline can encode: a parsed label of
# exactly D characters, all present in the alphabet. Anything else would
# raise inside the batch generator in the middle of training.
valid_rows = [
    (label, path)
    for label, path in zip(attributes, files)
    if label is not None
    and len(label) == D
    and all(ch in char_to_num for ch in label)
]
df = pd.DataFrame(valid_rows, columns=['label', 'file'])

# Report dropped files instead of discarding them silently.
n_skipped = len(files) - len(df)
if n_skipped:
    print(f'Skipped {n_skipped} file(s) with missing or unusable labels')

In [6]:
# Hold out 10% of all samples as the test set, then take 10% of what is
# left as the validation set. Both splits use the same fixed seed.
train_df, test_df = train_test_split(
    df,
    test_size=0.1,
    random_state=42,
)
train_df, valid_df = train_test_split(
    train_df,
    test_size=0.1,
    random_state=42,
)

In [7]:
# Report how many samples landed in each split
print(
    f'train count: {len(train_df)}, '
    f'valid count: {len(valid_df)}, '
    f'test count: {len(test_df)}'
)


train count: 19745, valid count: 2194, test count: 2438


In [8]:
# Data generator with augmentation
# NOTE: no other cell visible in this notebook references `datagen`;
# batches come from get_data_generator(). That function already divides
# pixels by 255, so feeding its output through this object would rescale twice.
datagen = ImageDataGenerator(
    rescale=1.0/255.0,
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    # Labels are left-to-right character strings, so a mirrored image no
    # longer matches its label. Horizontal flipping must stay off.
    horizontal_flip=False,
    fill_mode='nearest'
)

In [9]:
def get_data_generator(df, batch_size=16, shuffle=True):
    """Yield ``(images, labels)`` batches built from ``df``, forever.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a ``'file'`` column (image paths) and a ``'label'`` column
        (strings whose characters are all keys of ``char_to_num``).
    batch_size : int
        Number of samples in every yielded batch.
    shuffle : bool
        When true, the rows are reshuffled at the start of each pass.

    Yields
    ------
    images : numpy.ndarray
        float32 array of shape ``(batch_size, H, W, 3)`` with RGB values
        scaled to ``[0, 1]``.
    labels : numpy.ndarray
        float32 one-hot array of shape
        ``(batch_size, len(label), N_LABELS)``. All labels in a batch must
        have the same length for this to stack.

    Raises
    ------
    ValueError
        On the first ``next()`` call, if ``df`` has fewer rows than
        ``batch_size``. The loop would otherwise spin forever without
        producing a batch.

    Notes
    -----
    Rows left over at the end of a pass that do not fill a whole batch are
    discarded; the next pass starts with an empty batch.
    """
    if len(df) < batch_size:
        raise ValueError(
            f'Need at least batch_size={batch_size} rows to form a batch, '
            f'got {len(df)}'
        )

    # Row i is the one-hot vector for class i; indexing it with a list of
    # class ids encodes a whole label at once.
    one_hot = np.eye(N_LABELS, dtype=np.float32)

    while True:
        if shuffle:
            df = df.sample(frac=1).reset_index(drop=True)
        images, labels = [], []
        for file, label in zip(df['file'], df['label']):
            # The context manager closes the file handle promptly. convert()
            # forces three channels so grayscale/CMYK/paletted files still
            # produce an (H, W, 3) array.
            with Image.open(file) as source:
                rgb = source.convert('RGB').resize((W, H))
            images.append(np.asarray(rgb, dtype=np.float32) / 255.0)
            labels.append(one_hot[[char_to_num[char] for char in label]])
            if len(images) >= batch_size:
                yield np.array(images), np.array(labels)
                images, labels = [], []

In [10]:
# Model definition
# Four Conv2D(3x3, ReLU) + 2x2 max-pool stages with 32/64/128/256 filters,
# followed by a dense head that predicts D characters at once.
input_layer = tf.keras.Input(shape=(H, W, C))
x = layers.Conv2D(32, 3, activation='relu')(input_layer)
x = layers.MaxPooling2D((2, 2))(x)
x = layers.Conv2D(64, 3, activation='relu')(x)
x = layers.MaxPooling2D((2, 2))(x)
x = layers.Conv2D(128, 3, activation='relu')(x)
x = layers.MaxPooling2D((2, 2))(x)
x = layers.Conv2D(256, 3, activation='relu')(x)
x = layers.MaxPooling2D((2, 2))(x)
x = layers.Flatten()(x)
x = layers.Dense(1024, activation='relu')(x)
x = layers.Dropout(0.5)(x)
# Emit raw scores for all D * N_LABELS (position, class) pairs, reshape to
# one row per character position, and only then apply softmax along the
# class axis. Softmax on the flat vector would normalise across all six
# positions together, so no single position would be a distribution summing to 1.
x = layers.Dense(D * N_LABELS)(x)
x = layers.Reshape((D, N_LABELS))(x)
x = layers.Softmax(axis=-1)(x)


In [11]:
# Wrap the layer graph in a Model, configure training, and print the
# layer-by-layer summary.
model = models.Model(inputs=input_layer, outputs=x)
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

In [12]:
# Train the model
# One batch size is shared by both generators and by the step counts
# passed to model.fit().
batch_size = 64
train_gen, valid_gen = (
    get_data_generator(split_df, batch_size=batch_size)
    for split_df in (train_df, valid_df)
)

In [13]:
# The generators never terminate, so each epoch's length is given
# explicitly as the number of full batches in the corresponding split.
history = model.fit(
    train_gen,
    epochs=20,
    steps_per_epoch=len(train_df) // batch_size,
    validation_data=valid_gen,
    validation_steps=len(valid_df) // batch_size,
)

Epoch 1/20
[1m 98/308[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m [1m1:57[0m 561ms/step - accuracy: 0.0304 - loss: 3.7049

KeyboardInterrupt: 

In [None]:
# Plot training history
def plot_train_history(history):
    """Draw accuracy and loss curves side by side.

    Parameters
    ----------
    history : object with a ``history`` dict
        Must contain ``'accuracy'`` and ``'loss'`` lists. ``'val_accuracy'``
        and ``'val_loss'`` are plotted when present and skipped otherwise,
        so a run fitted without validation data can still be plotted.

    The figure is created on the current matplotlib backend; nothing is
    returned. Call ``plt.show()`` afterwards to render it.
    """
    fig, axes = plt.subplots(1, 2, figsize=(20, 5))
    # (axis, metric key, train legend label, validation legend label, y-axis name)
    panels = (
        (axes[0], 'accuracy', 'Train accuracy', 'Val accuracy', 'Accuracy'),
        (axes[1], 'loss', 'Training loss', 'Validation loss', 'Loss'),
    )
    for ax, key, train_label, val_label, y_name in panels:
        ax.plot(history.history[key], label=train_label)
        val_key = f'val_{key}'
        if val_key in history.history:
            ax.plot(history.history[val_key], label=val_label)
        ax.set_xlabel('Epochs')
        ax.set_ylabel(y_name)
        ax.set_title(f'{y_name} per epoch')
        ax.legend()

In [None]:
# Render the accuracy/loss curves. Requires `history`, which is only
# assigned once the model.fit(...) cell has run to completion.
plot_train_history(history)
plt.show()

In [None]:
# Evaluate the model
# The test generator is unshuffled and endless, so evaluation is limited
# to the number of full batches the test split can provide.
test_batch_size = 128
test_gen = get_data_generator(test_df, batch_size=test_batch_size, shuffle=False)
# return_dict=True has Keras name each value itself. Zipping the result
# with model.metrics_names can mislabel values on Keras versions that
# report compiled metrics under a single placeholder entry.
evaluation = model.evaluate(
    test_gen,
    steps=len(test_df) // test_batch_size,
    return_dict=True,
)
print(evaluation)

In [None]:
# Turn a sequence of class ids back into its text
def format_y(y):
    """Return the string spelled by the class ids in ``y``.

    Each element is converted with ``int()`` and looked up in
    ``num_to_char``; the resulting characters are concatenated in order.
    """
    return ''.join(num_to_char[int(class_id)] for class_id in y)

In [None]:
# Take the first batch of the test split from a fresh, unshuffled generator
# and run the model on it.
test_gen = get_data_generator(
    test_df,
    batch_size=128,
    shuffle=False,
)
x_test, y_test = next(test_gen)
y_pred = model.predict_on_batch(x_test)

In [None]:
# Collapse the class axis to get one class id per character position.
# The predicted ids go into a new name instead of overwriting `y_pred`:
# re-running this cell would otherwise take argmax of an already-reduced
# array and collapse the wrong axis.
y_true = np.argmax(y_test, axis=-1)
y_pred_idx = np.argmax(y_pred, axis=-1)

# Show at most 100 samples, capped at the batch size so a small batch
# cannot be indexed out of range.
n = min(100, len(x_test))
random_indices = np.random.permutation(n)
n_cols = 5
n_rows = max(1, int(np.ceil(n / n_cols)))
# squeeze=False always returns a 2-D array of axes, so `.flat` works even
# for a single-row grid. Height scales at 2 units per row.
fig, axes = plt.subplots(n_rows, n_cols, figsize=(25, 2 * n_rows), squeeze=False)

true_count = 0   # samples whose whole string was predicted correctly
false_count = 0  # samples with at least one wrong character
for i, img_idx in enumerate(random_indices):
    ax = axes.flat[i]
    ax.imshow(x_test[img_idx])
    ax.set_title(f'pred: {format_y(y_pred_idx[img_idx])}')
    ax.set_xlabel(f'true: {format_y(y_true[img_idx])}')
    ax.set_xticks([])
    ax.set_yticks([])
    if np.array_equal(y_pred_idx[img_idx], y_true[img_idx]):
        true_count += 1
    else:
        false_count += 1

# Blank out grid cells that received no image.
for unused_ax in axes.flat[n:]:
    unused_ax.axis('off')
plt.show()

In [None]:
# Whole-string hit/miss totals from the prediction grid cell
print(
    f"True predicted = {true_count}",
    f"False predicted = {false_count}",
    sep="\n",
)