## Install Libraries

In [1]:
%pip install torch torchvision
%pip install pillow
%pip install scikit-learn
%pip install opencv-python


Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


# Loading and splitting data

In [2]:
import os
from sklearn.model_selection import train_test_split
from PIL import Image
import numpy as np
from concurrent.futures import ThreadPoolExecutor

# Define the path to the images folder
image_folder = 'extracted_images'

# Get a list of all image files in the folder
image_files = [os.path.join(image_folder, file) for file in os.listdir(image_folder) if file.endswith(('.png', '.jpg', '.jpeg'))]

# Limit the number of images to 50,000
start = 0
end = len(image_files)-1
step = 2
image_files = image_files[start:end:step]

# Split the data into training, testing, and validation sets
train_files, test_files = train_test_split(image_files, test_size=0.2, random_state=42)
train_files, val_files = train_test_split(train_files, test_size=0.25, random_state=42)  # 0.25 x 0.8 = 0.2

# Function to extract the label from the filename
def extract_label_from_filename(filename):
    # Remove the extension from the filename
    return os.path.splitext(filename)[0]

# Function to load a single image and its label
def load_image_and_label(file):
    # Load the image
    image = Image.open(file).convert('L')  # Convert to grayscale
    image = np.array(image)

    # Extract the label from the filename
    label = extract_label_from_filename(os.path.basename(file))
    return image, label

# Function to load images and extract labels using parallel processing
def load_images_and_labels(files):
    images = []
    labels = []
    with ThreadPoolExecutor(max_workers=8) as executor:
        results = list(executor.map(load_image_and_label, files))
    for image, label in results:
        images.append(image)
        labels.append(label)
    
    return np.array(images), np.array(labels)

# Load images and labels for training, validation, and testing sets
train_images, train_labels = load_images_and_labels(train_files)
val_images, val_labels = load_images_and_labels(val_files)
test_images, test_labels = load_images_and_labels(test_files)

# Print the number of files in each set
print(f"Total images: {len(image_files)}")
print(f"Training set: {len(train_files)}")
print(f"Validation set: {len(val_files)}")
print(f"Testing set: {len(test_files)}")

# Optionally, print the first few labels to verify
print("Sample labels from training set:", train_labels[:5])
print("Sample labels from validation set:", val_labels[:5])
print("Sample labels from testing set:", test_labels[:5])


Total images: 130340
Training set: 78204
Validation set: 26068
Testing set: 26068
Sample labels from training set: ['05_32_27_32_36_30_erosion' '05_32_07_22_02_22_erosion'
 '05_32_20_24_07_34_erosion' '05_32_18_18_28_28_erosion'
 '05_32_27_32_22_24_dilation']
Sample labels from validation set: ['05_32_26_32_20_07_erosion' '05_32_34_36_20_18_shear'
 '05_32_27_27_26_32_dilation' '05_32_31_27_36_34_erosion'
 '05_32_26_28_22_26_rotation']
Sample labels from testing set: ['05_32_20_26_32_02_none' '05_32_20_31_07_33_none'
 '05_32_02_27_30_28_shear' '05_32_34_27_28_27_shear'
 '05_32_27_36_32_18_shear']


# Preprocess Data

In [3]:
import tensorflow as tf
import numpy as np
import cv2

def load_image(image_path):
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)  # Load image in grayscale
    image = cv2.resize(image, (128, 32))  # Resize image to (128, 32)
    image = image.astype(np.float32) / 255.0  # Normalize pixel values
    image = np.expand_dims(image, axis=-1)  # Add channel dimension
    return image

def preprocess_data(image_files, labels):
    images = [load_image(file) for file in image_files]
    labels = [label.split('_')[1:-1] for label in labels]  # Extract character indices
    labels = [[int(char) for char in label] for label in labels]  # Convert to integers
    labels = np.array(labels)
    return np.array(images), labels

# Preprocess the data
train_images, train_labels = preprocess_data(train_files, train_labels)
val_images, val_labels = preprocess_data(val_files, val_labels)
test_images, test_labels = preprocess_data(test_files, test_labels)

print(len(train_images))
print(train_labels.shape)
print(val_labels.shape)



78204
(78204, 5)
(26068, 5)


# Convert Labels to Categorical Labels

In [4]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# Define the Urdu alphabet and create an index dictionary
urdu_alphabet = ["ا", "ب", "پ", "ت", "ٹ", "ث", "ج", "چ", "ح", "خ", "د", "ڈ", "ذ", "ر", "ڑ", "ز", "ژ", "س", "ش", "ص", "ض", "ط", "ظ", "ع", "غ", "ف", "ق", "ک", "گ", "ل", "م", "ن", "و", "ہ", "ء", "ی", "ے"]
urdu_index_dict = {char: idx + 1 for idx, char in enumerate(urdu_alphabet)}

# Function to convert label indices to categorical format using the index dictionary
def labels_to_categorical(labels, index_dict, num_classes):
    categorical_labels = []
    for label in labels:
        categorical_label = []
        for index in label:
            if index < 1 or index > len(index_dict):
                raise ValueError(f"Label index '{index}' is out of range or not found in urdu_alphabet.")
            categorical_label.append(tf.keras.utils.to_categorical(index - 1, num_classes=num_classes))  # Adjust index to start from 0
        categorical_labels.append(categorical_label)
    return np.array(categorical_labels)

# Convert labels to categorical format
train_labels_categorical = labels_to_categorical(train_labels, urdu_index_dict, len(urdu_alphabet))
val_labels_categorical = labels_to_categorical(val_labels, urdu_index_dict, len(urdu_alphabet))

# Print the length of converted labels for verification
print(len(train_labels_categorical))
print(len(val_labels_categorical))

# Optionally, you may print the index dictionary for verification
print(urdu_index_dict)


78204
26068
{'ا': 1, 'ب': 2, 'پ': 3, 'ت': 4, 'ٹ': 5, 'ث': 6, 'ج': 7, 'چ': 8, 'ح': 9, 'خ': 10, 'د': 11, 'ڈ': 12, 'ذ': 13, 'ر': 14, 'ڑ': 15, 'ز': 16, 'ژ': 17, 'س': 18, 'ش': 19, 'ص': 20, 'ض': 21, 'ط': 22, 'ظ': 23, 'ع': 24, 'غ': 25, 'ف': 26, 'ق': 27, 'ک': 28, 'گ': 29, 'ل': 30, 'م': 31, 'ن': 32, 'و': 33, 'ہ': 34, 'ء': 35, 'ی': 36, 'ے': 37}


# Ensure Correct Data Shapes

In [6]:
import numpy as np

# Verify the shapes of the data
print("Train images shape:", train_images.shape)
print("Train labels shape:", train_labels_categorical.shape)

# Ensure the input data is in the correct shape
train_images = np.array(train_images)
train_labels_categorical = np.array(train_labels_categorical)

# Ensure that the number of samples match
assert train_images.shape[0] == train_labels_categorical.shape[0], "Number of samples in train_images and train_labels_categorical do not match!"

# Check the shapes of validation data
print("Validation images shape:", val_images.shape)
print("Validation labels shape:", val_labels_categorical.shape)

# Ensure the validation data is in the correct shape
val_images = np.array(val_images)
val_labels_categorical = np.array(val_labels_categorical)

# Ensure that the number of samples match
assert val_images.shape[0] == val_labels_categorical.shape[0], "Number of samples in val_images and val_labels_categorical do not match!"

# Print shapes to verify
print("Corrected train images shape:", train_images.shape)
print("Corrected train labels shape:", train_labels_categorical.shape)
print("Corrected validation images shape:", val_images.shape)
print("Corrected validation labels shape:", val_labels_categorical.shape)


Train images shape: (78204, 32, 128, 1)
Train labels shape: (78204, 5, 37)
Validation images shape: (26068, 32, 128, 1)
Validation labels shape: (26068, 5, 37)
Corrected train images shape: (78204, 32, 128, 1)
Corrected train labels shape: (78204, 5, 37)
Corrected validation images shape: (26068, 32, 128, 1)
Corrected validation labels shape: (26068, 5, 37)


# Model Architecture Design

In [7]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, TimeDistributed, Reshape, BatchNormalization, Activation, LSTM, Bidirectional
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# Define the number of classes
num_classes = 37  # 36 Urdu characters + 1 blank for CTC

# Define the model
model = Sequential()

# First Conv layer
model.add(Conv2D(32, (3, 3), padding='same', input_shape=(32, 128, 1)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))  # Output shape: (16, 64, 32)

# Second Conv layer
model.add(Conv2D(64, (3, 3), padding='same'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))  # Output shape: (8, 32, 64)

# Third Conv layer
model.add(Conv2D(128, (3, 3), padding='same'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))  # Output shape: (4, 16, 128)

# Fourth Conv layer
model.add(Conv2D(256, (3, 3), padding='same'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))  # Output shape: (2, 8, 256)

# Fifth Conv layer
model.add(Conv2D(512, (3, 3), padding='same'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))  # Output shape: (1, 4, 512)

# Flatten and reshape for TimeDistributed layer
model.add(Flatten())  # Output shape: (1 * 4 * 512,) = (2048,)
model.add(Dense(1280))  # Output shape: (1280,)
model.add(Reshape((5, 256)))  # Reshaping to (5, 256) to match the label shape (5 time steps)

# Add a Bidirectional LSTM layer
model.add(Bidirectional(LSTM(256, return_sequences=True)))

# Fully connected layer
model.add(TimeDistributed(Dense(128, activation='relu')))
model.add(TimeDistributed(Dropout(0.5)))
model.add(TimeDistributed(Dense(num_classes, activation='softmax')))

# Compile the model
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# Print model summary
model.summary()

# Define callbacks
checkpoint = ModelCheckpoint('best_model.keras', monitor='val_loss', save_best_only=True, verbose=1)
early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [8]:
# Train the model
history = model.fit(
    x=train_images,
    y=train_labels_categorical,
    validation_data=(val_images, val_labels_categorical),
    epochs=10,
    batch_size=32,
    callbacks=[checkpoint, early_stopping]
)


Epoch 1/10
[1m2444/2444[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 272ms/step - accuracy: 0.3509 - loss: 1.9278
Epoch 1: val_loss improved from inf to 1.86226, saving model to best_model.keras
[1m2444/2444[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m722s[0m 288ms/step - accuracy: 0.3509 - loss: 1.9277 - val_accuracy: 0.4068 - val_loss: 1.8623
Epoch 2/10
[1m2444/2444[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 400ms/step - accuracy: 0.4540 - loss: 1.4753
Epoch 2: val_loss did not improve from 1.86226
[1m2444/2444[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1014s[0m 415ms/step - accuracy: 0.4540 - loss: 1.4753 - val_accuracy: 0.3927 - val_loss: 1.9812
Epoch 3/10
[1m2444/2444[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 469ms/step - accuracy: 0.4541 - loss: 1.4678
Epoch 3: val_loss improved from 1.86226 to 1.47837, saving model to best_model.keras
[1m2444/2444[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1187s[0m 486ms/step - accuracy: 0.4541

KeyboardInterrupt: 

# Validation

In [9]:
# Evaluate the model on validation data
val_loss, val_accuracy = model.evaluate(val_images, val_labels_categorical)
print(f'Validation Loss: {val_loss}')
print(f'Validation Accuracy: {val_accuracy}')


[1m815/815[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 50ms/step - accuracy: 0.4572 - loss: 1.4606
Validation Loss: 1.461388349533081
Validation Accuracy: 0.4572504460811615


# Testing

In [10]:

# Convert test labels to categorical format
test_labels_categorical = labels_to_categorical(test_labels, urdu_index_dict, num_classes)

# Ensure the input data is in the correct shape
test_images = np.array(test_images)
print("Test labels:", test_labels_categorical.shape)

# Evaluate the model on test data
test_loss, test_accuracy = model.evaluate(test_images, test_labels_categorical)
print(f'Test Loss: {test_loss}')
print(f'Test Accuracy: {test_accuracy}')


Test labels: (26068, 5, 37)
[1m815/815[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 51ms/step - accuracy: 0.4584 - loss: 1.4629
Test Loss: 1.461517572402954
Test Accuracy: 0.4585469365119934
