In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
import os
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:

# Set random seed for reproducibility
tf.random.set_seed(42)
np.random.seed(42)

# Define constants
IMG_HEIGHT = 32
IMG_WIDTH = 128
BATCH_SIZE = 32
EPOCHS = 20

# Define paths
data_dir = 'Datasets/IAM_Words'
words_file = os.path.join(data_dir, 'words.txt')
images_dir = os.path.join(data_dir, 'words')

# Read and process words.txt
def process_words_file(file_path):
    data = []
    with open(file_path, 'r') as file:
        for line in file:
            if not line.startswith('#'):
                parts = line.strip().split()
                if len(parts) >= 9:
                    image_id = parts[0]
                    word = parts[-1]
                    data.append((image_id, word))
    return pd.DataFrame(data, columns=['image_id', 'word'])


In [3]:
import cv2
from PIL import Image
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

df = process_words_file(words_file)

# Split the data
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)

# Create a dictionary mapping words to integer labels
word_to_index = {word: idx for idx, word in enumerate(df['word'].unique())}
num_classes = len(word_to_index)

# Custom data generator
class WordDataGenerator(tf.keras.utils.Sequence):
    def __init__(self, dataframe, word_to_index, batch_size, img_size, images_dir, is_training=True):
        self.dataframe = dataframe
        self.word_to_index = word_to_index
        self.batch_size = batch_size
        self.img_size = img_size
        self.images_dir = images_dir
        self.is_training = is_training
        self.on_epoch_end()

    def __len__(self):
        return int(np.ceil(len(self.dataframe) / float(self.batch_size)))

    def __getitem__(self, idx):
        # logger.info(f"Fetching batch {idx+1}/{len(self)}")
        start_idx = idx * self.batch_size
        end_idx = min((idx + 1) * self.batch_size, len(self.dataframe))
        batch_df = self.dataframe.iloc[start_idx:end_idx]
        
        batch_x = []
        batch_y = []
        skipped_images = 0

        for _, row in batch_df.iterrows():
            img_path = os.path.join(self.images_dir, row['image_id'].split('-')[0], 
                                    '-'.join(row['image_id'].split('-')[:2]), 
                                    f"{row['image_id']}.png")
            
            try:
                with Image.open(img_path) as img:
                    img = img.convert('L')  # Convert to grayscale
                    img = img.resize(self.img_size)
                    img_array = np.array(img)
                    img_array = img_array.reshape(self.img_size[0], self.img_size[1], 1)
                    img_array = img_array / 255.0  # Normalize
                    
                batch_x.append(img_array)
                label = np.zeros(len(self.word_to_index))
                label[self.word_to_index[row['word']]] = 1
                batch_y.append(label)
            except (IOError, OSError, Image.UnidentifiedImageError):
                logger.warning(f"Error loading image: {img_path}")
                skipped_images += 1
                continue  # Skip this image and continue with the next one

        # logger.info(f"Batch {idx+1}: Loaded {len(batch_x)} images, skipped {skipped_images}")

        if not batch_x:  # If all images in the batch were invalid
            logger.warning(f"All images in batch {idx+1} were invalid. Trying next batch.")
            return self.__getitem__((idx + 1) % len(self))  # Try the next batch

        return np.array(batch_x), np.array(batch_y)

    def on_epoch_end(self):
        if self.is_training:
            # logger.info("Shuffling training data for next epoch")
            self.dataframe = self.dataframe.sample(frac=1).reset_index(drop=True)

# Create data generators
train_generator = WordDataGenerator(train_df, word_to_index, BATCH_SIZE, (IMG_HEIGHT, IMG_WIDTH), images_dir)
val_generator = WordDataGenerator(val_df, word_to_index, BATCH_SIZE, (IMG_HEIGHT, IMG_WIDTH), images_dir, is_training=False)

# Define the model
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_HEIGHT, IMG_WIDTH, 1)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(num_classes, activation='softmax')
])

# Compile the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [4]:
# Calculate steps_per_epoch and validation_steps
steps_per_epoch = len(train_generator)
validation_steps = len(val_generator)


print(f"Training data size: {len(train_df)}")
print(f"Validation data size: {len(val_df)}")
print(f"Steps per epoch: {steps_per_epoch}")
print(f"Validation steps: {validation_steps}")


# Train the model
history = model.fit(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=EPOCHS,
    validation_data=val_generator,
    validation_steps=validation_steps
)


Training data size: 92256
Validation data size: 23064
Steps per epoch: 2883
Validation steps: 721
Epoch 1/20


  self._warn_if_super_not_called()


[1m1143/2883[0m [32m━━━━━━━[0m[37m━━━━━━━━━━━━━[0m [1m5:19[0m 183ms/step - accuracy: 0.1018 - loss: 7.0082



[1m2883/2883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 173ms/step - accuracy: 0.1628 - loss: 6.4286



[1m2883/2883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m569s[0m 197ms/step - accuracy: 0.1628 - loss: 6.4283 - val_accuracy: 0.3276 - val_loss: 5.0796
Epoch 2/20
[1m2883/2883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 3/20


  self.gen.throw(value)


[1m2197/2883[0m [32m━━━━━━━━━━━━━━━[0m[37m━━━━━[0m [1m1:28[0m 129ms/step - accuracy: 0.3436 - loss: 4.6142



[1m2883/2883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 120ms/step - accuracy: 0.3473 - loss: 4.5834



[1m2883/2883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m398s[0m 138ms/step - accuracy: 0.3474 - loss: 4.5833 - val_accuracy: 0.3848 - val_loss: 4.6361
Epoch 4/20
[1m2883/2883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 5/20
[1m 574/2883[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m4:02[0m 105ms/step - accuracy: 0.4024 - loss: 3.9366



[1m2882/2883[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 97ms/step - accuracy: 0.4081 - loss: 3.8879



[1m2883/2883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m341s[0m 118ms/step - accuracy: 0.4081 - loss: 3.8878 - val_accuracy: 0.4248 - val_loss: 4.4459
Epoch 6/20
[1m2883/2883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 7/20
[1m1093/2883[0m [32m━━━━━━━[0m[37m━━━━━━━━━━━━━[0m [1m4:40[0m 157ms/step - accuracy: 0.4417 - loss: 3.4231



[1m2882/2883[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 131ms/step - accuracy: 0.4461 - loss: 3.3833



[1m2883/2883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m433s[0m 149ms/step - accuracy: 0.4461 - loss: 3.3833 - val_accuracy: 0.4447 - val_loss: 4.4960
Epoch 8/20
[1m2883/2883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 9/20
[1m1419/2883[0m [32m━━━━━━━━━[0m[37m━━━━━━━━━━━[0m [1m2:34[0m 105ms/step - accuracy: 0.4764 - loss: 3.0016



[1m2883/2883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 98ms/step - accuracy: 0.4785 - loss: 2.9876



[1m2883/2883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m324s[0m 112ms/step - accuracy: 0.4785 - loss: 2.9876 - val_accuracy: 0.4614 - val_loss: 4.6600
Epoch 10/20
[1m2883/2883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 11/20
[1m1734/2883[0m [32m━━━━━━━━━━━━[0m[37m━━━━━━━━[0m [1m1:49[0m 95ms/step - accuracy: 0.5026 - loss: 2.6743



[1m2883/2883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 100ms/step - accuracy: 0.5024 - loss: 2.6744



[1m2883/2883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m345s[0m 119ms/step - accuracy: 0.5024 - loss: 2.6744 - val_accuracy: 0.4703 - val_loss: 4.7765
Epoch 12/20
[1m2883/2883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 13/20
[1m 460/2883[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m3:30[0m 87ms/step - accuracy: 0.5366 - loss: 2.3867



[1m2882/2883[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 92ms/step - accuracy: 0.5286 - loss: 2.4043



[1m2883/2883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m305s[0m 106ms/step - accuracy: 0.5286 - loss: 2.4043 - val_accuracy: 0.4763 - val_loss: 5.1051
Epoch 14/20
[1m2883/2883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 15/20
[1m2798/2883[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m8s[0m 100ms/step - accuracy: 0.5461 - loss: 2.1866



[1m2883/2883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 100ms/step - accuracy: 0.5460 - loss: 2.1871



[1m2883/2883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m338s[0m 117ms/step - accuracy: 0.5460 - loss: 2.1871 - val_accuracy: 0.4759 - val_loss: 5.2371
Epoch 16/20
[1m2883/2883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 17/20
[1m1812/2883[0m [32m━━━━━━━━━━━━[0m[37m━━━━━━━━[0m [1m1:48[0m 101ms/step - accuracy: 0.5729 - loss: 1.9484



[1m2883/2883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 101ms/step - accuracy: 0.5695 - loss: 1.9682



[1m2883/2883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m344s[0m 119ms/step - accuracy: 0.5695 - loss: 1.9682 - val_accuracy: 0.4810 - val_loss: 5.3241
Epoch 18/20
[1m2883/2883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00
Epoch 19/20
[1m1212/2883[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m2:54[0m 104ms/step - accuracy: 0.6017 - loss: 1.7394



[1m2883/2883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 105ms/step - accuracy: 0.5941 - loss: 1.7788



[1m2883/2883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m352s[0m 122ms/step - accuracy: 0.5941 - loss: 1.7789 - val_accuracy: 0.4838 - val_loss: 5.5520
Epoch 20/20
[1m2883/2883[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00


In [6]:
# Save the model
model.save('word_classification_model.h5')
model.save('word_classification_model.keras')

# Convert to TensorFlow Lite format
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

# Save the TensorFlow Lite model
with open('word_classification_model.tflite', 'wb') as f:
    f.write(tflite_model)

print("Training completed and models saved.")



INFO:tensorflow:Assets written to: C:\Users\Bulaya\AppData\Local\Temp\tmpan3jxffc\assets


INFO:tensorflow:Assets written to: C:\Users\Bulaya\AppData\Local\Temp\tmpan3jxffc\assets


Saved artifact at 'C:\Users\Bulaya\AppData\Local\Temp\tmpan3jxffc'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 32, 128, 1), dtype=tf.float32, name='keras_tensor')
Output Type:
  TensorSpec(shape=(None, 13542), dtype=tf.float32, name=None)
Captures:
  1933142224400: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1933142223824: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1933142224976: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1933142226128: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1933142225936: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1933142224208: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1933142226320: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1933142226896: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1933142224784: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1933142227280: TensorSpec(shape=(), dtype=tf.resource, name=None)
Tra