In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import os
from PIL import Image
import matplotlib.pyplot as plt


In [2]:
# Data loading and preprocessing
def load_data(dataset_dir, img_size=(28, 28), num_classes=62):
  """Load paired RGB / binary character images together with their labels.

  ``dataset_dir`` must contain two sibling trees, ``data`` (RGB images) and
  ``data-bin`` (binary images), each holding one sub-directory per class
  index (``0`` .. ``num_classes - 1``). Only ``.png`` files found in the RGB
  class directory are considered; the binary counterpart is looked up under
  the same file name in the matching ``data-bin`` class directory.

  Args:
    dataset_dir: Root directory holding the ``data`` and ``data-bin`` trees.
    img_size: ``(height, width)`` of the returned images. Files that already
      have this size are used untouched; any other size is resized to it.
    num_classes: Number of class sub-directories to scan (default 62:
      0-9, a-z, A-Z).

  Returns:
    Tuple ``(rgb_images, binary_images, labels)`` of NumPy arrays with shapes
    ``(N, H, W, 3)``, ``(N, H, W, 1)`` and ``(N,)``. Pixel values are divided
    by 255.

  Raises:
    FileNotFoundError: If either top-level data directory is missing.
    ValueError: If not a single image pair could be loaded.
  """
  height, width = img_size
  rgb_images, binary_images, labels = [], [], []

  data_dir = os.path.join(dataset_dir, 'data')
  binary_dir = os.path.join(dataset_dir, 'data-bin')

  if not os.path.exists(data_dir) or not os.path.exists(binary_dir):
    raise FileNotFoundError(f"Data directories not found in {dataset_dir}")

  def _read_image(path, mode):
    """Read one image, convert it to ``mode`` and scale it by 1/255."""
    # The context manager closes the underlying file handle; ``convert``
    # returns an independent, fully loaded image.
    with Image.open(path) as source:
      img = source.convert(mode)
    # PIL sizes are (width, height), the opposite of NumPy's (rows, cols).
    if img.size != (width, height):
      img = img.resize((width, height))
    return np.array(img) / 255.0

  for class_idx in range(num_classes):
    rgb_class_dir = os.path.join(data_dir, str(class_idx))
    binary_class_dir = os.path.join(binary_dir, str(class_idx))

    if not os.path.exists(rgb_class_dir) or not os.path.exists(binary_class_dir):
      print(f"Warning: Directory for class {class_idx} not found")
      continue

    # Sorted so the sample order (and therefore any seeded split made by the
    # caller) does not depend on the file system's listing order.
    for img_file in sorted(os.listdir(rgb_class_dir)):
      if not img_file.lower().endswith('.png'):
        continue

      rgb_img_path = os.path.join(rgb_class_dir, img_file)
      binary_img_path = os.path.join(binary_class_dir, img_file)

      try:
        rgb_img_array = _read_image(rgb_img_path, 'RGB')
        binary_img_array = _read_image(binary_img_path, 'L').reshape(height, width, 1)
      except Exception as e:
        # Best effort: report the bad pair and move on to the next file.
        print(f"Error loading image {img_file}: {str(e)}")
        continue

      # Append only once BOTH images are ready, so the three lists can never
      # get out of step when one half of a pair fails to load.
      rgb_images.append(rgb_img_array)
      binary_images.append(binary_img_array)
      labels.append(class_idx)

  if not rgb_images:
    raise ValueError("No valid images were loaded. Please check the data directories and file permissions.")

  return np.array(rgb_images), np.array(binary_images), np.array(labels)

In [3]:
# Define the model
def build_ocr_model(input_shape_rgb=(28, 28, 3), input_shape_binary=(28, 28, 1)):
    """Build the two-input CNN classifier (uncompiled).

    Each input (RGB and binary) goes through its own convolutional branch;
    the flattened branch outputs are concatenated and fed to a dense head
    ending in a 62-way softmax.

    Args:
        input_shape_rgb: Shape of one RGB sample, without the batch axis.
        input_shape_binary: Shape of one binary sample, without the batch axis.

    Returns:
        A ``keras.Model`` taking ``[rgb_input, binary_input]`` and producing
        the softmax class scores.
    """
    layers = keras.layers

    def conv_branch(shape):
        """Input -> (Conv 32 -> Pool) -> (Conv 64 -> Pool) -> Flatten."""
        branch_input = layers.Input(shape=shape)
        features = branch_input
        for filters in (32, 64):
            features = layers.Conv2D(filters, (3, 3), activation='relu', padding='same')(features)
            features = layers.MaxPooling2D((2, 2))(features)
        return branch_input, layers.Flatten()(features)

    # Branches are built RGB first, then binary.
    rgb_input, rgb_features = conv_branch(input_shape_rgb)
    binary_input, binary_features = conv_branch(input_shape_binary)

    # Merge the two feature vectors into one.
    hidden = layers.concatenate([rgb_features, binary_features])

    # Dense head: two ReLU layers, each followed by 50% dropout.
    for units in (128, 64):
        hidden = layers.Dense(units, activation='relu')(hidden)
        hidden = layers.Dropout(0.5)(hidden)
    class_scores = layers.Dense(62, activation='softmax')(hidden)

    return keras.Model(inputs=[rgb_input, binary_input], outputs=class_scores)


In [4]:
def _plot_training_history(history, output_path):
    """Save side-by-side accuracy and loss curves of a Keras ``History``."""
    panels = (('accuracy', 'Accuracy'), ('loss', 'Loss'))

    plt.figure(figsize=(12, 4))
    for position, (metric, label) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(history.history[metric], label=f'Training {label}')
        plt.plot(history.history[f'val_{metric}'], label=f'Validation {label}')
        plt.title(f'Model {label}')
        plt.xlabel('Epoch')
        plt.ylabel(label)
        plt.legend()

    plt.tight_layout()
    plt.savefig(output_path)
    # The figure is only needed on disk; release it.
    plt.close()


def train_ocr_model(dataset_dir, epochs=100, batch_size=32, test_size=0.2,
                    random_state=42, model_path='ocr_model.h5',
                    history_plot_path='ocr_training_history.png'):
    """Train the two-input OCR classifier and save the model and its curves.

    Loads the dataset with ``load_data``, splits it into train / validation
    parts, trains the network from ``build_ocr_model`` with early stopping
    and learning-rate reduction, writes the training curves to
    ``history_plot_path`` and the trained model to ``model_path``.

    Args:
        dataset_dir: Dataset root directory, passed to ``load_data``.
        epochs: Maximum number of training epochs (early stopping may end
            training sooner).
        batch_size: Mini-batch size used by ``model.fit``.
        test_size: Fraction of the samples held out for validation.
        random_state: Seed for the train / validation split.
        model_path: File the trained model is saved to.
        history_plot_path: File the accuracy / loss plot is saved to.

    Returns:
        The trained ``keras.Model``.
    """
    # Kept local so the function carries its own scikit-learn dependency.
    from sklearn.model_selection import train_test_split

    # Must match the width of the softmax layer created by build_ocr_model.
    num_classes = 62

    # Load and preprocess data
    x_rgb, x_binary, y = load_data(dataset_dir)

    # Split all three arrays with the same shuffling so pairs stay aligned.
    x_rgb_train, x_rgb_val, x_binary_train, x_binary_val, y_train, y_val = train_test_split(
        x_rgb, x_binary, y, test_size=test_size, random_state=random_state)

    # Convert labels to one-hot encoding
    y_train = keras.utils.to_categorical(y_train, num_classes=num_classes)
    y_val = keras.utils.to_categorical(y_val, num_classes=num_classes)

    # Create and compile the model
    model = build_ocr_model()
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001),
                  loss='categorical_crossentropy', metrics=['accuracy'])

    # Define callbacks
    early_stopping = keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)
    reduce_lr = keras.callbacks.ReduceLROnPlateau(factor=0.2, patience=5)

    # Train the model
    history = model.fit(
        [x_rgb_train, x_binary_train], y_train,
        validation_data=([x_rgb_val, x_binary_val], y_val),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[early_stopping, reduce_lr]
    )

    # Plot training history
    _plot_training_history(history, history_plot_path)

    # Evaluate the model.
    # NOTE(review): this reuses the validation split that also drove early
    # stopping and LR scheduling, so the printed "Test accuracy" is really a
    # validation score, not a measurement on an untouched test set.
    test_loss, test_accuracy = model.evaluate([x_rgb_val, x_binary_val], y_val)
    print(f"Test accuracy: {test_accuracy:.4f}")

    # Save the model
    model.save(model_path)

    print(f"Training complete. Model saved as '{model_path}'.")

    return model

In [5]:
# Run the training
if __name__ == "__main__":
    import traceback

    # The dataset location can be overridden through the OCR_DATASET_DIR
    # environment variable; the original machine-specific path is the fallback.
    dataset_dir = os.environ.get(
        'OCR_DATASET_DIR', r'C:\Users\VIRAT\Projects\OCR\data\dataset-5')
    try:
        trained_model = train_ocr_model(dataset_dir)
        print("OCR model training completed successfully.")
    except Exception as e:
        print(f"An error occurred during training: {str(e)}")
        # Keep the run from crashing, but show where the failure came from;
        # the one-line message alone is rarely enough to debug.
        traceback.print_exc()

Epoch 1/100
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 13ms/step - accuracy: 0.0187 - loss: 4.1637 - val_accuracy: 0.0568 - val_loss: 4.0989 - learning_rate: 0.0010
Epoch 2/100
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.0425 - loss: 4.0589 - val_accuracy: 0.0746 - val_loss: 3.9442 - learning_rate: 0.0010
Epoch 3/100
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.0687 - loss: 3.9030 - val_accuracy: 0.1741 - val_loss: 3.5483 - learning_rate: 0.0010
Epoch 4/100
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.1047 - loss: 3.5970 - val_accuracy: 0.2433 - val_loss: 3.1715 - learning_rate: 0.0010
Epoch 5/100
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.1573 - loss: 3.3724 - val_accuracy: 0.3197 - val_loss: 2.8805 - learning_rate: 0.0010
Epoch 6/100
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1



Test accuracy: 0.5897
Training complete. Model saved as 'ocr_model.h5'.
OCR model training completed successfully.
