In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Reshape, Dense, Dropout, LSTM, Bidirectional, BatchNormalization, Input, TimeDistributed
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.backend import ctc_batch_cost
from sklearn.model_selection import train_test_split
import cv2
import pytesseract
pytesseract.pytesseract.tesseract_cmd ="C://Program Files//Tesseract-OCR//tesseract.exe"

In [2]:
# Paths
DATASET_DIR = "C:/Users/deeks/Desktop/Engineering/GGH/Project/dataset/training/prescriptions"
LABELS_FILE = "C:/Users/deeks/Desktop/Engineering/GGH/Project/dataset/training/prescriptions/_annotations.csv"

In [3]:
# Load dataset
labels_df = pd.read_csv(LABELS_FILE)
labels_df.dropna(inplace=True)  # Ensure no missing values

In [4]:
# Function to extract text using OCR
def extract_text(image_path):
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    img = cv2.resize(img, (128, 128))  # Resize for consistency
    text = pytesseract.image_to_string(img, config='--psm 6')  # OCR extraction
    return text.strip()

In [5]:
# Prepare images and extract text labels
X, y = [], []
for _, row in labels_df.iterrows():
    image_path = os.path.join(DATASET_DIR, row["filename"])
    if os.path.exists(image_path):
        img = load_img(image_path, color_mode='grayscale', target_size=(128, 128))
        img = img_to_array(img) / 255.0  # Normalize
        X.append(img)
        extracted_text = extract_text(image_path)
        y.append(extracted_text)

X = np.array(X)
print("Text Extracted")

Text Extracted


In [6]:
# Convert text labels to numerical values using Tokenization
from tensorflow.keras.preprocessing.text import Tokenizer

tokenizer = Tokenizer(char_level=True, filters="")
tokenizer.fit_on_texts(y)
y_seq = tokenizer.texts_to_sequences(y)
max_length = max([len(seq) for seq in y_seq])
y_padded = tf.keras.preprocessing.sequence.pad_sequences(y_seq, maxlen=max_length, padding='post')

In [7]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y_padded, test_size=0.2, random_state=42)

In [8]:
# Build CNN+LSTM OCR Model
inputs = Input(shape=(128, 128, 1))

In [34]:
# CNN Layers
x = Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
x = MaxPooling2D((2, 2), padding='same')(x)
x = BatchNormalization()(x)

x = Conv2D(64, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2), padding='same')(x)  
x = BatchNormalization()(x)

x = Conv2D(128, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2), padding='same')(x)  
x = BatchNormalization()(x)

In [35]:
# Reshape for LSTM
shape_before_reshape = tf.keras.backend.int_shape(x)
print("CNN output shape before reshape:", shape_before_reshape)

# Ensure shape has enough dimensions
if len(shape_before_reshape) == 4:  # Expected (batch, H, W, C)
    H, W, C = shape_before_reshape[1], shape_before_reshape[2], shape_before_reshape[3]
    timesteps = H * W  # Flatten spatial dimensions
    feature_dim = C
    x = Reshape((timesteps, feature_dim))(x)  # ✅ Reshape correctly

else:
    raise ValueError(f"Unexpected CNN output shape: {shape_before_reshape}")

# LSTM Layers
x = Bidirectional(LSTM(128, return_sequences=True))(x)
x = Bidirectional(LSTM(64, return_sequences=True))(x)

CNN output shape before reshape: (None, 16, 16, 128)


In [36]:
# Fully Connected Output Layer
x = TimeDistributed(Dense(len(tokenizer.word_index) + 1, activation='softmax'))(x)

model = Model(inputs, x)  # Define the model

In [37]:
# Define CTC loss
def ctc_loss(y_true, y_pred):
    batch_len = tf.cast(tf.shape(y_true)[0], dtype='int64')
    input_len = tf.fill([batch_len], tf.shape(y_pred)[1])
    label_len = tf.reduce_sum(tf.cast(tf.not_equal(y_true, 0), dtype='int64'), axis=1)  # Actual label lengths
    return ctc_batch_cost(y_true, y_pred, input_len, label_len)

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss=ctc_loss, metrics=['accuracy'])

In [38]:
# Train model
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=20, batch_size=16)


Epoch 1/20


IndexError: tuple index out of range