In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2, preprocess_input
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam


In [3]:
import tarfile

# Define the path to the .tgz file and the target directory for extraction
tgz_path = './data/EnglishFnt.tgz'
target_path = './data'

# Uncompress the .tgz file
with tarfile.open(tgz_path, 'r:gz') as tgz_ref:
    tgz_ref.extractall(target_path)

print(f'Extraction done! Check the directory: {target_path}')


Extraction done! Check the directory: ./data


In [4]:
import os

fnt_dir = os.path.join(target_path, 'English', 'Fnt')
if os.path.exists(fnt_dir):
    print(f'Found Fnt directory at: {fnt_dir}')
    print('Contents:', os.listdir(fnt_dir))
else:
    print(f'Directory not found: {fnt_dir}')


train_dir = './data/English/Fnt_train'
val_dir = './data/English/Fnt_val'

os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)

Found Fnt directory at: ./data\English\Fnt
Contents: ['Sample001', 'Sample002', 'Sample003', 'Sample004', 'Sample005', 'Sample006', 'Sample007', 'Sample008', 'Sample009', 'Sample010', 'Sample011', 'Sample012', 'Sample013', 'Sample014', 'Sample015', 'Sample016', 'Sample017', 'Sample018', 'Sample019', 'Sample020', 'Sample021', 'Sample022', 'Sample023', 'Sample024', 'Sample025', 'Sample026', 'Sample027', 'Sample028', 'Sample029', 'Sample030', 'Sample031', 'Sample032', 'Sample033', 'Sample034', 'Sample035', 'Sample036', 'Sample037', 'Sample038', 'Sample039', 'Sample040', 'Sample041', 'Sample042', 'Sample043', 'Sample044', 'Sample045', 'Sample046', 'Sample047', 'Sample048', 'Sample049', 'Sample050', 'Sample051', 'Sample052', 'Sample053', 'Sample054', 'Sample055', 'Sample056', 'Sample057', 'Sample058', 'Sample059', 'Sample060', 'Sample061', 'Sample062']


In [5]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import train_test_split
import os
import shutil
import tarfile

val_split = 0.2  # Split ratio for validation

for folder_name in os.listdir(fnt_dir):
    character_folder_path = os.path.join(fnt_dir, folder_name)
    
    if os.path.isdir(character_folder_path):
        images = os.listdir(character_folder_path)
        
        if images:
            train_images, val_images = train_test_split(images, test_size=val_split)
            
            train_character_folder = os.path.join(train_dir, folder_name)
            val_character_folder = os.path.join(val_dir, folder_name)
            
            os.makedirs(train_character_folder, exist_ok=True)
            os.makedirs(val_character_folder, exist_ok=True)
            
            for image in train_images:
                src_path = os.path.join(character_folder_path, image)
                dst_path = os.path.join(train_character_folder, image)
                if not os.path.exists(dst_path):  # Check if the file already exists
                    shutil.move(src_path, dst_path)
                else:
                    print(f"File already exists: {dst_path}")
                
            for image in val_images:
                src_path = os.path.join(character_folder_path, image)
                dst_path = os.path.join(val_character_folder, image)
                if not os.path.exists(dst_path):  # Check if the file already exists
                    shutil.move(src_path, dst_path)
                else:
                    print(f"File already exists: {dst_path}")
        else:
            print(f"No images found in folder {folder_name}. Skipping...")


In [6]:
# Check a few folders to ensure they have been populated correctly
for folder_name in os.listdir(train_dir)[:5]:  # Check the first 5 folders
    train_character_folder = os.path.join(train_dir, folder_name)
    val_character_folder = os.path.join(val_dir, folder_name)
    
    print(f"Training images in {folder_name}: {len(os.listdir(train_character_folder))}")
    print(f"Validation images in {folder_name}: {len(os.listdir(val_character_folder))}")


Training images in Sample001: 812
Validation images in Sample001: 204
Training images in Sample002: 812
Validation images in Sample002: 204
Training images in Sample003: 812
Validation images in Sample003: 204
Training images in Sample004: 812
Validation images in Sample004: 204
Training images in Sample005: 812
Validation images in Sample005: 204


In [7]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define your data generator for training data
train_datagen = ImageDataGenerator(
    rescale=1./255  # Normalize the image
)

# Define your data generator for validation data
val_datagen = ImageDataGenerator(
    rescale=1./255  # Normalize the image
)

# Load images from the training directory
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(224, 224),  # Resize images to 224x224
    batch_size=32,
    class_mode='categorical'  # For multi-class classification
)

# Load images from the validation directory
validation_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=(224, 224),  # Resize images to 224x224
    batch_size=32,
    class_mode='categorical'  # For multi-class classification
)


Found 50344 images belonging to 62 classes.
Found 12648 images belonging to 62 classes.


In [8]:
print('Number of training samples:', train_generator.samples)
print('Number of validation samples:', validation_generator.samples)


Number of training samples: 50344
Number of validation samples: 12648


In [9]:
# Load MobileNetV2
base_model = MobileNetV2(weights='imagenet', include_top=False)

# Freeze the base_model
base_model.trainable = False

# Add custom layers
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
x = Dropout(0.5)(x)

predictions = Dense(62, activation='softmax')(x)  # Use 62 for 62 classes

# Create the final model
model = Model(inputs=base_model.input, outputs=predictions)

steps_per_epoch = max(1, train_generator.samples // train_generator.batch_size)
validation_steps = max(1, validation_generator.samples // validation_generator.batch_size)

print('Number of training samples:', train_generator.samples)
print('Number of validation samples:', validation_generator.samples)
print('Steps per epoch:', steps_per_epoch)
print('Validation steps:', validation_steps)

# Compile the model
model.compile(optimizer=Adam(lr=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
Number of training samples: 50344
Number of validation samples: 12648
Steps per epoch: 1573
Validation steps: 395


  super().__init__(name, **kwargs)


In [28]:
# Train the model
history = model.fit(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=20,
    validation_data=validation_generator,
    validation_steps=validation_steps
)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [29]:
model.save('./models/ocr')



INFO:tensorflow:Assets written to: ./models/ocr\assets


INFO:tensorflow:Assets written to: ./models/ocr\assets


In [12]:
import numpy as np

def non_max_suppression(boxes, probs=None, overlapThresh=0.5):
    # If there are no boxes, return an empty list
    if len(boxes) == 0:
        return []

    # If the bounding boxes are integers, convert them to floats -- this
    # is important since we'll be doing a bunch of divisions
    if boxes.dtype.kind == "i":
        boxes = boxes.astype("float")

    # Initialize the list of picked indexes
    pick = []

    # Grab the coordinates of the bounding boxes
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]

    # Compute the area of the bounding boxes and sort the bounding
    # boxes by the bottom-right y-coordinate of the bounding box
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = np.argsort(y2)

    # Keep looping while some indexes still remain in the indexes list
    while len(idxs) > 0:
        # Grab the last index in the indexes list, add the index
        # value to the list of picked indexes, then initialize
        # the suppression list (i.e., indexes that will be deleted)
        # using the last index
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)
        suppress = [last]

        # Loop over all indexes in the indexes list
        for pos in range(0, last):
            # Grab the current index
            j = idxs[pos]

            # Find the largest (x, y) coordinates for the start of
            # the bounding box and the smallest (x, y) coordinates
            # for the end of the bounding box
            xx1 = max(x1[i], x1[j])
            yy1 = max(y1[i], y1[j])
            xx2 = min(x2[i], x2[j])
            yy2 = min(y2[i], y2[j])

            # Compute the width and height of the bounding box
            w = max(0, xx2 - xx1 + 1)
            h = max(0, yy2 - yy1 + 1)

            # Compute the ratio of overlap between the computed
            # bounding box and the bounding box in the area list
            overlap = float(w * h) / area[j]

            # If there is sufficient overlap, suppress the
            # current bounding box
            if overlap > overlapThresh:
                suppress.append(pos)

        # Delete all indexes from the index list that are in the
        # suppression list
        idxs = np.delete(idxs, suppress)

    # Return only the bounding boxes that were picked
    return boxes[pick].astype("int")


In [34]:
import cv2
import numpy as np

def decode_predictions(scores, geometry, min_confidence):
    # Grab the number of rows and columns from the scores volume, then initialize
    # our set of bounding box rectangles and corresponding confidence scores
    (numRows, numCols) = scores.shape[2:4]
    rects = []
    confidences = []

    # Loop over the number of rows
    for y in range(0, numRows):
        # Extract the scores (probabilities), followed by the geometrical
        # data used to derive potential bounding box coordinates that
        # surround text
        scoresData = scores[0, 0, y]
        xData0 = geometry[0, 0, y]
        xData1 = geometry[0, 1, y]
        xData2 = geometry[0, 2, y]
        xData3 = geometry[0, 3, y]
        anglesData = geometry[0, 4, y]

        # Loop over the number of columns
        for x in range(0, numCols):
            # If our score does not have sufficient probability, ignore it
            if scoresData[x] < min_confidence:
                continue

            # Compute the offset factor as our resulting feature maps will
            # be 4x smaller than the input image
            (offsetX, offsetY) = (x * 4.0, y * 4.0)

            # Extract the rotation angle for the prediction and then
            # compute the sin and cosine
            angle = anglesData[x]
            cos = np.cos(angle)
            sin = np.sin(angle)

            # Use the geometry volume to derive the width and height of
            # the bounding box
            h = xData0[x] + xData2[x]
            w = xData1[x] + xData3[x]

            # Compute both the starting and ending (x, y)-coordinates for
            # the text prediction bounding box
            endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
            endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
            startX = int(endX - w)
            startY = int(endY - h)

            # Add the bounding box coordinates and probability score to
            # our respective lists
            rects.append((startX, startY, endX, endY))
            confidences.append(scoresData[x])

    # Return a tuple of the bounding boxes and associated confidences
    return (rects, confidences)

# Load the pre-trained EAST text detector
net = cv2.dnn.readNet('./models/frozen_east_text_detection.pb')

# Load the image
image = cv2.imread('test_image.jpeg')
orig = image.copy()
(origH, origW) = image.shape[:2]

# Set the new width and height and determine the ratio in change
(newW, newH) = (320, 320)
rW = origW / float(newW)
rH = origH / float(newH)

# Resize the image and grab the new image dimensions
image = cv2.resize(image, (newW, newH))
(H, W) = image.shape[:2]

# Define the two output layer names for the EAST detector model
layerNames = [
    "feature_fusion/Conv_7/Sigmoid",
    "feature_fusion/concat_3"
]

# Forward pass of the blob through the network
blob = cv2.dnn.blobFromImage(image, 1.0, (W, H),
                             (123.68, 116.78, 103.94), swapRB=True, crop=False)
net.setInput(blob)
(scores, geometry) = net.forward(layerNames)

# Decode the predictions
(rects, confidences) = decode_predictions(scores, geometry, min_confidence=0.5)

# Apply non-maxima suppression to suppress weak, overlapping bounding boxes
boxes = non_max_suppression(np.array(rects), probs=confidences)


In [14]:
import requests
from PIL import Image
import io


def preprocess_image(image, target_size):
    # Convert the image from BGR (OpenCV format) to RGB
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Convert the image from NumPy array to PIL Image
    image = Image.fromarray(image)

    # Check the mode and resize
    if image.mode != "RGB":
        image = image.convert("RGB")
    image = image.resize(target_size)

    # Convert the image to array and preprocess it
    image = preprocess_input(np.array(image))
    image = np.expand_dims(image, axis=0)

    return image




In [15]:
import string

# Digits
digits = list(string.digits)

# Uppercase characters
uppercase_chars = list(string.ascii_uppercase)

# Lowercase characters
lowercase_chars = list(string.ascii_lowercase)

# Combine all the characters to form the class labels, starting with digits
all_chars = digits + uppercase_chars + lowercase_chars

# Create the index_to_char dictionary
index_to_char = {i: all_chars[i] for i in range(len(all_chars))}

print(index_to_char)  # Display the mapping


{0: '0', 1: '1', 2: '2', 3: '3', 4: '4', 5: '5', 6: '6', 7: '7', 8: '8', 9: '9', 10: 'A', 11: 'B', 12: 'C', 13: 'D', 14: 'E', 15: 'F', 16: 'G', 17: 'H', 18: 'I', 19: 'J', 20: 'K', 21: 'L', 22: 'M', 23: 'N', 24: 'O', 25: 'P', 26: 'Q', 27: 'R', 28: 'S', 29: 'T', 30: 'U', 31: 'V', 32: 'W', 33: 'X', 34: 'Y', 35: 'Z', 36: 'a', 37: 'b', 38: 'c', 39: 'd', 40: 'e', 41: 'f', 42: 'g', 43: 'h', 44: 'i', 45: 'j', 46: 'k', 47: 'l', 48: 'm', 49: 'n', 50: 'o', 51: 'p', 52: 'q', 53: 'r', 54: 's', 55: 't', 56: 'u', 57: 'v', 58: 'w', 59: 'x', 60: 'y', 61: 'z'}


In [16]:
def segment_characters(roi, draw_rectangles=True):
    # Convert ROI to grayscale and apply threshold
    gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    # Use cv2.THRESH_BINARY instead of cv2.THRESH_BINARY_INV
    thresh = cv2.threshold(gray, 150, 255, cv2.THRESH_BINARY)[1]

    # Find contours and sort them from left-to-right
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    bounding_boxes = [cv2.boundingRect(c) for c in contours]
    (contours, bounding_boxes) = zip(*sorted(zip(contours, bounding_boxes),
                                            key=lambda b: b[1][0],
                                            reverse=False))
    character_images = []
    char_coords = []  # List to store the coordinates of each character

    for contour in contours:
        # Get rectangle bounding contour
        x, y, w, h = cv2.boundingRect(contour)

        # Draw rectangles (optional)
        if draw_rectangles:
            cv2.rectangle(roi, (x, y), (x + w, y + h), (0, 255, 0), 2)

        # Store the coordinates of the character
        char_coords.append((x, y, w, h))

        # Getting ROI
        char_roi = roi[y:y + h, x:x + w]

        # Preprocess the ROI for character recognition model
        # Make sure the preprocess_image function is defined and compatible with your model
        char_roi_processed = preprocess_image(char_roi, target_size=(224, 224))
        character_images.append(char_roi_processed)

    return roi, character_images, char_coords


In [20]:

for (startX, startY, endX, endY) in boxes:
    # Scale the bounding box coordinates based on the respective ratios
    startX = int(startX * rW)
    startY = int(startY * rH)
    endX = int(endX * rW)
    endY = int(endY * rH)

    # Extract the region of interest
    roi = orig[startY:endY, startX:endX]

    # Segment characters in the ROI and get their coordinates
    _, character_images, char_coords = segment_characters(roi)

    # Draw bounding box for each character on the original image
    for (x, y, w, h) in char_coords:
        cv2.rectangle(orig, (startX + x, startY + y), (startX + x + w, startY + y + h), (0, 0, 255), 2)

# Show the output image with bounding boxes around each character
cv2.imshow("Image",orig)
cv2.waitKey(0)
cv2.destroyAllWindows()


In [35]:

detected_text = ""  # To store detected text from all boxes

for (startX, startY, endX, endY) in boxes:
    # Scale the bounding box coordinates based on the respective ratios
    startX = int(startX * rW)
    startY = int(startY * rH)
    endX = int(endX * rW)
    endY = int(endY * rH)

    # Extract the region of interest
    roi = orig[startY:endY, startX:endX]

    # Segment characters in the ROI and get their coordinates
    roi_with_characters, character_images, char_coords = segment_characters(roi)

     # Recognize each character and draw bounding box around it
    for char_img, (x, y, w, h) in zip(character_images, char_coords):
        prediction = model.predict(char_img)
        predicted_class = np.argmax(prediction, axis=1)
        predicted_char = index_to_char[predicted_class[0]]  # Translate class index to character
        print(predicted_class[0])

        # Append the predicted character to the detected_text string
        detected_text += predicted_char

        # Draw bounding box for each character on the original image
        cv2.rectangle(orig, (startX + x, startY + y), (startX + x + w, startY + y + h), (0, 0, 255), 2)
        cv2.putText(orig, predicted_char, (startX + x, startY + y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)

    # Reset detected_text for the next box
    detected_text = ""

# Show the output image with bounding boxes around each character
cv2.imshow("Image",orig)
cv2.waitKey(0)
cv2.destroyAllWindows()


48
40
33
48
39
