In [15]:
import os
import json
import cv2
import numpy as np
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from keras.applications.vgg16 import preprocess_input

In [2]:
# Load the VGG JSON annotation file
annotation = "annotations.json"
with open(annotation, "r") as f:
    annotation_data = json.load(f)

In [3]:
# Extract image names and corresponding bounding box coordinates from the annotation file
image_names = []
boxes = []
for _, annotation in annotation_data.items():
    image_name = annotation["filename"]
    regions = annotation['regions']
    
    for region in regions:
        x = region["shape_attributes"]["x"]
        y = region["shape_attributes"]["y"]
        width = region["shape_attributes"]["width"]
        height = region["shape_attributes"]["height"]
        image_names.append(image_name)
        boxes.append([x, y, x + width, y + height])

In [4]:
len(image_names)

3267

In [5]:
# Load the images and their corresponding bounding box annotations
X = []
Y = []

# Define target image size for resizing
target_size = (224, 224)

for i in range(len(image_names)):
    img_name = image_names[i]
    # Load image
    image = cv2.imread(img_name)
    # Check if the image was loaded successfully
    if image is None:
        print(f"Skipping file {img_name} as it could not be loaded.")
        continue
        
    # Loading the bounding boxes
    box = boxes[i]
    x, y, x2, y2 = box
    
    # Check if bounding box coordinates are valid
    if x >= x2 or y >= y2:
        print(f"Skipping file {img_name} due to invalid bounding box coordinates: ({x}, {y}, {x2}, {y2})")
        continue
        
    # Extract text region from the image
    text_region = image[y:y2, x:x2]
    
    # Check if extracted text region is empty
    if text_region.size == 0:
        print(f"Skipping file {img_name} due to empty text region")
        continue

    # Resize the text region to target size
    text_region = cv2.resize(text_region, target_size)
    
    # Normalize the text region
    text_region = preprocess_input(text_region) 
    
    X.append(text_region)
    Y.append(1) # Label as text region

X = np.array(X)
Y = np.array(Y)

Skipping file 1.jpg due to empty text region
Skipping file 13.jpg due to empty text region
Skipping file 110.jpg due to empty text region
Skipping file 115.jpg due to empty text region
Skipping file 240.jpg as it could not be loaded.
Skipping file 240.jpg as it could not be loaded.
Skipping file 240.jpg as it could not be loaded.
Skipping file 240.jpg as it could not be loaded.
Skipping file 240.jpg as it could not be loaded.
Skipping file 240.jpg as it could not be loaded.
Skipping file 240.jpg as it could not be loaded.
Skipping file 240.jpg as it could not be loaded.
Skipping file 240.jpg as it could not be loaded.
Skipping file 240.jpg as it could not be loaded.


In [6]:
X

array([[[[ 128.061   ,  111.221   ,   95.32    ],
         [ 131.061   ,  114.221   ,   98.32    ],
         [ 130.061   ,  113.221   ,   97.32    ],
         ...,
         [ 111.061   ,   89.221   ,   65.32    ],
         [ 111.061   ,   89.221   ,   65.32    ],
         [ 109.061   ,   87.221   ,   63.32    ]],

        [[ 127.061   ,  111.221   ,   94.32    ],
         [ 131.061   ,  114.221   ,   98.32    ],
         [ 130.061   ,  113.221   ,   97.32    ],
         ...,
         [ 111.061   ,   89.221   ,   65.32    ],
         [ 111.061   ,   89.221   ,   65.32    ],
         [ 109.061   ,   87.221   ,   63.32    ]],

        [[ 128.061   ,  111.221   ,   94.32    ],
         [ 131.061   ,  114.221   ,   98.32    ],
         [ 130.061   ,  113.221   ,   98.32    ],
         ...,
         [ 111.061   ,   89.221   ,   65.32    ],
         [ 111.061   ,   89.221   ,   65.32    ],
         [ 109.061   ,   87.221   ,   63.32    ]],

        ...,

        [[ 128.061   ,  111.221   ,   

In [7]:
Y

array([1, 1, 1, ..., 1, 1, 1])

In [8]:
# Perform data augmentation to create negative samples
data_augmentation = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode="nearest",
    preprocessing_function=preprocess_input # Normalize the images during data augmentation
)

In [10]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [13]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((2602, 224, 224, 3), (651, 224, 224, 3), (2602,), (651,))

In [16]:
# Define the neural network architecture
model = Sequential()

# Convolutional layers
model.add(Conv2D(32, (3, 3), activation="relu", input_shape=(224, 224, 3)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation="relu"))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation="relu"))
model.add(MaxPooling2D((2, 2)))

# Flatten and fully connected layers
model.add(Flatten())
model.add(Dense(256, activation="relu"))
model.add(Dropout(0.5))  # Adding dropout for regularization
model.add(Dense(128, activation="relu"))
model.add(Dropout(0.5))  # Adding dropout for regularization
model.add(Dense(1, activation="sigmoid"))

# Compile the model
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

In [17]:
# Train the model with data augmentation
model.fit(data_augmentation.flow(X_train, y_train, batch_size=32),
          steps_per_epoch=len(X_train) // 32, epochs=10, validation_data=(X_test, y_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x10f0231dfd0>

In [19]:
# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

Test Loss: 0.0
Test Accuracy: 1.0


In [20]:
# Save the model
model.save("text_detection_model.h5")

In [35]:
from keras.models import load_model
import warnings

# Disable all warnings
warnings.filterwarnings("ignore")

# Load the trained text detection model
model_path = "text_detection_model.h5"
trained_model = load_model(model_path)

# Load an image from the test dataset
test_image_path = r"C:\Users\uzair\Desktop\Deep_Learning\Text_detection_model\test.jpg"
test_image = cv2.imread(test_image_path)

# Preprocess the test image
test_image_preprocessed = cv2.resize(test_image, (224, 224))
test_image_preprocessed = np.expand_dims(test_image_preprocessed, axis=0)
test_image_preprocessed = preprocess_input(test_image_preprocessed)

# Predict the text regions using the trained model
predictions = model.predict(test_image_preprocessed)[0]
text_regions = np.where(predictions > 0.5)[0]

if len(text_regions) > 0:
    print("Text regions detected:", len(text_regions))
    for idx in text_regions:
        box = boxes[idx]
        x, y, x2, y2 = box
        # Scale the bounding box coordinates to match the resized test image
        scale_x = 224 / test_image_preprocessed.shape[2]
        scale_y = 224 / test_image_preprocessed.shape[1]
        x = int(x * scale_x)
        y = int(y * scale_y)
        x2 = int(x2 * scale_x)
        y2 = int(y2 * scale_y)
        # Draw bounding box on the test image
        cv2.rectangle(test_image, (x, y), (x2, y2), (0, 255, 0), 2)
else:
    print("No text regions detected.")

# Display the original and annotated test image
cv2.imshow("Original Test Image", cv2.imread(test_image_path))
cv2.imshow("Annotated Test Image", test_image)
cv2.waitKey(0)
cv2.destroyAllWindows()

Text regions detected: 1
