In [1]:
import numpy as np
import os
import argparse
import cv2
import imutils
import pytesseract
from pytesseract import Output

from imutils.contours import sort_contours
from matplotlib import pyplot as plt
from collections import Counter

## Loading

In [112]:
def load_images_from_folder(folder):
    """Load every readable image stored directly inside ``folder``.

    Entries are visited in sorted filename order. ``os.listdir`` returns names
    in arbitrary order, and later cells use list positions as image
    identifiers, so a fixed order keeps those positions reproducible.

    Args:
        folder: Path of the directory to scan (sub-directories are skipped).

    Returns:
        A tuple ``(images, dimensions)``: ``images`` is the list of arrays
        returned by ``cv2.imread`` and ``dimensions`` the matching list of
        ``(height, width)`` tuples, in the same order.
    """
    images = []
    dimensions = []
    for filename in sorted(os.listdir(folder)):
        img_path = os.path.join(folder, filename)
        if not os.path.isfile(img_path):
            continue
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread yields None for files it cannot decode; skip them.
            continue
        images.append(img)
        dimensions.append(img.shape[:2])  # (height, width)
    return images, dimensions

In [113]:
# Read every decodable image, together with its (height, width), from the
# "Train" and "Test" folders.
images, dimensions = load_images_from_folder("Train")
test_images, test_dimensions = load_images_from_folder("Test")

## Resizing

In [114]:
def analyze_image_dimensions(dimensions):
    """Summarise a collection of image sizes.

    Args:
        dimensions: Sequence whose items hold the height at index 0 and the
            width at index 1.

    Returns:
        A dict with the mean and median of both axes plus, per axis, the three
        most frequent values as ``(value, count)`` pairs.
    """
    def _axis_values(position):
        # Collect one coordinate (0 = height, 1 = width) across all entries.
        return [entry[position] for entry in dimensions]

    all_heights = _axis_values(0)
    all_widths = _axis_values(1)

    summary = {}
    summary["average_height"] = np.mean(all_heights)
    summary["average_width"] = np.mean(all_widths)
    summary["median_height"] = np.median(all_heights)
    summary["median_width"] = np.median(all_widths)
    summary["most_common_heights"] = Counter(all_heights).most_common(3)
    summary["most_common_widths"] = Counter(all_widths).most_common(3)
    return summary

# Summarise the training-image sizes and print a short report.
stats = analyze_image_dimensions(dimensions)

report = [
    "Image Dimension Statistics:",
    f"Average Height: {stats['average_height']:.2f}",
    f"Average Width: {stats['average_width']:.2f}",
    f"Median Height: {stats['median_height']}",
    f"Median Width: {stats['median_width']}",
]
print("\n".join(report))
print("Top 3 Most Common Heights:", stats['most_common_heights'])
print("Top 3 Most Common Widths:", stats['most_common_widths'])

Image Dimension Statistics:
Average Height: 859.32
Average Width: 747.52
Median Height: 944.5
Median Width: 810.0
Top 3 Most Common Heights: [(961, 14), (962, 13), (965, 9)]
Top 3 Most Common Widths: [(813, 22), (819, 14), (816, 11)]


In [115]:
# Working image size in pixels: the most frequent height and width reported by
# the dimension statistics printed above.
height, width = 961, 813

In [116]:
def resize_images(images, target_size=(961, 813)):
    """Resize every image to ``target_size`` given as ``(height, width)``.

    Bicubic interpolation is used when the source is smaller than the target
    along at least one axis; bilinear interpolation is used otherwise.

    Args:
        images: Sequence of image arrays.
        target_size: Desired ``(height, width)`` of every output image.

    Returns:
        A new list holding the resized images in input order.
    """
    def _resize_one(picture):
        source_dims = picture.shape[:2]
        smaller_on_some_axis = (
            source_dims[0] < target_size[0] or source_dims[1] < target_size[1]
        )
        method = cv2.INTER_CUBIC if smaller_on_some_axis else cv2.INTER_LINEAR
        # cv2.resize takes the destination size as (width, height).
        return cv2.resize(
            picture, (target_size[1], target_size[0]), interpolation=method
        )

    return [_resize_one(picture) for picture in images]

In [117]:
# Bring both image sets to resize_images' default target size of (961, 813).
resized_train = resize_images(images)
resized_test = resize_images(test_images)

## Enhancing Image (Applying CLAHE)

In [3]:
def enhance_images(images_path):
    """Apply CLAHE to every ``.jpg``/``.png`` file in a folder, in place.

    Each matching file is read, converted to grayscale, contrast-enhanced with
    CLAHE (clip limit 2.0, 4x4 tiles) and written back to the same path, so
    the original file content is replaced.

    Args:
        images_path: Path of the folder whose images are enhanced.

    Returns:
        None. The result is the rewritten files on disk.
    """
    # One CLAHE instance serves every file; it does not need rebuilding per image.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(4, 4))
    for filename in os.listdir(images_path):
        if not filename.endswith((".jpg", ".png")):
            continue
        image_path = os.path.join(images_path, filename)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread yields None for unreadable files; leave them untouched
            # rather than failing inside cvtColor.
            continue
        gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        cv2.imwrite(image_path, clahe.apply(gray_image))


In [4]:
# Folder whose .jpg/.png files are enhanced below.
images_folder = "train_enhanced/train_enhanced"

# Preprocess images in the folder.
# NOTE(review): enhance_images() writes each result back to the path it was
# read from, so re-running this cell enhances already-enhanced files again.
enhance_images(images_folder)

In [None]:
def enhance_image(images_path):
    """Apply CLAHE to every ``.jpg``/``.png`` file in a folder, in place.

    Each matching file is read, converted to grayscale, contrast-enhanced with
    CLAHE (clip limit 2.0, 4x4 tiles) and written back to the same path, so
    the original file content is replaced.

    Args:
        images_path: Path of the folder whose images are enhanced. This must
            be a directory path, not a list of in-memory images.

    Returns:
        None. The result is the rewritten files on disk.
    """
    # One CLAHE instance serves every file; it does not need rebuilding per image.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(4, 4))
    for filename in os.listdir(images_path):
        if not filename.endswith((".jpg", ".png")):
            continue
        image_path = os.path.join(images_path, filename)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread yields None for unreadable files; leave them untouched
            # rather than failing inside cvtColor.
            continue
        # CLAHE is applied to the grayscale version of the image.
        gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        # Overwrite the file with its enhanced version.
        cv2.imwrite(image_path, clahe.apply(gray_image))

# Path to the folder containing images.
# NOTE(review): this is the same folder that was passed to enhance_images()
# earlier in this file.
images_folder = "train_enhanced/train_enhanced"

# Preprocess images in the folder.
# NOTE(review): enhance_image() writes each result back to the path it was
# read from, so every run of this call enhances the stored files once more.
enhance_image(images_folder)

In [121]:
# enhance_image() works on a folder path (it calls os.listdir on its argument)
# and returns None, so it cannot be applied to the in-memory list of resized
# images. Apply the same grayscale + CLAHE enhancement (clip limit 2.0, 4x4
# tiles) directly to each array instead, so that the cropping step receives a
# list of single-channel images.
train_clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(4, 4))
enhanced_train = [
    train_clahe.apply(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY))
    for image in resized_train
]

## Rotating

In [122]:
def crop_image(images):
    """Cut the same fixed window out of every image.

    The window starts at row 30 and column 813 - 240 and spans the
    module-level ``height`` rows and ``width`` columns; slicing clips it to
    the image bounds.

    Args:
        images: Sequence of image arrays.

    Returns:
        A new list with one cropped array per input image, in input order.
    """
    left = 813 - 240
    top = 30
    return [
        picture[top:top + height, left:left + width]
        for picture in images
    ]

In [123]:
# Cut the fixed crop window out of every enhanced training image.
cropped_train = crop_image(enhanced_train)

In [124]:
class ShapeDetector:
    """Name the shape of a contour from the vertex count of its polygon approximation."""

    def __init__(self):
        """The detector keeps no state."""

    def detect(self, c):
        """Classify contour ``c``.

        The contour is approximated by a closed polygon with a tolerance of 4%
        of its perimeter. Three vertices give "triangle" and five give
        "pentagon". Four give "square" when the bounding-box aspect ratio lies
        in [0.95, 1.05], otherwise "rectangle". Any other count gives "circle".
        """
        perimeter = cv2.arcLength(c, True)
        polygon = cv2.approxPolyDP(c, 0.04 * perimeter, True)
        corner_count = len(polygon)

        if corner_count == 4:
            # Square and rectangle are told apart by width / height.
            _, _, box_w, box_h = cv2.boundingRect(polygon)
            aspect = box_w / float(box_h)
            if aspect < 0.95 or aspect > 1.05:
                return "rectangle"
            return "square"

        return {3: "triangle", 5: "pentagon"}.get(corner_count, "circle")

# Module-level detector instance; get_rectangles() calls sd.detect() on each contour.
sd = ShapeDetector()

In [139]:
def get_rectangles(image, width_range=(40, 275), height_range=(225, 375)):
    """Crop the rectangle-shaped regions of ``image`` that fit the size limits.

    The image is binarised with Otsu's threshold and its external contours are
    extracted. A contour is kept when its area moment ``m00`` is non-zero, the
    module-level ``sd`` detector labels it "rectangle", and its bounding box
    lies within both size ranges (bounds inclusive).

    Args:
        image: Image to search. Otsu thresholding expects a single-channel
            8-bit array.
        width_range: ``(min, max)`` bounding-box width in pixels.
        height_range: ``(min, max)`` bounding-box height in pixels.

    Returns:
        A list of crops of ``image`` (array slices, not copies), one per
        accepted contour, in the order the contours were returned.
    """
    min_w, max_w = width_range
    min_h, max_h = height_range

    thresh = cv2.threshold(image, 0, 255,
        cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    # grab_contours hides the differing findContours return shapes across
    # OpenCV versions.
    cnts = imutils.grab_contours(
        cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    )

    specified_rectangles = []
    for c in cnts:
        if cv2.moments(c)["m00"] == 0:
            # Degenerate (zero-area) contours are ignored.
            continue
        if sd.detect(c) != "rectangle":
            continue
        (x, y, w, h) = cv2.boundingRect(c)
        if min_w <= w <= max_w and min_h <= h <= max_h:
            specified_rectangles.append(image[y:y+h, x:x+w])
    return specified_rectangles

In [141]:
# cv2.imwrite does not create missing directories; it only reports failure
# through its return value. Make sure the output folder exists first.
os.makedirs("cropped_rectangle", exist_ok=True)

# specified_rectangles[i] holds the rectangle crops found in cropped_train[i];
# need_rotate collects the indices of images in which none was found.
specified_rectangles = []
need_rotate = []
for i, cropped in enumerate(cropped_train):
    rectangles = get_rectangles(cropped)
    specified_rectangles.append(rectangles)
    if len(rectangles) > 0:
        for j, rectangle in enumerate(rectangles):
            cv2.imwrite(f"cropped_rectangle/{i}_{j}.jpg", rectangle)
    else:
        need_rotate.append(i)
print(need_rotate)

[14, 68, 84, 107, 206, 212, 221, 241, 253, 269, 296, 395, 421, 429, 467]
