In [None]:
import cv2
import numpy as np
import os
import xml.etree.ElementTree as ET
import matplotlib.pyplot as plt

In [None]:
def load_images_from_folder(folder_path):
    """Load every ``.jpg``/``.png`` image found directly inside a folder.

    Files are visited in sorted filename order so the two returned lists are
    deterministic (``os.listdir`` gives no ordering guarantee). Files that
    OpenCV cannot decode are skipped with a message instead of being returned
    as ``None`` entries.

    Args:
        folder_path: Directory to scan (sub-directories are not visited).

    Returns:
        A ``(images, image_ids)`` tuple of two parallel lists: the images as
        returned by ``cv2.imread`` and the filenames without their extension.
    """
    images = []
    image_ids = []

    for filename in sorted(os.listdir(folder_path)):
        # Match extensions case-insensitively so e.g. "DSC00923.JPG" is found.
        if not filename.lower().endswith((".jpg", ".png")):
            continue

        image_path = os.path.join(folder_path, filename)
        image = cv2.imread(image_path)

        # cv2.imread reports failure by returning None rather than raising.
        if image is None:
            print(f"Could not read image, skipping: {image_path}")
            continue

        images.append(image)
        # The image ID is the filename with its extension removed.
        image_ids.append(os.path.splitext(filename)[0])

    return images, image_ids


In [None]:
# Load every image from the input folder. `images` and `image_ids` are
# notebook-level variables that the final pipeline cell passes to process_images.
folder_path = "Data/It2/Fine_Tuned_Flow"
images, image_ids = load_images_from_folder(folder_path)

# Sanity check: report how many images were loaded and list their IDs
# (an ID is the filename without its extension).
print(f"Loaded {len(images)} images")
for img_id in image_ids:
    print(f"Loaded Image ID: {img_id}")


In [None]:
def crop_to_table_boundaries_flexible(image, xml_path, margin_ratio=0.05):
    """Crop an image to the 'table' bounding box given in an annotation XML.

    The XML is searched for the first ``object`` element named ``'table'``
    that has a usable ``bndbox``. That box is grown by ``margin_ratio`` of the
    image width/height on each side, clamped to the image, and used as the
    crop region. The crop is also displayed with matplotlib.

    Args:
        image: Image array (BGR channel order when 3-channel, as produced by
            ``cv2.imread``), or ``None`` if loading failed.
        xml_path: Path to the annotation XML file.
        margin_ratio: Fraction of the image size added as margin per side.

    Returns:
        The cropped image, or ``None`` when there is no image, no usable table
        box in the XML, or the box lies entirely outside the image.
    """
    if image is None:
        print("No image data to crop.")
        return None

    root = ET.parse(xml_path).getroot()

    table_bounds = None
    for obj in root.findall('object'):
        # findtext tolerates objects that have no <name> child.
        if obj.findtext('name') != 'table':
            continue
        bndbox = obj.find('bndbox')
        if bndbox is None:
            continue
        try:
            # Coordinates may be written as floats ("50.0"), hence int(float()).
            table_bounds = tuple(
                int(float(bndbox.findtext(tag)))
                for tag in ('xmin', 'ymin', 'xmax', 'ymax')
            )
        except (TypeError, ValueError):
            # Missing or non-numeric coordinate: try the next table object.
            continue
        break

    if table_bounds is None:
        print("Table boundaries not found in XML.")
        return None

    # Add a margin around the annotated box to tolerate image variations.
    img_height, img_width = image.shape[:2]
    margin_x = int(margin_ratio * img_width)
    margin_y = int(margin_ratio * img_height)

    xmin, ymin, xmax, ymax = table_bounds
    xmin = max(0, xmin - margin_x)
    ymin = max(0, ymin - margin_y)
    xmax = min(img_width, xmax + margin_x)
    ymax = min(img_height, ymax + margin_y)

    # A box outside the image would yield an empty crop that cannot be
    # displayed or processed further.
    if xmax <= xmin or ymax <= ymin:
        print("Table boundaries fall outside the image.")
        return None

    cropped_table = image[ymin:ymax, xmin:xmax]

    # OpenCV stores colour images as BGR while matplotlib expects RGB, so the
    # channel axis is reversed for display only; the returned crop is untouched.
    if cropped_table.ndim == 3 and cropped_table.shape[2] == 3:
        display_image = cropped_table[:, :, ::-1]
    else:
        display_image = cropped_table

    plt.figure(figsize=(10, 10))
    plt.imshow(display_image, cmap='gray')
    plt.title('Cropped Table (Flexible)')
    plt.show()

    return cropped_table


In [None]:
def crop_leftmost_column(cropped_table):
    """Crop the leftmost column out of a table image.

    Near-vertical line segments are detected with Canny edge detection followed
    by a probabilistic Hough transform. The leftmost such segment that starts
    more than 50 px from the left edge is taken as the column's left border;
    the crop runs from 10 px left of it to 12% of the image width right of it,
    over the full image height. The crop is also displayed with matplotlib.

    Args:
        cropped_table: Table image; converted to grayscale if it has 3 dims.

    Returns:
        The grayscale column crop, or ``None`` when no suitable vertical line
        is detected (previously a meaningless strip at the right edge of the
        image was returned in that case).
    """
    # Convert to grayscale if the image is not already
    if len(cropped_table.shape) == 3:
        cropped_table = cv2.cvtColor(cropped_table, cv2.COLOR_BGR2GRAY)

    # Edge map, then line segments at least 100 px long.
    edges = cv2.Canny(cropped_table, 50, 150, apertureSize=3)
    lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=100, minLineLength=100, maxLineGap=10)

    candidate_xs = []
    if lines is not None:
        # HoughLinesP yields one (x1, y1, x2, y2) quadruple per segment.
        for x1, y1, x2, y2 in np.reshape(lines, (-1, 4)):
            is_vertical = abs(x1 - x2) < 10
            # Segments within the first 50 px are assumed to be page holes
            # or the page edge rather than a column border.
            if is_vertical and x1 > 50:
                candidate_xs.append(x1)

    if not candidate_xs:
        print("No vertical column line found; cannot locate the leftmost column.")
        return None

    leftmost_x = int(min(candidate_xs))

    # Bounding box for the column: small padding on the left, width-relative
    # extent on the right, full height.
    col_xmin = max(0, leftmost_x - 10)
    col_xmax = leftmost_x + int(0.12 * cropped_table.shape[1])
    col_ymin = 0
    col_ymax = cropped_table.shape[0]

    cropped_column = cropped_table[col_ymin:col_ymax, col_xmin:col_xmax]

    # Visualize the cropped leftmost column
    plt.figure(figsize=(10, 10))
    plt.imshow(cropped_column, cmap='gray')
    plt.title('Cropped Leftmost Column (Improved)')
    plt.show()

    return cropped_column


In [None]:
def extract_rows_from_column(cropped_column, xml_path, y_offset=0):
    """Slice a column image into row images using 'row' boxes from an XML file.

    Every ``object`` named ``'row'`` contributes its ``ymin``/``ymax``; rows are
    processed top to bottom and each non-empty slice is displayed with
    matplotlib.

    NOTE(review): the XML coordinates are presumably expressed in the original
    full-image frame. If ``cropped_column`` was cut from a cropped table image,
    pass the top edge of that crop as ``y_offset`` so the rows line up - confirm
    against the annotation tool. The default of 0 keeps the previous behaviour.

    Args:
        cropped_column: Column image; converted to grayscale if it has 3 dims.
        xml_path: Path to the annotation XML file.
        y_offset: Value subtracted from every row's ``ymin``/``ymax`` before
            slicing.

    Returns:
        List of row images ordered top to bottom. Rows whose box is malformed
        or falls completely outside the column are skipped.
    """
    # Convert to grayscale if the image is not already
    if len(cropped_column.shape) == 3:
        cropped_column = cv2.cvtColor(cropped_column, cv2.COLOR_BGR2GRAY)

    root = ET.parse(xml_path).getroot()

    row_boundaries = []
    for obj in root.findall('object'):
        # findtext tolerates objects that have no <name> child.
        if obj.findtext('name') != 'row':
            continue
        bndbox = obj.find('bndbox')
        if bndbox is None:
            continue
        try:
            ymin = int(float(bndbox.findtext('ymin')))
            ymax = int(float(bndbox.findtext('ymax')))
        except (TypeError, ValueError):
            # Missing or non-numeric coordinate: ignore this row.
            continue
        row_boundaries.append((ymin, ymax))

    # Process rows from top to bottom.
    row_boundaries.sort(key=lambda bounds: bounds[0])

    column_height = cropped_column.shape[0]
    rows = []
    for ymin, ymax in row_boundaries:
        # Clamp to the column so a negative start cannot wrap around to the
        # bottom of the image through negative indexing.
        top = max(0, ymin - y_offset)
        bottom = min(column_height, ymax - y_offset)
        cropped_row = cropped_column[top:bottom, :]

        # An empty slice cannot be displayed or thresholded later on.
        if cropped_row.size == 0:
            print(f"Skipping row ({ymin}, {ymax}): outside the column image.")
            continue

        rows.append(cropped_row)

        # Visualize each cropped row, numbered by its position in the result.
        plt.figure(figsize=(10, 2))
        plt.imshow(cropped_row, cmap='gray')
        plt.title(f'Cropped Row {len(rows)}')
        plt.show()

    return rows

In [None]:
def extract_digits_from_cells(rows, image_id, output_folder):
    """Segment digit candidates from row images and save them as 28x28 PNGs.

    Each row is inverse-thresholded at 127 and its external contours are taken
    as digit candidates. Candidates with a bounding box taller than 20 px and
    narrower than 100 px are kept, ordered left to right, resized to 28x28 and
    written to ``output_folder`` as
    ``{image_id}_column1_row{R}_digit{D}.png``, where ``D`` is the digit's
    left-to-right position within the row.

    Args:
        rows: Iterable of single-channel row images.
        image_id: Identifier used as the filename prefix.
        output_folder: Directory for the PNG files; created if missing.

    Returns:
        Dict mapping ``"{image_id}_column1_row{R}"`` to the list of file paths
        written for that row, in left-to-right order.
    """
    # Create the output directory if it does not exist
    os.makedirs(output_folder, exist_ok=True)

    extracted = {}
    for row_idx, row in enumerate(rows):
        # Inverse binary threshold so that dark ink becomes foreground.
        _, binary = cv2.threshold(row, 127, 255, cv2.THRESH_BINARY_INV)

        # findContours returns (contours, hierarchy) in OpenCV 4 but
        # (image, contours, hierarchy) in OpenCV 3; the contour list is the
        # second-to-last element in both.
        contours = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

        # Keep plausible digit boxes only; tune these limits as needed.
        boxes = []
        for contour in contours:
            x, y, w, h = cv2.boundingRect(contour)
            if h > 20 and w < 100:
                boxes.append((x, y, w, h))

        # Contours come back in no useful order; sort by x (ties broken by the
        # remaining box fields) BEFORE numbering so that the digit index in the
        # filename really encodes the left-to-right position.
        boxes.sort()

        row_key = f"{image_id}_column1_row{row_idx+1}"
        digit_paths = []
        for digit_idx, (x, y, w, h) in enumerate(boxes, start=1):
            digit = binary[y:y+h, x:x+w]

            # Resize to 28x28 for consistency
            digit_resized = cv2.resize(digit, (28, 28))

            digit_path = os.path.join(output_folder, f"{row_key}_digit{digit_idx}.png")
            # cv2.imwrite returns False on failure rather than raising.
            if cv2.imwrite(digit_path, digit_resized):
                digit_paths.append(digit_path)
            else:
                print(f"Failed to write digit image: {digit_path}")

        extracted[row_key] = digit_paths

        # Print the number of digits extracted for verification
        print(f"Row {row_idx+1}: {len(digit_paths)} digits extracted")

    return extracted


In [None]:
def process_images(images, image_ids, xml_paths, output_folder):
    """Run the table -> column -> rows -> digits pipeline on each image.

    ``images``, ``image_ids`` and ``xml_paths`` are paired by position. An
    image is skipped when it is ``None`` or when any stage yields nothing
    (no table crop, no column crop, no rows).

    Args:
        images: Images to process.
        image_ids: One identifier per image, in the same order.
        xml_paths: One annotation XML path per image, in the same order.
        output_folder: Directory handed to the digit-extraction step.

    Returns:
        Dict merged from whatever mapping the digit-extraction step returns
        for each image; empty if that step returns nothing.
    """
    images = list(images)
    image_ids = list(image_ids)
    xml_paths = list(xml_paths)

    # zip() stops at the shortest input, which would otherwise drop the
    # surplus entries without any indication.
    if not (len(images) == len(image_ids) == len(xml_paths)):
        print(
            f"Warning: got {len(images)} images, {len(image_ids)} image IDs and "
            f"{len(xml_paths)} XML paths; only the first "
            f"{min(len(images), len(image_ids), len(xml_paths))} will be processed."
        )

    all_extracted_digits = {}
    for img, img_id, xml_path in zip(images, image_ids, xml_paths):
        print(f"Processing Image ID: {img_id}")

        # An image that failed to load shows up as None.
        if img is None:
            print(f"Skipping {img_id}: image could not be loaded.")
            continue

        # Step 1: Crop image to table boundaries
        cropped_table = crop_to_table_boundaries_flexible(img, xml_path)
        if cropped_table is None:
            continue

        # Step 2: Crop the leftmost column
        cropped_column = crop_leftmost_column(cropped_table)
        if cropped_column is None:
            continue

        # Step 3: Extract rows from the leftmost column
        row_images = extract_rows_from_column(cropped_column, xml_path)
        if not row_images:
            continue

        # Step 4: Extract digits from each cell and save them
        extracted_digits = extract_digits_from_cells(row_images, img_id, output_folder)
        # Merge only when a mapping actually came back: dict.update(None)
        # raises TypeError.
        if extracted_digits:
            all_extracted_digits.update(extracted_digits)

    return all_extracted_digits


In [None]:
# Run the full pipeline on the images loaded earlier in the notebook.
# process_images pairs images, image_ids and xml_paths by position (zip), so
# list one XML path per loaded image, in the same order as `images`.
xml_paths = ["../data/DSC00923.xml"] # Add all XML paths here; NOTE(review): with a single entry only the first image is processed
# NOTE(review): this path uses "it2" while the image folder above uses "It2" -
# confirm the intended casing (they differ on case-sensitive filesystems).
output_folder = "Data/it2/FinalDigits"
extracted_digits = process_images(images, image_ids, xml_paths, output_folder)
