In [18]:
import cv2
import numpy as np
import os
import xml.etree.ElementTree as ET

In [19]:
def load_images_from_folder(folder_path):
    """Load every ``.jpg``/``.png`` image found directly inside ``folder_path``.

    Files are visited in sorted filename order so that the returned lists are
    reproducible across platforms (``os.listdir`` gives no ordering guarantee).
    Files that OpenCV cannot decode are reported and skipped, so the returned
    list never contains ``None`` entries.

    Args:
        folder_path: Directory to scan (sub-directories are not searched).

    Returns:
        A tuple ``(images, image_ids)`` of two parallel lists: the images as
        returned by ``cv2.imread`` and the matching filenames without their
        extension.
    """
    images = []
    image_ids = []

    for filename in sorted(os.listdir(folder_path)):
        # Compare case-insensitively so e.g. "scan.JPG" is picked up as well.
        if not filename.lower().endswith((".jpg", ".png")):
            continue

        image_path = os.path.join(folder_path, filename)
        image = cv2.imread(image_path)

        # cv2.imread signals an unreadable/corrupt file by returning None
        # instead of raising; keep such entries out of the result.
        if image is None:
            print(f"Could not read image '{image_path}' - Skipping")
            continue

        images.append(image)
        # The image ID is the filename with its extension removed.
        image_ids.append(os.path.splitext(filename)[0])

    return images, image_ids


In [20]:
# Usage: load every image from the input folder.
# `images` and `image_ids` are parallel lists and are reused by the
# extraction cells further down, so this cell must run before them.
folder_path = "Data/It2/Fine_Tuned_Flow"
images, image_ids = load_images_from_folder(folder_path)

# Report how many images were loaded and list their IDs
# (an ID is the filename without its extension).
print(f"Loaded {len(images)} images")
for img_id in image_ids:
    print(f"Loaded Image ID: {img_id}")


Loaded 12 images
Loaded Image ID: Fine_Tuned_Flow_Image_Image_1
Loaded Image ID: Fine_Tuned_Flow_Image_Image_10
Loaded Image ID: Fine_Tuned_Flow_Image_Image_11
Loaded Image ID: Fine_Tuned_Flow_Image_Image_12
Loaded Image ID: Fine_Tuned_Flow_Image_Image_2
Loaded Image ID: Fine_Tuned_Flow_Image_Image_3
Loaded Image ID: Fine_Tuned_Flow_Image_Image_4
Loaded Image ID: Fine_Tuned_Flow_Image_Image_5
Loaded Image ID: Fine_Tuned_Flow_Image_Image_6
Loaded Image ID: Fine_Tuned_Flow_Image_Image_7
Loaded Image ID: Fine_Tuned_Flow_Image_Image_8
Loaded Image ID: Fine_Tuned_Flow_Image_Image_9


In [32]:
def extract_digits_from_left_column_and_save(images, image_ids, xml_path, output_folder):
    """Crop the leftmost annotated column from each image and extract digit crops.

    The XML annotation is searched for ``object`` elements named ``'colum'``;
    the one with the smallest ``xmin`` is used as the column bounding box for
    every image. Inside that box the image is converted to grayscale,
    thresholded (inverted binary, threshold 127) and split into external
    contours. Contours taller than 20 px and narrower than 100 px are resized
    to 28x28, written to ``<output_folder>/<img_id>_left_column/digit_<n>.png``
    (``n`` is the contour index) and collected.

    Args:
        images: Images as returned by ``cv2.imread``; ``None`` entries are skipped.
        image_ids: Identifiers parallel to ``images``.
        xml_path: Path to the annotation XML file.
        output_folder: Directory under which the per-image digit folders are created.

    Returns:
        Dict mapping ``"<img_id>_left_column"`` to the list of 28x28 digit
        crops, ordered left to right by the x position of each digit inside
        the column (ties broken top to bottom). Images whose bounding box
        does not fit are skipped. Returns ``{}`` when the XML contains no
        ``'colum'`` object.
    """
    root = ET.parse(xml_path).getroot()

    # Find the 'colum' object with the smallest xmin (the first one wins ties).
    left_column_coords = None
    for obj in root.findall('object'):
        if obj.findtext('name') != 'colum':
            continue
        bndbox = obj.find('bndbox')
        # Coordinates may be stored as floats ("12.0"), hence int(float(...)).
        coords = tuple(
            int(float(bndbox.find(tag).text))
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        )
        if left_column_coords is None or coords[0] < left_column_coords[0]:
            left_column_coords = coords

    if left_column_coords is None:
        print("No column labeled 'colum' found in XML.")
        return {}

    xmin, ymin, xmax, ymax = left_column_coords
    extracted_digits = {}

    for img, img_id in zip(images, image_ids):
        # cv2.imread yields None for unreadable files; do not crash on them.
        if img is None:
            print(f"Image {img_id} could not be loaded - Skipping")
            continue

        height, width = img.shape[:2]

        # The same box is applied to every image, so it must fit entirely.
        if xmin >= width or xmax > width or ymin >= height or ymax > height or xmin < 0 or ymin < 0:
            print(f"Invalid bounding box for image {img_id}: ({xmin}, {ymin}, {xmax}, {ymax}) - Skipping")
            continue

        column_image = img[ymin:ymax, xmin:xmax]
        if column_image.size == 0:
            print(f"Empty crop for image {img_id} - Skipping")
            continue

        # Thresholding needs a single-channel image.
        if len(column_image.shape) == 3:
            column_image = cv2.cvtColor(column_image, cv2.COLOR_BGR2GRAY)

        # Inverted binary: dark ink becomes white foreground for findContours.
        _, binary = cv2.threshold(column_image, 127, 255, cv2.THRESH_BINARY_INV)
        contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        digit_folder = os.path.join(output_folder, f"{img_id}_left_column")
        positioned_digits = []  # (x, y, crop) so the crops can be ordered afterwards
        for contour_idx, contour in enumerate(contours):
            x, y, w, h = cv2.boundingRect(contour)
            # Filter out too small or too large contours that are likely noise.
            if not (h > 20 and w < 100):  # Tune these values as needed
                continue

            digit_resized = cv2.resize(binary[y:y + h, x:x + w], (28, 28))
            positioned_digits.append((x, y, digit_resized))

            # Save each digit as an individual image for visual verification.
            os.makedirs(digit_folder, exist_ok=True)
            cv2.imwrite(os.path.join(digit_folder, f"digit_{contour_idx}.png"), digit_resized)

        # Order by the digit's position in the column. The position has to come
        # from the contour: the resized 28x28 crop no longer carries it.
        positioned_digits.sort(key=lambda item: (item[0], item[1]))
        extracted_digits[f"{img_id}_left_column"] = [crop for _, _, crop in positioned_digits]

    return extracted_digits

In [34]:
# Annotation file whose 'colum' bounding boxes are applied to every loaded image.
xml_path = "../data/DSC00923.xml"

# Use the same "Data/It2" casing as the input folder so the digits land in the
# same directory tree on case-sensitive filesystems as well.
extracted_digits = extract_digits_from_left_column_and_save(
    images, image_ids, xml_path, "Data/It2/Dictionary"
)

# Print the extracted digits for verification
for key, digit_list in extracted_digits.items():
    print(f"{key}: {len(digit_list)} digits extracted")


Invalid bounding box for image Fine_Tuned_Flow_Image_Image_11: (1272, 392, 1595, 3478) - Skipping
Invalid bounding box for image Fine_Tuned_Flow_Image_Image_12: (1272, 392, 1595, 3478) - Skipping
Fine_Tuned_Flow_Image_Image_1_left_column: 104 digits extracted
Fine_Tuned_Flow_Image_Image_10_left_column: 117 digits extracted
Fine_Tuned_Flow_Image_Image_2_left_column: 117 digits extracted
Fine_Tuned_Flow_Image_Image_3_left_column: 77 digits extracted
Fine_Tuned_Flow_Image_Image_4_left_column: 85 digits extracted
Fine_Tuned_Flow_Image_Image_5_left_column: 53 digits extracted
Fine_Tuned_Flow_Image_Image_6_left_column: 70 digits extracted
Fine_Tuned_Flow_Image_Image_7_left_column: 66 digits extracted
Fine_Tuned_Flow_Image_Image_8_left_column: 153 digits extracted
Fine_Tuned_Flow_Image_Image_9_left_column: 57 digits extracted


In [43]:
def extract_digits_by_cell(images, image_ids, xml_path, output_folder, num_rows=20, num_cells=5):
    """Split the leftmost annotated column into a grid of cells and extract digits per cell.

    The XML annotation is searched for ``object`` elements named ``'colum'``;
    the one with the smallest ``xmin`` is used as the column bounding box. For
    each image the box is clamped to the image, cropped, and divided into
    ``num_rows`` x ``num_cells`` equally sized cells (integer division, so
    leftover pixels at the right/bottom edge are not covered). Each cell is
    converted to grayscale, thresholded (inverted binary, threshold 127) and
    split into external contours; contours taller than 20 px and narrower than
    100 px are resized to 28x28, written to ``output_folder`` and collected.

    Args:
        images: Images as returned by ``cv2.imread``; ``None`` entries are skipped.
        image_ids: Identifiers parallel to ``images``.
        xml_path: Path to the annotation XML file.
        output_folder: Directory that receives the digit PNG files (created if missing).
        num_rows: Number of grid rows assumed inside the column.
        num_cells: Number of grid cells assumed per row.

    Returns:
        Dict mapping ``"<img_id>_row_<r>_cell_<c>"`` to the list of 28x28 digit
        crops found in that cell. Returns ``{}`` when the XML contains no
        ``'colum'`` object.

    Raises:
        ValueError: If ``num_rows`` or ``num_cells`` is smaller than 1.
    """
    if num_rows < 1 or num_cells < 1:
        raise ValueError("num_rows and num_cells must both be at least 1")

    root = ET.parse(xml_path).getroot()

    # Pick the 'colum' object with the smallest xmin, i.e. the left column.
    column_coordinates = None
    for obj in root.findall('object'):
        if obj.findtext('name') != 'colum':
            continue
        bndbox = obj.find('bndbox')
        # Coordinates may be stored as floats ("12.0"), hence int(float(...)).
        coords = tuple(
            int(float(bndbox.find(tag).text))
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        )
        if column_coordinates is None or coords[0] < column_coordinates[0]:
            column_coordinates = coords

    if column_coordinates is None:
        print("Column coordinates not found in XML.")
        # Always hand back a dict so callers can iterate over the result.
        return {}

    os.makedirs(output_folder, exist_ok=True)
    extracted_digits = {}

    for img, img_id in zip(images, image_ids):
        # cv2.imread yields None for unreadable files; do not crash on them.
        if img is None:
            print(f"Warning: Image ID {img_id} could not be loaded. Skipping.")
            continue

        xmin, ymin, xmax, ymax = column_coordinates

        img_height, img_width = img.shape[:2]
        if xmin >= img_width or ymin >= img_height:
            print(f"Warning: Bounding box coordinates exceed image dimensions for image ID {img_id}. Skipping.")
            continue

        # Clamp the box so a partially overlapping column is still usable.
        xmin = max(0, xmin)
        ymin = max(0, ymin)
        xmax = min(img_width, xmax)
        ymax = min(img_height, ymax)

        column_image = img[ymin:ymax, xmin:xmax]
        if column_image is None or column_image.size == 0:
            print(f"Warning: Cropped column is empty for image ID {img_id}. Skipping.")
            continue

        # Rows and cells are assumed to be evenly distributed over the column.
        row_height = (ymax - ymin) // num_rows
        cell_width = (xmax - xmin) // num_cells
        if row_height == 0 or cell_width == 0:
            # Every cell would be empty; report once instead of once per cell.
            print(f"Warning: Column too small for a {num_rows}x{num_cells} grid in image ID {img_id}. Skipping.")
            continue

        for row_idx in range(num_rows):
            for cell_idx in range(num_cells):
                # Cell coordinates are relative to the cropped column.
                cell_xmin = cell_idx * cell_width
                cell_xmax = cell_xmin + cell_width
                cell_ymin = row_idx * row_height
                cell_ymax = cell_ymin + row_height

                cell_image = column_image[cell_ymin:cell_ymax, cell_xmin:cell_xmax]
                if cell_image is None or cell_image.size == 0:
                    print(f"Warning: Cropped cell is empty for image ID {img_id}, row {row_idx}, cell {cell_idx}. Skipping.")
                    continue

                # Thresholding needs a single-channel image.
                if len(cell_image.shape) == 3:
                    cell_image = cv2.cvtColor(cell_image, cv2.COLOR_BGR2GRAY)

                # Inverted binary: dark ink becomes white foreground for findContours.
                _, binary = cv2.threshold(cell_image, 127, 255, cv2.THRESH_BINARY_INV)
                contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

                digit_list = []
                for contour in contours:
                    x, y, w, h = cv2.boundingRect(contour)
                    # Filter out too small or too large contours that are likely noise.
                    if not (h > 20 and w < 100):  # Tune these values as needed
                        continue

                    digit_resized = cv2.resize(binary[y:y + h, x:x + w], (28, 28))

                    # The filename records where in the grid (and cell) the digit was found.
                    digit_filename = f"digit_{img_id}_row_{row_idx}_cell_{cell_idx}_x_{x}_y_{y}.png"
                    cv2.imwrite(os.path.join(output_folder, digit_filename), digit_resized)

                    digit_list.append(digit_resized)

                extracted_digits[f"{img_id}_row_{row_idx}_cell_{cell_idx}"] = digit_list

    return extracted_digits

In [44]:
# Annotation file whose 'colum' bounding box is applied to every loaded image.
xml_path = "../data/DSC00923.xml"

# The output folder must be spelled exactly like the folder the clustering step
# reads from ("Data/It2/Dictionary2"); a different casing only works on
# case-insensitive filesystems.
# `or {}` keeps the loop below safe if the function returns nothing.
extracted_digits = extract_digits_by_cell(
    images, image_ids, xml_path, "Data/It2/Dictionary2"
) or {}

# Print the extracted digits for verification
for key, digit_list in extracted_digits.items():
    print(f"{key}: {len(digit_list)} digits extracted")

Fine_Tuned_Flow_Image_Image_1_row_0_cell_0: 4 digits extracted
Fine_Tuned_Flow_Image_Image_1_row_0_cell_1: 2 digits extracted
Fine_Tuned_Flow_Image_Image_1_row_0_cell_2: 2 digits extracted
Fine_Tuned_Flow_Image_Image_1_row_0_cell_3: 0 digits extracted
Fine_Tuned_Flow_Image_Image_1_row_0_cell_4: 4 digits extracted
Fine_Tuned_Flow_Image_Image_1_row_1_cell_0: 7 digits extracted
Fine_Tuned_Flow_Image_Image_1_row_1_cell_1: 9 digits extracted
Fine_Tuned_Flow_Image_Image_1_row_1_cell_2: 8 digits extracted
Fine_Tuned_Flow_Image_Image_1_row_1_cell_3: 1 digits extracted
Fine_Tuned_Flow_Image_Image_1_row_1_cell_4: 1 digits extracted
Fine_Tuned_Flow_Image_Image_1_row_2_cell_0: 1 digits extracted
Fine_Tuned_Flow_Image_Image_1_row_2_cell_1: 2 digits extracted
Fine_Tuned_Flow_Image_Image_1_row_2_cell_2: 3 digits extracted
Fine_Tuned_Flow_Image_Image_1_row_2_cell_3: 8 digits extracted
Fine_Tuned_Flow_Image_Image_1_row_2_cell_4: 3 digits extracted
Fine_Tuned_Flow_Image_Image_1_row_3_cell_0: 7 digits ex

In [45]:
import os
import cv2
import numpy as np
from sklearn.cluster import KMeans
import shutil

def cluster_and_save_digits(extracted_digit_folder, categorized_output_folder, n_clusters=10):
    """Group digit images by visual similarity with k-means and copy them into per-cluster folders.

    Every ``.png`` in ``extracted_digit_folder`` is read as grayscale and
    flattened into a raw-pixel feature vector. The vectors are clustered with
    ``KMeans(random_state=42)`` and each source file is copied (the original is
    left in place) into ``<categorized_output_folder>/Cluster_<label>``.

    Args:
        extracted_digit_folder: Folder containing the cropped digit images.
        categorized_output_folder: Folder that receives the ``Cluster_<n>`` sub-folders.
        n_clusters: Number of clusters to form.

    Returns:
        None. Progress and the result location are printed.

    Raises:
        ValueError: Raised by scikit-learn when fewer images than
            ``n_clusters`` are available.
    """
    features = []
    filepaths = []
    expected_size = None

    # Sort the listing: with a fixed random_state the clustering still depends
    # on sample order, and os.listdir gives no ordering guarantee.
    for filename in sorted(os.listdir(extracted_digit_folder)):
        if not filename.endswith('.png'):
            continue

        image_path = os.path.join(extracted_digit_folder, filename)
        img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            continue

        # Flatten the image to create a feature vector.
        img_flattened = img.flatten()

        # All vectors must have the same length to form a 2-D feature matrix;
        # the first readable image defines the expected size.
        if expected_size is None:
            expected_size = img_flattened.size
        elif img_flattened.size != expected_size:
            print(f"Skipping '{image_path}': unexpected image size {img.shape}.")
            continue

        features.append(img_flattened)
        filepaths.append(image_path)

    if not features:
        print(f"No digit images found in '{extracted_digit_folder}'. Nothing to cluster.")
        return

    features = np.array(features)

    # n_init is set explicitly because its default differs between
    # scikit-learn versions; 10 restarts is the long-standing default.
    kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
    labels = kmeans.fit_predict(features)

    # Create output directories for each cluster (also for clusters that stay empty).
    for cluster_idx in range(n_clusters):
        os.makedirs(os.path.join(categorized_output_folder, f"Cluster_{cluster_idx}"), exist_ok=True)

    # Copy each image into the folder of the cluster it was assigned to.
    for filepath, label in zip(filepaths, labels):
        cluster_folder = os.path.join(categorized_output_folder, f"Cluster_{label}")
        shutil.copy(filepath, os.path.join(cluster_folder, os.path.basename(filepath)))

    print(f"Clustering complete. Images saved in '{categorized_output_folder}'.")

# Example usage: cluster the digit crops written by the per-cell extraction step.
# The input folder must be spelled exactly like the output folder given to
# extract_digits_by_cell (path casing matters on case-sensitive filesystems).
extracted_digit_folder = "Data/It2/Dictionary2"  # Folder containing cropped digit images
categorized_output_folder = "Data/It2/ClusteredDigits"  # Folder to save clustered images
# n_clusters=12 overrides the function's default of 10.
cluster_and_save_digits(extracted_digit_folder, categorized_output_folder, n_clusters=12)


Clustering complete. Images saved in 'Data/It2/ClusteredDigits'.
