In [1]:
import os
import cv2
import time

In [2]:

# Function to process images in a directory
def process_images(input_dir, output_dir, low_threshold=100, high_threshold=200):
    """Crop each JPEG/PNG in ``input_dir`` to the bounding box of its largest contour.

    For every file whose name ends in ``.jpg`` or ``.png`` (any letter case) the
    image is converted to grayscale, Canny edges are computed, the external
    contour with the largest area is selected, and the original colour image is
    cropped to that contour's bounding rectangle. The crop is written to
    ``output_dir`` under the same filename, so passing the same folder for both
    arguments overwrites the source images.

    Files that OpenCV cannot decode and images in which no contour is found are
    reported and skipped instead of aborting the whole run.

    Args:
        input_dir: Folder that is scanned (non-recursively) for images.
        output_dir: Folder that receives the crops; created if missing.
        low_threshold: Lower hysteresis threshold passed to ``cv2.Canny``.
        high_threshold: Upper hysteresis threshold passed to ``cv2.Canny``.
    """
    os.makedirs(output_dir, exist_ok=True)

    for filename in os.listdir(input_dir):
        # Case-insensitive so that e.g. "SCAN.JPG" is not silently ignored.
        if not filename.lower().endswith((".jpg", ".png")):
            continue

        image_path = os.path.join(input_dir, filename)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None (it does not raise) for unreadable files.
            print(f"Skipping unreadable image: {image_path}")
            continue

        gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        edges = cv2.Canny(gray_image, low_threshold, high_threshold)
        contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if not contours:
            # max() on an empty sequence would raise ValueError.
            print(f"Skipping image without contours: {image_path}")
            continue

        biggest_contour = max(contours, key=cv2.contourArea)

        # Crop the original (colour) image to the contour's bounding rectangle.
        x, y, w, h = cv2.boundingRect(biggest_contour)
        cropped_image = image[y:y + h, x:x + w]

        output_path = os.path.join(output_dir, filename)
        cv2.imwrite(output_path, cropped_image)

# Replace 'input_directory' and 'output_directory' with your actual directory paths.
# NOTE: both names point at the same folder here, so every cropped image is written
# back under its original filename and replaces the source file.
input_directory = r'C:\Users\saika\OneDrive\Desktop\test1'
output_directory = r'C:\Users\saika\OneDrive\Desktop\test1'

# Crop every matching image found in input_directory and write the result to output_directory
process_images(input_directory, output_directory)


Row-wise cropping of the data above the box gives the best results with row_height=100 and dist=8.

Row-wise cropping of the data below the box gives the best results with row_height=100 and dist=15.

In [5]:

# Define constants (read as notebook globals by crop_row_wise)
row_height = 290  # Height in pixels of each cropped strip; adjust to your image dimensions
distance_between_rows = 15  # Pixels skipped between the end of one strip and the start of the next
# NOTE(review): values above 1 do not produce additional rows — one pass over the
# image already emits every strip that fits.
epochs = 10
# Source images are read from input_directory; the row strips go to the 'row' subfolder.
input_directory =  r'C:\Users\saika\OneDrive\Desktop\test1'
output_directory =  r'C:\Users\saika\OneDrive\Desktop\test1\row'

# Create output directory if it doesn't exist
if not os.path.exists(output_directory):
    os.makedirs(output_directory)

# Function to crop image row-wise
def crop_row_wise(image_path):
    """Cut an image into horizontal strips and save each strip as its own file.

    Strips are ``row_height`` pixels tall and span the full image width. After
    each strip, ``distance_between_rows`` pixels are skipped before the next one
    starts. A trailing region shorter than ``row_height`` is not written. Each
    strip is saved in ``output_directory`` as ``row_<index>_<original filename>``.

    ``row_height``, ``distance_between_rows`` and ``output_directory`` are read
    from the notebook's global namespace at call time.

    Args:
        image_path: Path of the image to split.

    Raises:
        ValueError: If the configured sizes would never advance through the
            image (non-positive ``row_height`` or non-positive stride).
    """
    stride = row_height + distance_between_rows
    if row_height <= 0 or stride <= 0:
        # The start offset would never move forward, i.e. an endless loop.
        raise ValueError(
            "row_height and row_height + distance_between_rows must be positive"
        )

    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None (it does not raise) for unreadable files.
        print(f"Skipping unreadable image: {image_path}")
        return

    height = img.shape[0]
    base_name = os.path.basename(image_path)

    # A single pass visits every strip; the range ends at the last start offset
    # for which a full-height strip still fits inside the image.
    for row_folder, start in enumerate(range(0, height - row_height + 1, stride)):
        cropped_row = img[start:start + row_height, :]
        output_path = os.path.join(output_directory, f"row_{row_folder}_{base_name}")
        cv2.imwrite(output_path, cropped_row)

# Iterate over all images in the input directory.
# The extension test ignores letter case so files such as "SCAN.JPG" or "page.PNG"
# are not silently skipped.
for filename in os.listdir(input_directory):
    if filename.lower().endswith((".jpg", ".png")):  # Adjust extensions as needed
        image_path = os.path.join(input_directory, filename)
        crop_row_wise(image_path)


Finding the biggest contour of the row wise cropped data

In [6]:

# Function to process images in a directory
def process_images(input_dir, output_dir, low_threshold=100, high_threshold=200):
    """Crop each JPEG/PNG in ``input_dir`` to the bounding box of its largest contour.

    For every file whose name ends in ``.jpg`` or ``.png`` (any letter case) the
    image is converted to grayscale, Canny edges are computed, the external
    contour with the largest area is selected, and the original colour image is
    cropped to that contour's bounding rectangle. The crop is written to
    ``output_dir`` under the same filename, so passing the same folder for both
    arguments overwrites the source images.

    Files that OpenCV cannot decode and images in which no contour is found are
    reported and skipped instead of aborting the whole run.

    Args:
        input_dir: Folder that is scanned (non-recursively) for images.
        output_dir: Folder that receives the crops; created if missing.
        low_threshold: Lower hysteresis threshold passed to ``cv2.Canny``.
        high_threshold: Upper hysteresis threshold passed to ``cv2.Canny``.
    """
    # NOTE(review): this cell re-defines process_images, which an earlier cell
    # already defines; the later definition wins once this cell has run.
    os.makedirs(output_dir, exist_ok=True)

    for filename in os.listdir(input_dir):
        # Case-insensitive so that e.g. "SCAN.JPG" is not silently ignored.
        if not filename.lower().endswith((".jpg", ".png")):
            continue

        image_path = os.path.join(input_dir, filename)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None (it does not raise) for unreadable files.
            print(f"Skipping unreadable image: {image_path}")
            continue

        gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        edges = cv2.Canny(gray_image, low_threshold, high_threshold)
        contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if not contours:
            # max() on an empty sequence would raise ValueError.
            print(f"Skipping image without contours: {image_path}")
            continue

        biggest_contour = max(contours, key=cv2.contourArea)

        # Crop the original (colour) image to the contour's bounding rectangle.
        x, y, w, h = cv2.boundingRect(biggest_contour)
        cropped_image = image[y:y + h, x:x + w]

        output_path = os.path.join(output_dir, filename)
        cv2.imwrite(output_path, cropped_image)

# Replace 'input_directory' and 'output_directory' with your actual directory paths.
# Input is the folder holding the row strips; output is a 'row_cont' subfolder inside
# it, so the strips themselves are not overwritten.
input_directory = r'C:\Users\saika\OneDrive\Desktop\test1\row'
output_directory = r'C:\Users\saika\OneDrive\Desktop\test1\row\row_cont'

# Crop every matching image found in input_directory and write the result to output_directory
process_images(input_directory, output_directory)


Column wise cropping

In [11]:

# Define constants (read as notebook globals by separate_characters)
column_width = 200  # Width in pixels of each cropped column; adjust to your image dimensions
# Folder whose files are split into columns.
row_wise_cropped_directory = r'C:\Users\saika\OneDrive\Desktop\test1\row\row_cont'
# NOTE: this rebinds the notebook-global output_directory that earlier cells also assign.
output_directory = r'C:\Users\saika\OneDrive\Desktop\test1\col'

# Create output directory if it doesn't exist
if not os.path.exists(output_directory):
    os.makedirs(output_directory)

# Function to separate each character
def separate_characters(image_path):
    """Cut an image into adjacent vertical tiles and save each tile as its own file.

    Tiles are ``column_width`` pixels wide, span the full image height and
    follow each other without a gap. A trailing region narrower than
    ``column_width`` is not written. Each tile is saved in ``output_directory``
    as ``column_<index>_<original filename>``.

    ``column_width`` and ``output_directory`` are read from the notebook's
    global namespace at call time.

    Args:
        image_path: Path of the image to split.

    Raises:
        ValueError: If ``column_width`` is not positive.
    """
    if column_width <= 0:
        # The start offset would never move forward, i.e. an endless loop.
        raise ValueError("column_width must be positive")

    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None (it does not raise) for unreadable or
        # non-image files; the caller passes every file in the folder.
        print(f"Skipping unreadable image: {image_path}")
        return

    width = img.shape[1]
    base_name = os.path.basename(image_path)

    # A single pass visits every tile; the range ends at the last start offset
    # for which a full-width tile still fits inside the image.
    for column_folder, start in enumerate(range(0, width - column_width + 1, column_width)):
        cropped_column = img[:, start:start + column_width]
        output_path = os.path.join(output_directory, f"column_{column_folder}_{base_name}")
        cv2.imwrite(output_path, cropped_column)

# Walk the row-wise cropped folder and split every regular file into columns;
# sub-directories and other non-file entries are ignored.
for file_name in os.listdir(row_wise_cropped_directory):
    file_path = os.path.join(row_wise_cropped_directory, file_name)
    if not os.path.isfile(file_path):
        continue
    separate_characters(file_path)
