In [2]:
import cv2
import numpy as np

class ImageProcessor:
    """Find the largest four-vertex contour in an image and warp it flat.

    Every pipeline step stores its result as an instance attribute so that
    intermediate images can be inspected after :meth:`execute` has run.
    When no suitable contour is found, ``perspective_corrected_image`` and
    ``perspective_corrected_image_with_padding`` stay ``None``.
    """

    def __init__(self, image):
        """Keep a reference to the input image and reset the result slots.

        Args:
            image: Image array; it is converted with ``cv2.COLOR_BGR2GRAY``,
                so a 3-channel BGR image is expected.
        """
        self.image = image
        # Declared up front so callers can test for None instead of hitting
        # an AttributeError when detection fails.
        self.contour_with_max_area = None
        self.perspective_corrected_image = None
        self.perspective_corrected_image_with_padding = None

    def execute(self):
        """Run the whole pipeline once.

        The perspective steps run only when a four-vertex contour was found.
        They used to be repeated unconditionally after the guarded branch,
        which doubled the work on success and raised ``AttributeError`` when
        there was no contour to order.
        """
        self.convert_image_to_grayscale()
        self.threshold_image()
        self.invert_image()
        self.dilate_image()

        self.find_contours()
        self.filter_contours_and_leave_only_rectangles()
        self.find_largest_contour_by_area()

        if self.contour_with_max_area is None:
            print("No contour with max area found.")
            return

        self.order_points_in_the_contour_with_max_area()
        self.calculate_new_width_and_height_of_image()
        self.apply_perspective_transform()
        self.add_10_percent_padding()

    def convert_image_to_grayscale(self):
        """Store a single-channel copy of the input in ``grayscale_image``."""
        self.grayscale_image = cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)

    def threshold_image(self):
        """Binarise the grayscale image with Otsu's method into ``thresholded_image``."""
        # cv2.threshold returns (threshold_used, image); only the image is kept.
        self.thresholded_image = cv2.threshold(
            self.grayscale_image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
        )[1]

    def invert_image(self):
        """Flip black and white of the thresholded image into ``inverted_image``."""
        self.inverted_image = cv2.bitwise_not(self.thresholded_image)

    def dilate_image(self):
        """Dilate the inverted image 100 times into ``dilated_image``."""
        # Passing None as the kernel lets OpenCV use its default structuring element.
        self.dilated_image = cv2.dilate(self.inverted_image, None, iterations=100)

    def find_contours(self):
        """Collect all contours of the dilated image and draw them on a copy of the input."""
        found = cv2.findContours(self.dilated_image, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        # OpenCV 4 returns (contours, hierarchy) while OpenCV 3 returns
        # (image, contours, hierarchy); the last two items are the same in both.
        self.contours, self.hierarchy = found[-2:]
        self.image_with_all_contours = self.image.copy()
        cv2.drawContours(self.image_with_all_contours, self.contours, -1, (0, 255, 0), 3)

    def filter_contours_and_leave_only_rectangles(self):
        """Keep the polygon approximations that have exactly four vertices."""
        self.rectangular_contours = []
        for contour in self.contours:
            peri = cv2.arcLength(contour, True)
            # Approximation tolerance is 2% of the contour's perimeter.
            approx = cv2.approxPolyDP(contour, 0.02 * peri, True)
            if len(approx) == 4:
                self.rectangular_contours.append(approx)
        self.image_with_only_rectangular_contours = self.image.copy()
        cv2.drawContours(
            self.image_with_only_rectangular_contours, self.rectangular_contours, -1, (0, 255, 0), 3
        )

    def find_largest_contour_by_area(self):
        """Select the four-vertex contour with the largest strictly positive area.

        Leaves ``contour_with_max_area`` as ``None`` (and prints a notice)
        when no candidate has a positive area.
        """
        max_area = 0
        self.contour_with_max_area = None
        for contour in self.rectangular_contours:
            area = cv2.contourArea(contour)
            if area > max_area:
                max_area = area
                self.contour_with_max_area = contour

        if self.contour_with_max_area is not None:
            self.image_with_contour_with_max_area = self.image.copy()
            cv2.drawContours(
                self.image_with_contour_with_max_area, [self.contour_with_max_area], -1, (0, 255, 0), 3
            )
        else:
            print("No contour with max area found.")

    def order_points_in_the_contour_with_max_area(self):
        """Order the selected contour's corners and plot them on a copy of the input."""
        if self.contour_with_max_area is not None:
            self.contour_with_max_area_ordered = self.order_points(self.contour_with_max_area)
            self.image_with_points_plotted = self.image.copy()
            for point in self.contour_with_max_area_ordered:
                # Draw the ordered points on the image
                cv2.circle(self.image_with_points_plotted, (int(point[0]), int(point[1])), 5, (0, 0, 255), -1)
        else:
            print("No contour with max area found.")

    def calculate_new_width_and_height_of_image(self):
        """Derive the output size from the ordered corners.

        The width is 90% of the input image width; the height follows from
        the ratio of the left edge length to the top edge length.
        """
        existing_image_width = self.image.shape[1]
        existing_image_width_reduced_by_10_percent = int(existing_image_width * 0.9)

        top_left = self.contour_with_max_area_ordered[0]
        distance_between_top_left_and_top_right = self.calculateDistanceBetween2Points(
            top_left, self.contour_with_max_area_ordered[1]
        )
        distance_between_top_left_and_bottom_left = self.calculateDistanceBetween2Points(
            top_left, self.contour_with_max_area_ordered[3]
        )

        aspect_ratio = distance_between_top_left_and_bottom_left / distance_between_top_left_and_top_right

        self.new_image_width = existing_image_width_reduced_by_10_percent
        self.new_image_height = int(self.new_image_width * aspect_ratio)

    def apply_perspective_transform(self):
        """Warp the ordered quadrilateral onto a ``new_image_width`` x ``new_image_height`` rectangle."""
        pts1 = np.float32(self.contour_with_max_area_ordered)
        # Destination corners in the same order as the source corners:
        # top-left, top-right, bottom-right, bottom-left.
        pts2 = np.float32([
            [0, 0],
            [self.new_image_width, 0],
            [self.new_image_width, self.new_image_height],
            [0, self.new_image_height],
        ])
        matrix = cv2.getPerspectiveTransform(pts1, pts2)
        self.perspective_corrected_image = cv2.warpPerspective(
            self.image, matrix, (self.new_image_width, self.new_image_height)
        )

    def add_10_percent_padding(self):
        """Surround the warped image with a white border on all four sides.

        The border thickness is 10% of the *input* image height.
        """
        image_height = self.image.shape[0]
        padding = int(image_height * 0.1)
        self.perspective_corrected_image_with_padding = cv2.copyMakeBorder(
            self.perspective_corrected_image,
            padding, padding, padding, padding,
            cv2.BORDER_CONSTANT, value=[255, 255, 255],
        )

    def calculateDistanceBetween2Points(self, p1, p2):
        """Return the Euclidean distance between two ``(x, y)`` points."""
        dis = ((p2[0] - p1[0]) ** 2 + (p2[1] - p1[1]) ** 2) ** 0.5
        return dis

    def order_points(self, pts):
        """Return the four points as a float32 ``(4, 2)`` array in a fixed order.

        Order: top-left, top-right, bottom-right, bottom-left.

        Args:
            pts: Array holding exactly four ``(x, y)`` points (any shape that
                reshapes to ``(4, 2)``).
        """
        pts = pts.reshape(4, 2)
        rect = np.zeros((4, 2), dtype="float32")

        # x + y is smallest at the top-left corner and largest at the bottom-right.
        s = pts.sum(axis=1)
        rect[0] = pts[np.argmin(s)]
        rect[2] = pts[np.argmax(s)]

        # y - x is smallest at the top-right corner and largest at the bottom-left.
        diff = np.diff(pts, axis=1)
        rect[1] = pts[np.argmin(diff)]
        rect[3] = pts[np.argmax(diff)]

        return rect


In [3]:
import cv2
import numpy as np
import os
import string
import random

# Folder whose image files are listed and processed by the loop further down
# in this cell; the path is relative to the current working directory.
image_dir = 'test_path'

def random_string(length=10):
    """Build a random alphanumeric string.

    Args:
        length: Number of characters to generate (default 10).

    Returns:
        A string of ``length`` characters drawn from ASCII letters and digits
        using the module-level ``random`` generator.
    """
    alphabet = string.ascii_letters + string.digits
    picked = []
    for _ in range(length):
        picked.append(random.choice(alphabet))
    return ''.join(picked)

# Crop every perspective-corrected table image into grid cells and save each
# non-blank cell under 'table_cells_class02/<cell number>/'.
for filename in os.listdir(image_dir):
    # Check if the file is an image (case-insensitive, so '.JPG' is accepted too)
    if not filename.lower().endswith(('.jpg', '.png', '.jpeg')):
        continue

    # Construct the full path to the image file
    image_path = os.path.join(image_dir, filename)

    # Load the image; cv2.imread returns None instead of raising when the
    # file cannot be decoded, so that case has to be checked explicitly.
    img2 = cv2.imread(image_path)
    if img2 is None:
        print(f"Could not read image: {image_path}")
        continue

    # Process the image using ImageProcessor
    image_processor2 = ImageProcessor(img2)
    image_processor2.execute()
    # The attribute may be missing or None when no table contour was detected.
    table_image = getattr(image_processor2, 'perspective_corrected_image', None)
    if table_image is None:
        print(f"No table found in image: {image_path}")
        continue

    image2 = table_image

    # Set the dimensions of the table cells (the image is split into an 8 x 8 grid)
    cell_width = image2.shape[1] // 8
    cell_height = image2.shape[0] // 8

    # Loop through the rows and columns of the table.
    # Only columns 0, 2, 4 and 6 are visited, so each image yields 32 cells
    # numbered 1..32 in row-major order.
    # NOTE(review): the `lines` step later reads folders named "0".."30",
    # while this loop creates "1".."32" - confirm the intended numbering.
    cell_count = 1
    for i in range(8):
        for j in range(0, 8, 2):
            # Create a directory for the current cell (parents included)
            cell_dir = os.path.join('table_cells_class02', f'{cell_count}')
            os.makedirs(cell_dir, exist_ok=True)

            # Crop the cell image
            x = j * cell_width
            y = i * cell_height
            cell_image = image2[y:y+cell_height, x:x+cell_width]

            # Check if the cell image is not blank (blank = every value is 255)
            if np.any(cell_image != 255):
                # Save the cropped cell image
                random_filename = f"{random_string()}.jpg"
                cv2.imwrite(os.path.join(cell_dir, random_filename), cell_image)
            else:
                print(f"Skipping blank cell image for cell {cell_count}")
            cell_count += 1


Skipping blank cell image for cell 1
Skipping blank cell image for cell 2
Skipping blank cell image for cell 3
Skipping blank cell image for cell 4
Skipping blank cell image for cell 5
Skipping blank cell image for cell 6
Skipping blank cell image for cell 7
Skipping blank cell image for cell 8
Skipping blank cell image for cell 9
Skipping blank cell image for cell 10
Skipping blank cell image for cell 11
Skipping blank cell image for cell 12
Skipping blank cell image for cell 13
Skipping blank cell image for cell 14
Skipping blank cell image for cell 15
Skipping blank cell image for cell 16
Skipping blank cell image for cell 17
Skipping blank cell image for cell 18
Skipping blank cell image for cell 20
Skipping blank cell image for cell 21
Skipping blank cell image for cell 22
Skipping blank cell image for cell 23
Skipping blank cell image for cell 24
Skipping blank cell image for cell 25
Skipping blank cell image for cell 26
Skipping blank cell image for cell 27
Skipping blank cell i

In [4]:
import numpy as np
def removeLines(old_image: np.ndarray, axis, kernel_length: int = 25) -> np.ndarray:
    """Paint long straight lines of one orientation white on a copy of the image.

    The image is binarised (inverted Otsu threshold), opened with a 1-pixel
    thick rectangular element of ``kernel_length`` pixels along the requested
    orientation, and the contours that survive are drawn in white.

    Args:
        old_image: BGR image array; it is not modified.
        axis: ``"horizontal"`` to remove horizontal lines, ``"vertical"`` to
            remove vertical lines.
        kernel_length: Length in pixels of the structuring element along the
            line direction (default 25).

    Returns:
        A copy of ``old_image`` with the detected lines drawn over in white.

    Raises:
        ValueError: If ``axis`` is neither ``"horizontal"`` nor ``"vertical"``.
    """
    # Validate before doing any image work. OpenCV's ksize is (width, height):
    # a wide, 1-pixel-tall element only survives on horizontal runs, and a
    # 1-pixel-wide, tall element only on vertical runs. The two sizes used to
    # be swapped relative to the axis name.
    if axis == "horizontal":
        kernel_size = (kernel_length, 1)
    elif axis == "vertical":
        kernel_size = (1, kernel_length)
    else:
        raise ValueError("Axis must be either 'horizontal' or 'vertical' in order to work")

    gray = cv2.cvtColor(old_image, cv2.COLOR_BGR2GRAY)
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)
    detected_lines = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)

    # findContours returns 2 items in OpenCV 4 and 3 items in OpenCV 3.
    found = cv2.findContours(detected_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours = found[0] if len(found) == 2 else found[1]

    result = old_image.copy()
    for contour in contours:
        cv2.drawContours(result, [contour], -1, (255, 255, 255), 2)
    return result

In [6]:
import cv2
import numpy as np
import os
import string
import random


def lines(input_dir, output_dir, num_classes=31):
    """Remove table lines from every image in the numbered sub-folders.

    For each ``i`` in ``range(num_classes)`` the folder ``<input_dir>/<i>`` is
    walked recursively. Every ``.jpg``/``.jpeg``/``.png`` file found is passed
    through ``removeLines`` for both orientations and written to
    ``<output_dir>/<i>_result/<file name>``.

    Args:
        input_dir: Folder that contains the numbered sub-folders.
        output_dir: Folder that receives the ``<i>_result`` sub-folders; they
            are created even when the matching input folder is missing.
        num_classes: How many numbered sub-folders to visit, starting at 0
            (default 31).
    """
    image_suffixes = (".jpg", ".jpeg", ".png")
    for i in range(num_classes):
        input_path = os.path.join(input_dir, str(i))
        output_path = os.path.join(output_dir, f"{i}_result")
        os.makedirs(output_path, exist_ok=True)
        for root, _dirs, files in os.walk(input_path):
            for file in files:
                # Case-insensitive so that '.JPG' / '.PNG' files are included.
                if not file.lower().endswith(image_suffixes):
                    continue
                input_image_path = os.path.join(root, file)
                output_image_path = os.path.join(output_path, file)
                image = cv2.imread(input_image_path)
                if image is None:
                    # cv2.imread returns None for files it cannot decode;
                    # skip them instead of failing inside removeLines.
                    print(f"Skipped unreadable image: {input_image_path}")
                    continue
                img1 = removeLines(image, "horizontal")
                img2 = removeLines(img1, "vertical")
                cv2.imwrite(output_image_path, img2)
                print(f"Processed: {input_image_path}")

# Usage example
# Read the cropped cell images from the numbered folders under
# "table_cells_class02" and write the line-free versions to
# "table_cells/<n>_result".
input_directory = "table_cells_class02"
output_directory = "table_cells"
lines(input_directory, output_directory)

Processed: table_cells_class02\0\03VssSYFwZ.jpg
Processed: table_cells_class02\0\09UB4rmTyL.jpg
Processed: table_cells_class02\0\0CsS0x0dHa.jpg
Processed: table_cells_class02\0\0gut1lfqRM.jpg
Processed: table_cells_class02\0\0HAGcHes7W.jpg
Processed: table_cells_class02\0\0KUwV37z80.jpg
Processed: table_cells_class02\0\0pAWmVTZ5Q.jpg
Processed: table_cells_class02\0\0QylXPq1p1.jpg
Processed: table_cells_class02\0\0w3aIjuYi7.jpg
Processed: table_cells_class02\0\12bgBZd7ak.jpg
Processed: table_cells_class02\0\12tQzx52oH.jpg
Processed: table_cells_class02\0\13qX72kcu6.jpg
Processed: table_cells_class02\0\15t9lxWFKp.jpg
Processed: table_cells_class02\0\1hzQzxcuhU.jpg
Processed: table_cells_class02\0\1qi0OPXQV7.jpg
Processed: table_cells_class02\0\1rlYHSojj0.jpg
Processed: table_cells_class02\0\1Smf9q13OU.jpg
Processed: table_cells_class02\0\2EVqM8fCZP.jpg
Processed: table_cells_class02\0\2KdFFleeRN.jpg
Processed: table_cells_class02\0\2LfUgflOCm.jpg
Processed: table_cells_class02\0\2qWKhmw