In [3]:
import cv2
import os
import numpy as np
import matplotlib.pyplot as plt
from scipy.ndimage import gaussian_filter1d
from scipy.signal import find_peaks


In [None]:
import layoutparser as lp

# Layout detector: the PubLayNet Faster R-CNN (R50-FPN, 3x) config from the
# layoutparser model zoo.
model = lp.Detectron2LayoutModel(
    'lp://PubLayNet/faster_rcnn_R_50_FPN_3x/config',
    # Detectron2 test-time score threshold for ROI heads.
    extra_config=["MODEL.ROI_HEADS.SCORE_THRESH_TEST", 0.65],
    # Class index -> readable label; find_tables filters on "Table".
    label_map={0: "Text", 1: "Title", 2: "List", 3: "Table", 4: "Figure"},
)


In [90]:
def remove_horizontal_edges(image):
    """Erase long horizontal strokes (e.g. table rules) from a colour image.

    Args:
        image: 3-channel image; it is converted with ``cv2.COLOR_BGR2GRAY``.

    Returns:
        Single-channel uint8 image in which pixels that were <= 128 and are
        not part of a detected horizontal stroke are 0, everything else 255.
    """
    # Inverted fixed threshold: dark pixels (<= 128) become foreground (255).
    ink_mask = cv2.threshold(
        cv2.cvtColor(image, cv2.COLOR_BGR2GRAY), 128, 255, cv2.THRESH_BINARY_INV
    )[1]

    # A 30-wide, 1-tall element only survives inside wide horizontal runs,
    # so the opening keeps the rules and wipes out glyphs.
    wide_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (30, 1))
    line_mask = cv2.morphologyEx(ink_mask, cv2.MORPH_OPEN, wide_kernel, iterations=2)

    # Remove the rules from the foreground, then flip back to the input polarity.
    return cv2.bitwise_not(cv2.subtract(ink_mask, line_mask))

def remove_vertical_edges(image):
    """Erase long vertical strokes (e.g. table rules) from an image.

    Args:
        image: Either a 3-channel image (converted with
            ``cv2.COLOR_BGR2GRAY``) or an already single-channel image.

    Returns:
        Single-channel uint8 image in which pixels that were <= 128 and are
        not part of a detected vertical stroke are 0, everything else 255.
    """
    # Only convert when there is a channel axis. The previous code assigned
    # ``gray = image`` unconditionally, which threw the conversion away and
    # thresholded colour input channel by channel.
    if image.ndim > 2:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image

    # Inverted fixed threshold: dark pixels (<= 128) become foreground (255).
    _, binary = cv2.threshold(gray, 128, 255, cv2.THRESH_BINARY_INV)

    # A 1-wide, 30-tall element only survives inside tall vertical runs.
    vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 30))
    vertical_lines = cv2.morphologyEx(binary, cv2.MORPH_OPEN, vertical_kernel, iterations=2)

    # Remove the rules from the foreground, then flip back to the input polarity.
    no_vertical = cv2.subtract(binary, vertical_lines)
    return cv2.bitwise_not(no_vertical)

In [91]:
def pre_process_image(img, save_in_file, morph_size=(8, 8)):
    """Binarise an image and thicken its dark content into solid blobs.

    Args:
        img: 3-channel image (converted with ``cv2.COLOR_BGR2GRAY``) or an
            already single-channel image.
        save_in_file: Path the result is written to with ``cv2.imwrite``, or
            ``None`` to skip writing.
        morph_size: (width, height) of the rectangular dilation element.

    Returns:
        Single-channel binary image (dark blobs on a white background).
    """
    # Only convert when there is a channel axis. The previous code assigned
    # ``pre = img`` unconditionally, so colour input reached the Otsu
    # threshold below, which only accepts single-channel data.
    if img.ndim > 2:
        pre = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    else:
        pre = img

    # With THRESH_OTSU the threshold is chosen automatically; the 250 passed
    # here is not used as the cut-off.
    pre = cv2.threshold(pre, 250, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]

    # Dilate the inverted image (dark content as foreground) so neighbouring
    # glyphs merge, then invert back. ``~pre`` already allocates a new array,
    # so no explicit copy is required.
    struct = cv2.getStructuringElement(cv2.MORPH_RECT, morph_size)
    dilated = cv2.dilate(~pre, struct, anchor=(-1, -1), iterations=1)
    pre = ~dilated

    if save_in_file is not None:
        cv2.imwrite(save_in_file, pre)
    return pre



In [124]:
def _column_profile(binary, smooth_sigma):
    """Return the z-scored, Gaussian-smoothed column-sum profile of an image."""
    column_sums = np.sum(binary, axis=0).astype(np.float64)
    spread = column_sums.std()
    if spread == 0:
        # Uniform crop: there is nothing to normalise and no peak to find.
        return np.zeros_like(column_sums)
    normalized = (column_sums - column_sums.mean()) / spread
    return gaussian_filter1d(normalized, sigma=smooth_sigma)


def find_tables(image, layoutmodel, output_path, img_name ,verbose=False,
                peak_ratio=0.46, smooth_sigma=5):
    """Detect tables on a page image and draw estimated column separators.

    For every region the layout model labels ``'Table'`` the crop is stripped
    of horizontal and vertical rules, dilated into blobs, and summed column by
    column. Peaks of the smoothed profile are drawn as vertical lines on the
    page, a rectangle is drawn around the table, and the annotated page is
    written to ``<output_path>/final_output/<stem>_<idx>.jpg``.

    Args:
        image: Page image as an H x W x C array. It is copied before drawing,
            so the caller's array is not modified.
            NOTE(review): the notebook's callers pass the reversed-channel
            output of ``cv2.imread`` while the cv2 calls here assume BGR, so
            saved files may have swapped channels - confirm intended order.
        layoutmodel: Object exposing ``detect(image)`` whose result provides
            ``to_dataframe()`` with ``type``, ``x_1``, ``y_1``, ``x_2``,
            ``y_2`` columns.
        output_path: Root output folder. The sub-folders written to
            (``final_output``, ``preprocessed`` and, when verbose, the debug
            folders) are not created here.
        img_name: File name of the page; its stem names the outputs.
        verbose: When true, also save the layout preview, the intermediate
            images and the profile plot, and print progress.
        peak_ratio: A peak must reach ``peak_ratio * max(profile)``.
        smooth_sigma: Sigma of the Gaussian used to smooth the profile.

    Returns:
        None.
    """
    layout = layoutmodel.detect(image)
    df = layout.to_dataframe()

    # splitext keeps dotted stems such as "10.1.1.1.2018_8" intact, whereas
    # split('.')[0] collapsed them to "10" and made outputs overwrite each other.
    stem = os.path.splitext(img_name)[0]

    if verbose:
        print('saving the image with table predictions')
        # The preview is only drawn when it is actually going to be saved.
        out_image = lp.draw_box(image, layout, box_width=3, color_map={}, show_element_type=True, id_text_color='white', id_text_background_color='black')
        save_path = os.path.join(output_path, 'layout_parser', stem + '.jpg')
        out_image.save(save_path, format="JPEG")

    x1, y1, x2, y2 = [], [], [], []
    for _, row in df.iterrows():
        if row['type'] == 'Table':
            x1.append(int(row['x_1']))
            x2.append(int(row['x_2']))
            y1.append(int(row['y_1']))
            y2.append(int(row['y_2']))

    if verbose:
        print(f'bounding boxes of the table are:{[x1,y1,x2,y2]}')

    if not x1:
        return

    # One private, contiguous copy for the whole loop; annotations accumulate on it.
    image = np.array(image)
    page_h, page_w = image.shape[:2]
    red_color = (0, 0, 255)  # Red in BGR format

    for idx, (xmin, ymin, xmax, ymax) in enumerate(zip(x1, y1, x2, y2)):
        # Clamp to the page: a negative start would otherwise wrap around.
        xmin, xmax = max(xmin, 0), min(xmax, page_w)
        ymin, ymax = max(ymin, 0), min(ymax, page_h)
        if xmax <= xmin or ymax <= ymin:
            continue

        # View into `image`: drawing on the crop annotates the page in place.
        table_img = image[ymin:ymax, xmin:xmax, :]
        sub_table_name = f"{stem}_{idx}.jpg"
        if verbose:
            cv2.imwrite(os.path.join(output_path, 'cropped_table', sub_table_name), table_img)

        horizontal_edge = remove_horizontal_edges(table_img)
        if verbose:
            cv2.imwrite(os.path.join(output_path, 'horizontal_edge', sub_table_name), horizontal_edge)

        # Saved for inspection only; the pipeline continues from horizontal_edge.
        if verbose:
            vertical_edge = remove_vertical_edges(table_img)
            cv2.imwrite(os.path.join(output_path, 'vertical_edge', sub_table_name), vertical_edge)

        no_edge = remove_vertical_edges(horizontal_edge)
        if verbose:
            cv2.imwrite(os.path.join(output_path, 'no_edge', sub_table_name), no_edge)

        # The pre-processed image is written regardless of `verbose`.
        preprocess_path = os.path.join(output_path, 'preprocessed', sub_table_name)
        pre_processed = pre_process_image(no_edge, preprocess_path)

        smoothed_distribution = _column_profile(pre_processed, smooth_sigma)

        if verbose:
            pixel_distribution_path = os.path.join(output_path, 'smooth_gaussian', sub_table_name)
            fig, ax = plt.subplots(figsize=(10, 6))
            ax.plot(smoothed_distribution, color="black")
            ax.set_title("Pixel Distribution Along X-Axis")
            ax.set_xlabel("X-Axis (Width of Image)")
            ax.set_ylabel("Pixel Intensity Sum")
            ax.grid()
            fig.savefig(pixel_distribution_path, dpi=300)
            plt.close(fig)

        # Find peaks above the threshold
        threshold = peak_ratio * np.max(smoothed_distribution)
        peaks, _ = find_peaks(smoothed_distribution, height=threshold)

        height, width = table_img.shape[:2]
        for boundary in peaks:
            # Plain ints for OpenCV; the line spans the crop, not the page.
            x = int(boundary)
            cv2.line(table_img, (x, 0), (x, height - 1), red_color, 2)

        # Red rectangle along the border of the table crop.
        cv2.rectangle(table_img, (0, 0), (width - 1, height - 1), red_color, 3)

        cv2.imwrite(os.path.join(output_path, 'final_output', sub_table_name), image)
        

In [114]:
# Root folder for everything the pipeline writes, one sub-folder per artefact.
base_path = '/final_inference'

cropped_table = os.path.join(base_path, 'cropped_table')
final_output = os.path.join(base_path, 'final_output')
horizontal_edge = os.path.join(base_path, 'horizontal_edge')
layout_parser = os.path.join(base_path, 'layout_parser')
no_edge = os.path.join(base_path, 'no_edge')
preprocessed = os.path.join(base_path, 'preprocessed')
smooth_gaussian = os.path.join(base_path, 'smooth_gaussian')
vertical_edge = os.path.join(base_path, 'vertical_edge')

# Create them all up front; exist_ok makes the cell safe to re-run.
for _out_dir in (
    cropped_table,
    final_output,
    horizontal_edge,
    layout_parser,
    no_edge,
    preprocessed,
    smooth_gaussian,
    vertical_edge,
):
    os.makedirs(_out_dir, exist_ok=True)


In [125]:
import time

# Batch inference over the first 500 Marmot pages, timing each find_tables call.
path = '/marmot_dataset_v1.0/data/English/Positive/Raw/'
output_path = '/final_inference'
image_extensions = ('.bmp', '.jpg', '.png')

# Sorted so that the [:500] slice picks the same files on every run
# (os.listdir returns entries in arbitrary order).
lst = sorted(file for file in os.listdir(path) if file.endswith('.bmp'))

elapsed_times = []
for items in lst[:500]:
    # str.endswith takes a *tuple* of suffixes; passing them as separate
    # arguments treats the 2nd/3rd as slice bounds and raises TypeError.
    if not items.lower().endswith(image_extensions):
        continue

    img_path = os.path.join(path, items)
    img_name = os.path.basename(img_path)

    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread signals an unreadable file by returning None.
        print(f"skipping unreadable image: {img_path}")
        continue
    image = image[..., ::-1]  # reverse the channel axis of the cv2.imread result

    start_time = time.perf_counter()
    find_tables(image, model, output_path, img_name, verbose=True)
    elapsed_times.append(time.perf_counter() - start_time)

average_time = sum(elapsed_times) / len(elapsed_times) if elapsed_times else 0
print(f"Average time: {average_time} seconds")

saving the image with table predictions
bounding boxes of the table are:[[83], [261], [759], [446]]
saving the image with table predictions
bounding boxes of the table are:[[144, 199], [100, 728], [717, 674], [305, 958]]
saving the image with table predictions
bounding boxes of the table are:[[], [], [], []]
saving the image with table predictions
bounding boxes of the table are:[[415, 65, 70], [388, 73, 694], [750, 366, 393], [745, 425, 814]]
saving the image with table predictions
bounding boxes of the table are:[[161, 167], [354, 838], [692, 696], [474, 897]]
saving the image with table predictions
bounding boxes of the table are:[[138], [550], [675], [879]]
saving the image with table predictions
bounding boxes of the table are:[[], [], [], []]
saving the image with table predictions
bounding boxes of the table are:[[188], [492], [613], [702]]
saving the image with table predictions
bounding boxes of the table are:[[], [], [], []]
saving the image with table predictions
bounding bo

In [None]:

#for single image inference!
# cv2.imread does not expand "~", so resolve the home directory explicitly.
img_path = os.path.expanduser("~/image/10.1.1.1.2018_8.jpg")
output_path = os.path.expanduser('~/test_outputs')

# find_tables writes into these sub-folders and does not create them itself.
for _sub_dir in ('cropped_table', 'final_output', 'horizontal_edge', 'layout_parser',
                 'no_edge', 'preprocessed', 'smooth_gaussian', 'vertical_edge'):
    os.makedirs(os.path.join(output_path, _sub_dir), exist_ok=True)

img_name = os.path.basename(img_path)
image = cv2.imread(img_path)
if image is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError(f"could not read image: {img_path}")
image = image[..., ::-1]  # reverse the channel axis of the cv2.imread result
find_tables(image,model,output_path,img_name,verbose=True)