In [1]:
import pickle
import os
import json
import cv2 as cv
import numpy as np
from PIL import Image

In [2]:
def thresholding(image):
    """Binarize `image` with a binary threshold whose level is picked by Otsu's method.

    Returns only the thresholded image (the second element of what
    `cv.threshold` returns).
    """
    otsu_binary = cv.THRESH_BINARY + cv.THRESH_OTSU
    _, binarized = cv.threshold(image, 0, 255, otsu_binary)
    return binarized

def image_preprocessing(img_path):
    """Load an image from disk and return a binarized 3-channel copy.

    Pipeline: read with OpenCV, convert to grayscale, dilate/erode,
    Otsu-threshold (via `thresholding`), then expand the single-channel
    result back to three channels.

    Args:
        img_path: Path of the image file to read with `cv.imread`.

    Returns:
        Tuple `(img, h, w)`: the processed 3-channel array and its height
        and width in pixels.

    Raises:
        OSError: if `cv.imread` cannot read `img_path`.
    """
    img = cv.imread(img_path)
    if img is None:
        # cv.imread reports failure by returning None instead of raising.
        raise OSError(f'cv.imread could not read image: {img_path}')

    # Binarization operates on a single-channel image.
    img = cv.cvtColor(img, cv.COLOR_BGR2GRAY)

    # Dilation followed by erosion to eliminate unwanted noise.
    # NOTE(review): with a 1x1 kernel both operations leave the pixels
    # unchanged; kept as-is so the output is not altered here.
    kernel = np.ones((1, 1), np.uint8)
    img = cv.dilate(img, kernel, iterations=30)
    img = cv.erode(img, kernel, iterations=30)

    img = thresholding(img)

    # The thresholded image has one channel, so it must be expanded with
    # GRAY2RGB; BGR2RGB only accepts 3- or 4-channel input.
    img = cv.cvtColor(img, cv.COLOR_GRAY2RGB)
    h, w = img.shape[:2]

    return img, h, w

In [3]:
def plot_results(pil_img, prob, boxes):
    """Display `pil_img` with one outlined, captioned rectangle per box.

    Each caption shows the highest-scoring class name from `CLASSES`, its
    score to two decimals, and the running index of the box. Outline colours
    cycle through `COLORS`.
    """
    plt.figure(figsize=(16,10))
    plt.imshow(pil_img)
    axes = plt.gca()
    for box_idx, (scores, corners, colour) in enumerate(
            zip(prob, boxes.tolist(), COLORS * 100)):
        xmin, ymin, xmax, ymax = corners
        outline = plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                                fill=False, color=colour, linewidth=3)
        axes.add_patch(outline)
        best = scores.argmax()
        caption = f'{CLASSES[best]}: {scores[best]:0.2f} {box_idx}'
        axes.text(xmin, ymin, caption, fontsize=15,
                  bbox=dict(facecolor='yellow', alpha=0.5))
    plt.axis('off')
    plt.show()

# Normalize a bounding box by the image size; `size` is (img_w, img_h), e.g. im_pil.size
def get_normalized_bbox(b, size):
    """Convert a corner-format box to a size-normalized centre-format box.

    Args:
        b: Sequence whose first four items are (x0, y0, x1, y1).
        size: Pair (img_w, img_h) used as the divisors.

    Returns:
        List [x_center, y_center, width, height], each divided by the
        matching image dimension.
    """
    def _centre_and_extent(lo, hi, full):
        # Extent first, then the centre as start + half the extent.
        extent = (hi - lo) / full
        return (lo / full) + 0.5 * extent, extent

    full_w, full_h = size
    left, top, right, bottom = b[0:4]
    centre_x, norm_w = _centre_and_extent(left, right, full_w)
    centre_y, norm_h = _centre_and_extent(top, bottom, full_h)
    return [centre_x, centre_y, norm_w, norm_h]

# for output bounding box post-processing
def box_cxcywh_to_xyxy(x):
    """Convert boxes from (cx, cy, w, h) to (x_min, y_min, x_max, y_max).

    `x` is split along dim 1 into its four components and the converted
    corners are stacked back along dim 1.
    """
    centre_x, centre_y, width, height = x.unbind(1)
    half_w = 0.5 * width
    half_h = 0.5 * height
    corners = (centre_x - half_w, centre_y - half_h,
               centre_x + half_w, centre_y + half_h)
    return torch.stack(corners, dim=1)

def rescale_bboxes(out_bbox, size):
    """Convert (cx, cy, w, h) boxes to corner format and scale them by the image size.

    Args:
        out_bbox: Tensor of boxes accepted by `box_cxcywh_to_xyxy`.
        size: Pair (img_w, img_h) the converted boxes are multiplied by.

    Returns:
        Tensor of (x_min, y_min, x_max, y_max) boxes scaled by
        [img_w, img_h, img_w, img_h].
    """
    img_w, img_h = size
    b = box_cxcywh_to_xyxy(out_bbox)
    # Create the scale factors on the same device as the boxes; a CPU-only
    # scale tensor fails with a device mismatch when the boxes are elsewhere.
    scale = torch.tensor([img_w, img_h, img_w, img_h],
                         dtype=torch.float32, device=b.device)
    return b * scale

def detect(im, model, transform, threshold=0.4):
    """Run `model` on one image and return the confident predictions.

    Args:
        im: Input image; it is passed to `feature_extractor` and its `.size`
            is used as (img_w, img_h) when rescaling the boxes.
        model: Callable invoked as `model(**inputs)` whose result exposes
            `'logits'` and `'pred_boxes'`.
        transform: Unused; kept so existing callers keep working.
        threshold: Minimum top-class probability a prediction needs to be
            kept. Defaults to 0.4, the value that was previously hard-coded.

    Returns:
        Tuple `(probas, bboxes_scaled)`: class probabilities of the kept
        predictions and their boxes rescaled to the image size.
    """
    # NOTE(review): `feature_extractor` is read from the notebook's global
    # namespace and is not defined in this section -- confirm it is created
    # before this function is called.
    inputs = feature_extractor(images=im, return_tensors="pt")
    outputs = model(**inputs)

    # Softmax over classes for the first batch item, dropping the last class
    # column (presumably the "no object" class -- TODO confirm), then keep
    # predictions whose best class probability exceeds `threshold`.
    probas = outputs['logits'].softmax(-1)[0, :, :-1]
    keep = probas.max(-1).values > threshold

    # convert boxes from [0; 1] to image scales
    bboxes_scaled = rescale_bboxes(outputs['pred_boxes'][0, keep], im.size)
    return probas[keep], bboxes_scaled

In [12]:
# Absolute locations used by the cells below (machine-specific paths).
# `directory`: folder holding the per-image "*_cell_bounding_box_v2.pkl" files.
directory = '/data/glosat/Code-Git/docExtractor-master/demo/output_files'
# `img_directory`: folder holding the .jpg images that are listed and preprocessed.
img_directory = '/data/glosat/Code-Git/docformer/dataset/Finetuning/test/images'
# `out_directory`: folder the combined bounding-box pickle is written to.
out_directory='/data/glosat/glosat_table_dataset/datasets/Test/table_bbox'

In [13]:
# Write the full path of every .jpg in img_directory to filename.txt, one per
# line. Done in Python rather than via `ls` so it does not depend on a shell,
# on unquoted path interpolation, or on the shell's argument-length limit.
jpg_paths = sorted(
    os.path.join(img_directory, name)
    for name in os.listdir(img_directory)
    # Hidden files are skipped, as a shell `*.jpg` glob would skip them.
    if name.endswith('.jpg') and not name.startswith('.')
)
with open('filename.txt', 'w') as listing:
    listing.writelines(path + '\n' for path in jpg_paths)

In [14]:
# Read back the listed image paths (one per line, trailing newline kept).
# The context manager closes the handle even if reading fails.
with open('filename.txt','r') as f:
    files = f.readlines()

In [15]:
# Load the per-image bounding-box pickle for every listed image.
# bb_bboxes maps the image name (without extension) to the unpickled object,
# which is indexed as [0]=correct cells, [1]=blank cells, [2]=excluded cells,
# [3]=tables. Names of pickles that cannot be loaded are logged to
# test_bbox_error.txt and skipped.
bb_bboxes = {}
with open('test_bbox_error.txt','w') as f_error:
    for line in files:
        # splitext keeps dots inside the image name, unlike split('.')[0].
        stem = os.path.splitext(os.path.basename(line.strip()))[0]
        filename = stem+'_cell_bounding_box_v2.pkl'
        try:
            # NOTE: pickle.load can execute arbitrary code; only use on
            # trusted files.
            with open(f'{directory}/{filename}','rb') as f:
                bb_box = pickle.load(f)
            # Touch all four groups so a malformed pickle is reported here
            # instead of failing later when the groups are consumed.
            _ = (bb_box[0], bb_box[1], bb_box[2], bb_box[3])
        except Exception:
            # Best-effort: record the failing file and continue. Unlike a
            # bare `except:`, this lets KeyboardInterrupt through.
            f_error.write(filename+'\n')
            continue
        bb_bboxes[stem] = bb_box

In [16]:
# Build, for every loaded image, the normalized cell and table boxes.
# training_set[name] = (cell_coords, table_coords), where each entry is
# [normalized_box, one_hot_tag]:
#   cells  -> correct/blank/error/padding
#   tables -> table/no-table  [can add bordered/borderless]
training_set = {}

for page_id, page_boxes in bb_bboxes.items():
    page_img, _, _ = image_preprocessing(f'{img_directory}/{page_id}.jpg')
    page_size = Image.fromarray(page_img).size

    correct_group = page_boxes[0]
    blank_group = page_boxes[1]
    excluded_group = page_boxes[2]
    table_group = page_boxes[3]

    tagged_groups = (
        (correct_group, (1,0,0,0)),
        (blank_group, (0,1,0,0)),
        (excluded_group, (0,0,1,0)),
    )
    cell_coords = [
        [get_normalized_bbox(box, page_size), tag]
        for group, tag in tagged_groups
        for box in group
    ]
    table_coords = [
        [get_normalized_bbox(box, page_size), (1,0)]
        for box in table_group
    ]

    training_set[page_id] = (cell_coords, table_coords) #cells,tables

In [18]:
# training_set['412'][1]

In [19]:
# Echo the output directory; the next cell writes the pickle into it.
out_directory

'/data/glosat/glosat_table_dataset/datasets/Test/table_bbox'

In [20]:
# Persist the per-image (cells, tables) mapping. The context manager flushes
# and closes the file even if pickling fails part-way.
with open(f'{out_directory}/table_cell_bbox.pkl', 'wb') as f:
    pickle.dump(training_set,f)

In [27]:
## up to here

In [65]:
# NOTE(review): this rebinds `out_directory`, which earlier pointed at the
# Test/table_bbox output folder, to the Finetuning/train root that the cells
# below read from. Re-running earlier cells after this one would use this path.
out_directory = '/data/glosat/Code-Git/docformer/dataset/Finetuning/train'

In [56]:
# List the entries of the table_bbox folder and load the label pickle that
# sits next to it.
label_fp = out_directory+'/table_bbox'
files = os.listdir(label_fp)

# NOTE: pickle.load can execute arbitrary code; only use on trusted files.
# The context manager closes the handle even if unpickling fails.
with open(out_directory+'/table_bbox.pkl','rb') as f:
    label = pickle.load(f)

In [62]:
# For every non-pickle entry "<image id>_<count>.<ext>" in `files`, load and
# resize the matching image and pad its boxes/labels to a fixed length.
MAX_BOXES = 100  # number of box/label slots produced per image

for img in files:
    if '.pkl' in img:
        continue
    img_file, ext = img.split('.')
    filename, count = img_file.split('_')
    img_path = os.path.join(out_directory,'images',filename+'.'+ext)
    # Check the image itself; testing `out_directory` was always true and let
    # missing images reach the preprocessing step.
    if not os.path.exists(img_path):
        continue

    image, img_h, img_w = image_preprocessing(img_path)
    im_pil = Image.fromarray(image)
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    im_pil = im_pil.resize((800,1200), Image.LANCZOS)

    input_l = {}
    entry = label[int(filename)]
    boxes = entry[0]
    # A non-positive pad count yields no padding, so more than MAX_BOXES boxes
    # are passed through untruncated, as before.
    n_pad = MAX_BOXES - len(boxes)

    # Build each padding row separately so rows do not share one list object.
    bbox = boxes + [[0,0,0,0] for _ in range(n_pad)]

    # NOTE(review): label padding has 3 entries; confirm this matches the
    # width of `entry[1]`.
    labell = [entry[1]]*len(boxes) + [[0,0,0] for _ in range(n_pad)]

    print(len(bbox),len(labell))


100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100
100 100


KeyboardInterrupt: 

In [63]:
bbox

[[0.46674727932285365,
  0.33026934587430523,
  0.8875453446191052,
  0.34074390765284307],
 [0.46674727932285365,
  0.725096194955109,
  0.8742442563482467,
  0.3215049166310389],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0, 0],
 [0, 0, 0