### utils

In [1]:
import numpy as np
import pandas as pd
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import MinMaxScaler


def _snap_to_cluster_means(values, min_samples, eps=0.01):
    """Replace every clustered value by the truncated mean of its DBSCAN cluster.

    ``values`` is min-max scaled before clustering, so ``eps`` is expressed as
    a fraction of the value range. Values labelled as noise (-1) are returned
    unchanged.
    """
    scaled = MinMaxScaler().fit_transform(values.reshape((-1, 1)))
    labels = DBSCAN(eps=eps, min_samples=min_samples).fit_predict(scaled)

    snapped = values.astype(np.float64)  # astype copies, `values` stays intact
    for label in set(labels):
        if label == -1:
            continue
        members = labels == label
        snapped[members] = int(np.mean(values[members]))
    return snapped


def dbscan_result_to_aligned_result(result, threshold=0.5):
    """Align box edges by clustering their x and y coordinates with DBSCAN.

    For each entry of ``result`` the rows whose score (column 4) is strictly
    greater than ``threshold`` are kept. All x coordinates (columns 0 and 2)
    are clustered together, and so are all y coordinates (columns 1 and 3);
    every clustered edge is moved to the integer-truncated mean of its
    cluster. Boxes that end up with ``x1 >= x2`` or ``y1 >= y2`` are dropped.

    Edges that DBSCAN labels as noise keep the coordinate they came in with.
    (They used to be set to 0, which stretched boxes to the origin or dropped
    them depending on which edge was affected.)

    Args:
        result: Iterable of 2-D arrays with rows ``x1, y1, x2, y2, score``.
        threshold: Minimum score (exclusive) for a row to be processed.

    Returns:
        A list of float64 arrays, one per entry of ``result`` and in the same
        order. Entries without any confident box yield an empty ``(0, 5)``
        array so that positions keep matching the input.
    """
    aligned_result = []

    for boxes in result:
        confident_boxes = boxes[boxes[:, 4] > threshold]
        count_confident_boxes = confident_boxes.shape[0]

        if count_confident_boxes == 0:
            # Keep the output aligned with the input, as
            # algo2_result_to_aligned_result does.
            aligned_result.append(np.zeros((0, 5), dtype=np.float64))
            continue

        # Grow the density requirement with the number of boxes, never below 2.
        min_samples = max(int(np.sqrt(count_confident_boxes) / 2), 2)

        aligned = confident_boxes.astype(np.float64)
        for first_col, second_col in ((0, 2), (1, 3)):
            # Column-major flattening: all "first" edges, then all "second" ones.
            coords = confident_boxes[:, [first_col, second_col]].flatten(order="F")
            snapped = _snap_to_cluster_means(coords, min_samples)
            aligned[:, first_col] = snapped[:count_confident_boxes]
            aligned[:, second_col] = snapped[count_confident_boxes:]

        # Discard boxes that collapsed or inverted after snapping.
        valid = (aligned[:, 0] < aligned[:, 2]) & (aligned[:, 1] < aligned[:, 3])
        aligned_result.append(aligned[valid])

    return aligned_result



def algo2_result_to_aligned_result(
    result,
    r=0.1,
    dist_thresh=30,
    threshold=0.5,
):
    """Merge nearby corners of confident boxes onto a shared point.

    Every corner of every confident box is compared with the corners of all
    *other* confident boxes. Corners that lie within the per-axis tolerance of
    both boxes and within ``dist_thresh`` of each other are grouped with the
    current corner, and the whole group is moved to the integer-truncated mean
    of its x and y coordinates. Boxes are updated in place as the scan
    proceeds, so later comparisons see already-adjusted neighbours.

    Args:
        result: Iterable of 2-D arrays with rows ``x1, y1, x2, y2, score``.
        r: Fraction of a box's width/height used as its matching tolerance
            along x/y; the tolerance is never smaller than 4.0.
        dist_thresh: Largest Euclidean distance at which two corners merge.
        threshold: Minimum score (exclusive) for a row to be processed.

    Returns:
        A list with one array per entry of ``result`` containing the confident
        rows with adjusted corners. The input arrays are not modified because
        boolean-mask indexing produces a copy.
    """
    # (x column, y column) of each corner, in visiting order:
    # (x1, y1), (x2, y1), (x2, y2), (x1, y2).
    corner_columns = ((0, 1), (2, 1), (2, 3), (0, 3))
    min_offset = 4.0

    aligned_result = []

    for boxes in result:
        confident_boxes = boxes[boxes[:, 4] > threshold]

        for i, cell in enumerate(confident_boxes):
            # Tolerances come from the box size as it is when its scan starts.
            x_offset1 = max((cell[2] - cell[0]) * r, min_offset)
            y_offset1 = max((cell[3] - cell[1]) * r, min_offset)

            # Snapshot the four corners up front: the updates below rewrite
            # this very row, but each corner is matched using the values it
            # had before the scan of this box began.
            corners = tuple(
                (cell[x_col], cell[y_col]) for x_col, y_col in corner_columns
            )

            for v_i, (x_i, y_i) in enumerate(corners):
                keep_x = []
                keep_y = []
                targets = []  # (row index, corner index) of every group member

                for j, another_cell in enumerate(confident_boxes):
                    if i == j:
                        continue

                    x_offset2 = max(
                        (another_cell[2] - another_cell[0]) * r, min_offset
                    )
                    y_offset2 = max(
                        (another_cell[3] - another_cell[1]) * r, min_offset
                    )

                    for v_j, (x_col, y_col) in enumerate(corner_columns):
                        x_j = another_cell[x_col]
                        y_j = another_cell[y_col]

                        xdist = abs(x_j - x_i)
                        ydist = abs(y_j - y_i)
                        dist = np.sqrt(xdist**2 + ydist**2)

                        if not (
                            xdist > x_offset1
                            or xdist > x_offset2
                            or ydist > y_offset1
                            or ydist > y_offset2
                            or dist > dist_thresh
                        ):
                            keep_x.append(x_j)
                            keep_y.append(y_j)
                            targets.append((j, v_j))

                # The corner itself always belongs to its own group.
                keep_x.append(x_i)
                keep_y.append(y_i)
                targets.append((i, v_i))

                mean_x = int(np.mean(keep_x))
                mean_y = int(np.mean(keep_y))

                for idx, v_idx in targets:
                    x_col, y_col = corner_columns[v_idx]
                    confident_boxes[idx, x_col] = mean_x
                    confident_boxes[idx, y_col] = mean_y

        aligned_result.append(confident_boxes)

    return aligned_result



### evaluation

In [2]:
# Model config and trained checkpoint that are handed to init_detector.
config_file = '/media/quadro/NVME/Mehrab/Current_Experiment/config.py'
checkpoint_file = '/media/quadro/NVME/Mehrab/exps/32_quad_long_retry/latest.pth'

In [3]:
from mmdet.apis import init_detector, inference_detector, show_result_pyplot
import mmcv

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Create the detector from config_file/checkpoint_file on CUDA device index 3.
model = init_detector(config_file, checkpoint_file, device='cuda:3')

load checkpoint from local path: /media/quadro/NVME/Mehrab/exps/32_quad_long_retry/latest.pth


2024-01-20 15:20:45,225 - root - INFO - ModulatedDeformConvPack neck.deconv_layers.0.conv is upgraded to version 2.
2024-01-20 15:20:45,249 - root - INFO - ModulatedDeformConvPack neck.deconv_layers.2.conv is upgraded to version 2.
2024-01-20 15:20:45,274 - root - INFO - ModulatedDeformConvPack neck.deconv_layers.4.conv is upgraded to version 2.


In [5]:
# Single image used by the one-off inference and visualisation cells.
img_path = '/media/quadro/NVME/Mehrab/bank_statement.jpg'

In [7]:
# Run the detector on the single image.
# NOTE: the recorded run of this cell ended with
# "AssertionError: Lengths of bboxes and labels must match".
res = inference_detector(model, img_path)

torch.Size([1, 3000, 9]) torch.Size([1, 3000])
before in _get_bboxes_single batch_det_bboxes = torch.Size([1, 3000, 9]), batch_labels = torch.Size([1, 3000])
before in _get_bboxes_single batch_det_bboxes = torch.Size([1, 3000, 9]), batch_labels = torch.Size([1, 3000])
before in _get_bboxes_single batch_det_bboxes = torch.Size([1, 3000, 9]), batch_labels = torch.Size([1, 3000])
before in _get_bboxes_single batch_det_bboxes = torch.Size([1, 3000, 9]), batch_labels = torch.Size([1, 3000])
before in _get_bboxes_single batch_det_bboxes = torch.Size([1, 3000, 9]), batch_labels = torch.Size([1, 3000])
before in _get_bboxes_single batch_det_bboxes = (5400, 5) batch_labels = (3000,)
bbboxes shape  (5400, 5)
labels shape  (3000,)


AssertionError: Lengths of bboxes and labels must match

In [8]:
# Second reference to the current result: the batch loop rebinds `res`,
# while the alignment helpers are called with `res1`.
res1 = res
# Score cut-off shared by the visualisation and alignment cells.
threshold = 0.5

In [None]:
# Visualise the raw result for img_path, passing `threshold` as score_thr
# and writing the rendering to out_file.
show_result_pyplot(model, img_path, res, score_thr = threshold, out_file = "/media/quadro/NVME/Mehrab/exps/outputs/output.jpg")

In [6]:
import glob

folder_path = '/media/quadro/NVME/Mehrab/datasets/bank_statements'
image_extensions = ['jpg', 'jpeg', 'png', 'gif', 'bmp']  # Add more if needed

# Collect the files directly inside folder_path, one extension at a time,
# so the paths stay grouped in the order the extensions are listed.
image_paths = [
    path
    for extension in image_extensions
    for path in glob.glob(f'{folder_path}/*.{extension}')
]

print(image_paths)

['/media/quadro/NVME/Mehrab/datasets/bank_statements/bank_asia.jpg', '/media/quadro/NVME/Mehrab/datasets/bank_statements/bank_asia_2.jpg', '/media/quadro/NVME/Mehrab/datasets/bank_statements/islami_bank.jpg', '/media/quadro/NVME/Mehrab/datasets/bank_statements/islami_bank_blur.jpg', '/media/quadro/NVME/Mehrab/datasets/bank_statements/national_bank.jpg', '/media/quadro/NVME/Mehrab/datasets/bank_statements/sonali_bank.jpg', '/media/quadro/NVME/Mehrab/datasets/bank_statements/sonali_bank_rotated.jpg', '/media/quadro/NVME/Mehrab/datasets/bank_statements/moblie_banking.jpeg', '/media/quadro/NVME/Mehrab/datasets/bank_statements/united_commercial_bank.png']


In [9]:
import os

# Run the detector on every collected image and save one rendering per image
# under the dataset's outputs/ folder, keeping the original file name.
for img_path in image_paths:
    file_name = os.path.basename(img_path)
    print(" processing ", file_name)

    out_file = f"/media/quadro/NVME/Mehrab/datasets/bank_statements/outputs/{file_name}"
    res = inference_detector(model, img_path)
    show_result_pyplot(
        model,
        img_path,
        res,
        score_thr=threshold,
        out_file=out_file,
    )
    

 processing  bank_asia.jpg
torch.Size([1, 3000, 9]) torch.Size([1, 3000])
before in _get_bboxes_single batch_det_bboxes = torch.Size([1, 3000, 9]), batch_labels = torch.Size([1, 3000])
before in _get_bboxes_single batch_det_bboxes = torch.Size([1, 3000, 9]), batch_labels = torch.Size([1, 3000])
before in _get_bboxes_single batch_det_bboxes = torch.Size([1, 3000, 9]), batch_labels = torch.Size([1, 3000])
before in _get_bboxes_single batch_det_bboxes = torch.Size([1, 3000, 9]), batch_labels = torch.Size([1, 3000])
before in _get_bboxes_single batch_det_bboxes = torch.Size([1, 3000, 9]), batch_labels = torch.Size([1, 3000])
before in _get_bboxes_single batch_det_bboxes = (5400, 5) batch_labels = (3000,)
bbboxes shape  (5400, 5)
labels shape  (3000,)
bbboxes shape  (3000, 5)
labels shape  (3000,)


 processing  bank_asia_2.jpg
torch.Size([1, 3000, 9]) torch.Size([1, 3000])
before in _get_bboxes_single batch_det_bboxes = torch.Size([1, 3000, 9]), batch_labels = torch.Size([1, 3000])
before in _get_bboxes_single batch_det_bboxes = torch.Size([1, 3000, 9]), batch_labels = torch.Size([1, 3000])
before in _get_bboxes_single batch_det_bboxes = torch.Size([1, 3000, 9]), batch_labels = torch.Size([1, 3000])
before in _get_bboxes_single batch_det_bboxes = torch.Size([1, 3000, 9]), batch_labels = torch.Size([1, 3000])
before in _get_bboxes_single batch_det_bboxes = torch.Size([1, 3000, 9]), batch_labels = torch.Size([1, 3000])
before in _get_bboxes_single batch_det_bboxes = (5400, 5) batch_labels = (3000,)
bbboxes shape  (5400, 5)
labels shape  (3000,)
bbboxes shape  (3000, 5)
labels shape  (3000,)
 processing  islami_bank.jpg
torch.Size([1, 3000, 9]) torch.Size([1, 3000])
before in _get_bboxes_single batch_det_bboxes = torch.Size([1, 3000, 9]), batch_labels = torch.Size([1, 3000])
before 

In [None]:
# Post-process the saved raw result with both alignment strategies; the
# corner-merging variant is run with r=0.2 instead of its default 0.1.
res_algo2 = algo2_result_to_aligned_result(res1, threshold=threshold, r=0.2)
res_dbscan = dbscan_result_to_aligned_result(res1, threshold=threshold)
# Aliases used by the visualisation cell.
_res_algo2 = res_algo2
_res_dbscan = res_dbscan

In [None]:
# Render both aligned results on img_path into separate output files so the
# two strategies can be compared.
show_result_pyplot(model, img_path, _res_algo2, score_thr = threshold, out_file = "/media/quadro/NVME/Mehrab/exps/outputs/output_algo2.jpg")
show_result_pyplot(model, img_path, _res_dbscan, score_thr = threshold, out_file = "/media/quadro/NVME/Mehrab/exps/outputs/output_dbscan.jpg")

In [None]:
# Scratch cell: cluster the x and y edge coordinates of the confident boxes in
# result[0] with DBSCAN and draw one line per cluster mean onto the image.
result = res

import numpy as np
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import MinMaxScaler


boxes = result[0]
confident_boxes = boxes[boxes[:, 4] > threshold]
count_confident_boxes = confident_boxes.shape[0]

updated_confident_boxes=confident_boxes

# All x edges (columns 0 and 2) and all y edges (columns 1 and 3) as flat vectors.
X = confident_boxes[:, [0, 2]].flatten()
Y = confident_boxes[:, [1, 3]].flatten()

# Min-max scaling makes eps a fraction of the coordinate range.
X_scaled = MinMaxScaler().fit_transform(X.reshape((-1, 1)))
Y_scaled = MinMaxScaler().fit_transform(Y.reshape((-1, 1)))

# Same lower bound as dbscan_result_to_aligned_result; without it the value
# becomes 0 when there are fewer than four confident boxes.
min_samples = max(int(np.sqrt(count_confident_boxes) / 2), 2)
print("min_samples = ", min_samples)
clustering_X = DBSCAN(eps=0.01, min_samples=min_samples).fit_predict(X_scaled)
clustering_Y = DBSCAN(eps=0.01, min_samples=min_samples).fit_predict(Y_scaled)

labels_X = set(clustering_X)
labels_Y = set(clustering_Y)


img = Image.open(img_path)
draw = ImageDraw.Draw(img)
colors = [(255,0,0), (255,255,0), (255,0,255)]
width = 5

# Truncated mean coordinate of every cluster; label -1 is DBSCAN noise.
x_correct = [int(np.mean(X[clustering_X == x])) for x in labels_X if x != -1]
y_correct = [int(np.mean(Y[clustering_Y == y])) for y in labels_Y if y != -1]

# A line along one axis needs an extent on the other one, so lines can only be
# drawn when both axes produced at least one cluster.
if x_correct and y_correct:
    x_min, x_max = min(x_correct), max(x_correct)
    y_min, y_max = min(y_correct), max(y_correct)

    for x_mean in x_correct:
        draw.line(((x_mean, y_max), (x_mean, y_min)), fill=colors[0], width=width)
    for y_mean in y_correct:
        draw.line(((x_max, y_mean), (x_min, y_mean)), fill=colors[0], width=width)

print(f"sorted_confident_x = {sorted(x_correct)}")
print(f"sorted_confident_y = {sorted(y_correct)}")

img.save("/media/quadro/NVME/Mehrab/exps/outputs/output_lines.jpg")

min_samples =  27
sorted_confident_x = [-1, 101, 218, 347, 507, 707]
sorted_confident_y = [678]
