In [None]:
import cv2
import numpy as np
from PIL import Image

In [None]:
SCORE_THRESHOLD = 0.5  # Default minimum score for a feature-map cell to be treated as text
FEATURE_SCALE_FACTOR = 4.0  # Default scaling factor used by the EAST network due to its architecture


def decode_predictions(scores, geometry, score_threshold=None, scale_factor=None):
    """Turn raw EAST output maps into candidate boxes and their scores.

    Only batch element 0 is read from either array.

    Args:
        scores: array indexed as ``scores[0, 0, row, col]`` holding the text
            score of each feature-map cell.
        geometry: array indexed as ``geometry[0, channel, row, col]``.
            Channels 0-3 are read as the distances from the cell to the top,
            right, bottom and left box edges; channel 4 is read as the
            rotation angle (fed to ``np.cos`` / ``np.sin``).
        score_threshold: cells scoring below this are skipped. ``None``
            (default) uses the module-level ``SCORE_THRESHOLD`` as it is at
            call time.
        scale_factor: multiplier mapping feature-map coordinates to network
            input coordinates. ``None`` (default) uses the module-level
            ``FEATURE_SCALE_FACTOR`` as it is at call time.

    Returns:
        ``(polygons, confidences)``. ``polygons`` is a list of
        ``[topLeft, topRight, bottomRight, bottomLeft]`` lists of ``(x, y)``
        integer tuples in network-input coordinates; ``confidences`` holds the
        matching score values. Both lists are in row-major cell order.

    Note:
        The rotation angle is only used to locate the bottom-right corner;
        the four returned corners always form an axis-aligned rectangle.
    """
    if score_threshold is None:
        score_threshold = SCORE_THRESHOLD
    if scale_factor is None:
        scale_factor = FEATURE_SCALE_FACTOR

    score_map = scores[0, 0]
    dist_top = geometry[0, 0]
    dist_right = geometry[0, 1]
    dist_bottom = geometry[0, 2]
    dist_left = geometry[0, 3]
    angles = geometry[0, 4]

    polygons = []
    confidences = []

    # Select qualifying cells in one pass instead of scanning every cell in
    # Python. `~(a < t)` (not `a >= t`) keeps exactly the cells that a
    # per-cell `if a < t: continue` would keep. `.tolist()` yields plain
    # Python ints in row-major order, so the arithmetic below is done on the
    # same scalar types as a nested `range` loop would use.
    hits = np.argwhere(~(score_map < score_threshold)).tolist()

    for y, x in hits:
        offset_x = x * scale_factor
        offset_y = y * scale_factor
        cos_a = np.cos(angles[y, x])
        sin_a = np.sin(angles[y, x])

        top = dist_top[y, x]
        right = dist_right[y, x]
        bottom = dist_bottom[y, x]
        left = dist_left[y, x]

        # Bottom-right corner: the cell position shifted by the right/bottom
        # distances rotated by the predicted angle.
        end_x = int(offset_x + (cos_a * right) + (sin_a * bottom))
        end_y = int(offset_y - (sin_a * right) + (cos_a * bottom))

        # Top-left corner: step back by the full box width and height.
        start_x = int(end_x - (right + left))
        start_y = int(end_y - (top + bottom))

        polygons.append([
            (start_x, start_y),  # top-left
            (end_x, start_y),    # top-right
            (end_x, end_y),      # bottom-right
            (start_x, end_y),    # bottom-left
        ])
        confidences.append(score_map[y, x])

    return (polygons, confidences)

In [None]:
def show_image(opencv_image):
    """Convert an OpenCV image array into a PIL image for notebook display.

    Args:
        opencv_image: image array. Multi-channel input is converted with
            ``cv2.COLOR_BGR2RGB``; a 2-D (single-channel) array is passed to
            PIL unchanged.

    Returns:
        A ``PIL.Image.Image`` built from the converted array. Nothing is
        displayed by this function itself; return it as the last expression
        of a cell to render it.
    """
    if getattr(opencv_image, "ndim", None) == 2:
        # A single-channel image has no channel order to swap, and
        # COLOR_BGR2RGB rejects it, so hand it to PIL as-is.
        return Image.fromarray(opencv_image)

    color_converted = cv2.cvtColor(opencv_image, cv2.COLOR_BGR2RGB)
    return Image.fromarray(color_converted)

In [None]:
from shapely.geometry import Polygon

def polygon_non_max_suppression(polygons, overlapThresh=0.1):
    """Greedy, area-ordered suppression of overlapping polygons.

    Each input is turned into a shapely ``Polygon`` and healed with
    ``buffer(0)``. Geometries that are invalid or have zero area after
    healing are dropped. The remaining polygons are visited from largest to
    smallest area: the visited polygon is kept, and every remaining polygon
    whose intersection with it covers more than ``overlapThresh`` of the
    smaller of the two areas is discarded.

    Args:
        polygons: iterable of point sequences, each accepted by
            ``shapely.geometry.Polygon``.
        overlapThresh: overlap ratio (intersection area / smaller area) above
            which a polygon is suppressed.

    Returns:
        List of kept polygons, largest first, each a list of ``(x, y)``
        coordinate tuples taken from the healed exterior ring with the
        closing (repeated) point removed. Because the coordinates come from
        the healed geometry, vertex order and start point may differ from the
        input.
    """
    # Heal and filter in one pass, preserving input order.
    candidates = []
    for points in polygons:
        healed = Polygon(points).buffer(0)
        if healed.geom_type == "MultiPolygon":
            # buffer(0) may split a self-crossing ring into several pieces.
            # Keep the largest piece so `.exterior` is defined on the result.
            healed = max(healed.geoms, key=lambda part: part.area)
        if healed.geom_type == "Polygon" and healed.is_valid and healed.area > 0:
            candidates.append(healed)

    areas = [poly.area for poly in candidates]

    # Ascending by area, so pop() always yields the largest remaining polygon.
    remaining = sorted(range(len(candidates)), key=areas.__getitem__)

    keep = []
    while remaining:
        current = remaining.pop()
        keep.append(current)
        current_poly = candidates[current]

        # Build the survivor list in a single pass rather than removing
        # suppressed indices one at a time.
        survivors = []
        for idx in remaining:
            intersection_area = current_poly.intersection(candidates[idx]).area
            # Every area is > 0 after filtering, so the division is safe.
            overlap_ratio = intersection_area / min(areas[current], areas[idx])
            if not overlap_ratio > overlapThresh:
                survivors.append(idx)
        remaining = survivors

    return [list(candidates[i].exterior.coords)[:-1] for i in keep]

In [None]:
# Load the image and EAST model
image_path = 'datasets/FUNSD_polygon_augmented/dataset/testing_data/images/82092117_augmented_0.png'
image = cv2.imread(image_path)
if image is None:
    # cv2.imread reports a missing/unreadable file by returning None rather
    # than raising, which would otherwise surface later as an opaque
    # AttributeError on `image.shape`.
    raise FileNotFoundError(f"Could not read image: {image_path}")

east_model = "bin/frozen_east_text_detection.pb"
net = cv2.dnn.readNet(east_model)

# Network input size as (width, height), and the per-axis factors that map
# network-input coordinates back onto the original image (x, y).
blob_size = (512, 512)
image_scale_factor = image.shape[1] / blob_size[0], image.shape[0] / blob_size[1]

# Prepare the image
blob = cv2.dnn.blobFromImage(image, 1.0, blob_size, (123.68, 116.78, 103.94), swapRB=True, crop=False)

# Forward pass
net.setInput(blob)
(scores, geometry) = net.forward(["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"])

# Decode the predictions
(polygons, confidences) = decode_predictions(scores, geometry)

# Apply non-maxima suppression to drop overlapping bounding polygons.
# Note: the suppression ranks by polygon area; `confidences` is not used by it.
polygons = polygon_non_max_suppression(polygons)

# Scale polygons with format [(x1, y1), (x2, y2), (x3, y3), (x4, y4)]
polygons = [np.array(polygon) * image_scale_factor for polygon in polygons]

# Draw polygons (this draws directly onto `image`)
for polygon in polygons:
    cv2.polylines(image, [np.array(polygon, dtype=np.int32)], isClosed=True, color=(0, 255, 0), thickness=2)


show_image(image)