In [None]:
# TODO
# 1. Combine the 64x64 outputs into one image.
# 2. For every pixel, assign the most probable class. Maybe this should depend on the neighbours?
#    There could be multiple overlaps; find a way to handle that.
# 3. Convert every pixel to the correct polygon JSON format.

In [4]:
import numpy as np
import cv2


In [5]:
def post_process_optimized(outputs, gamma=0.5, image_size=1024, tile_size=64, num_classes=5):
    """Fuse per-tile class scores into one label map with 4-neighbour smoothing.

    Every entry of ``outputs`` is a triple ``(x, y, probs)``. ``probs`` is added
    onto the ``tile_size`` x ``tile_size`` window whose first-axis offset is ``x``
    and second-axis offset is ``y``; windows that overlap simply sum their scores,
    and windows reaching past the image border are clipped by slicing. Each
    pixel's score is then increased by ``gamma`` times the summed scores of its
    up/down/left/right neighbours (pixels outside the image contribute zero), and
    the class with the highest combined score is selected.

    Args:
        outputs: Iterable of ``(x, y, probs)``; ``probs`` must broadcast against
            a ``(tile_size, tile_size, num_classes)`` window (e.g. a sequence of
            ``num_classes`` scores).
        gamma: Weight applied to the sum of the four neighbouring score vectors.
        image_size: Side length of the square output image.
        tile_size: Side length of the square window each entry is written to.
        num_classes: Number of class scores stored per pixel.

    Returns:
        Integer array of shape ``(image_size, image_size)`` holding the winning
        class index per pixel. Pixels that received no score at all resolve to
        class 0, because ``argmax`` returns the first index on ties.
    """
    scores = np.zeros((image_size, image_size, num_classes))

    for x, y, probs in outputs:
        scores[x:x + tile_size, y:y + tile_size] += np.asarray(probs)

    # Zero-pad the two spatial axes by one pixel so the four shifted views below
    # line up with the unpadded image: up, down, left and right neighbours.
    padded = np.pad(scores, ((1, 1), (1, 1), (0, 0)))
    neighbors = gamma * (padded[:-2, 1:-1] + padded[2:, 1:-1] +
                         padded[1:-1, :-2] + padded[1:-1, 2:])

    return np.argmax(scores + neighbors, axis=2)

def test_post_process_optimized(seed=None):
    """Smoke-test ``post_process_optimized`` on a fully tiled synthetic image.

    Builds one random 5-class probability vector for each 64x64 tile of a
    1024x1024 grid (tile origins every 64 pixels on both axes), pushes class 0
    to a probability of at least 0.3 before renormalising, and runs the
    post-processing on the result.

    Args:
        seed: Optional seed for a private ``RandomState`` so the run can be
            reproduced. With ``None`` the global ``np.random`` state is used.

    Returns:
        The label map returned by ``post_process_optimized``.

    Raises:
        AssertionError: If the label map has an unexpected shape or contains a
            class index outside 0-4.
    """
    # np.random and RandomState expose the same dirichlet() API, so the default
    # path keeps drawing from the global stream.
    rng = np.random if seed is None else np.random.RandomState(seed)

    positions = np.mgrid[0:1024:64, 0:1024:64].reshape(2, -1).T

    probs = rng.dirichlet(np.ones(5) * 2, size=positions.shape[0])
    probs[:, 0] = np.clip(probs[:, 0], 0.3, 1.0)
    # Clipping breaks the sum-to-one property, so renormalise each row.
    probs = probs / probs.sum(axis=1, keepdims=True)

    outputs = [(x, y, p.tolist()) for (x, y), p in zip(positions, probs)]
    label_map = post_process_optimized(outputs)

    assert label_map.shape == (1024, 1024), label_map.shape
    assert label_map.min() >= 0 and label_map.max() <= 4, "class index out of range"
    return label_map

## Torch and CUDA version

In [None]:
import torch

def post_process_torch(outputs, gamma=0.5, device=None, image_size=1024, tile_size=64, num_classes=5):
    """Torch version of the tile fusion with 4-neighbour smoothing.

    Every entry of ``outputs`` is a triple ``(x, y, probs)``. ``probs`` is added
    onto the ``tile_size`` x ``tile_size`` window whose first-axis offset is ``x``
    and second-axis offset is ``y``; overlapping windows sum their scores. Each
    pixel's score is then increased by ``gamma`` times the summed scores of its
    up/down/left/right neighbours (zero outside the image) and the class with
    the highest combined score is selected.

    Args:
        outputs: Iterable of ``(x, y, probs)`` with integer offsets and
            ``probs`` given as (nested) sequences of numbers that broadcast
            against a ``(tile_size, tile_size, num_classes)`` window.
        gamma: Weight applied to the sum of the four neighbouring score vectors.
        device: Device to compute on. ``None`` selects CUDA when it is available
            and falls back to the CPU otherwise.
        image_size: Side length of the square output image.
        tile_size: Side length of the square window each entry is written to.
        num_classes: Number of class scores stored per pixel.

    Returns:
        ``torch.int64`` tensor of shape ``(image_size, image_size)`` on
        ``device`` with the winning class index per pixel.
    """
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    else:
        device = torch.device(device)

    outputs = list(outputs)  # iterated twice below
    scores = torch.zeros((image_size, image_size, num_classes), device=device)

    # One host-to-device transfer for all score vectors.
    probs = torch.tensor([p for _, _, p in outputs], dtype=scores.dtype, device=device)

    # Offsets stay on the host as plain ints: slicing with elements of a device
    # tensor would force a synchronisation for every index.
    for (x, y, _), prob in zip(outputs, probs):
        x, y = int(x), int(y)
        scores[x:x + tile_size, y:y + tile_size] += prob

    # F.pad lists padding from the last dimension backwards: nothing on the
    # class axis, one zero pixel on each side of both spatial axes.
    padded = torch.nn.functional.pad(scores, (0, 0, 1, 1, 1, 1))
    neighbors = gamma * (padded[:-2, 1:-1] + padded[2:, 1:-1] +
                         padded[1:-1, :-2] + padded[1:-1, 2:])

    return torch.argmax(scores + neighbors, dim=2)  # stays on `device`

## From pixel image to polygons

In [7]:

# Input: a 1024x1024 label map whose values are the class ids 0-4.
def converter(tensor, num_classes=5):
    """Trace the outer contours of every class region in a label map.

    For each class value ``0 .. num_classes - 1`` a binary mask of the pixels
    equal to that value is built and handed to ``cv2.findContours``.

    Args:
        tensor: 2-D label map. A NumPy array, anything ``np.asarray`` accepts,
            or a torch tensor (moved to the CPU first).
        num_classes: Number of class values to extract.

    Returns:
        Dict mapping each class value to a list of polygons. A polygon is a
        list of ``[x, y]`` points in OpenCV order (x = column, y = row). Only
        outermost contours are retrieved (``RETR_EXTERNAL``), so holes inside a
        region are not represented. Contours are kept exactly as OpenCV returns
        them, including ones with fewer than three points.
    """
    # Torch tensors have no .astype(); bring them to host memory as NumPy.
    if hasattr(tensor, "detach"):
        tensor = tensor.detach().cpu().numpy()
    labels = np.asarray(tensor)

    # Dictionary to store polygons for each class value
    polygons = {}

    for val in range(num_classes):
        mask = (labels == val).astype(np.uint8)  # binary mask for this value

        # OpenCV 3 returns (image, contours, hierarchy) while 2.x/4.x return
        # (contours, hierarchy); the contours are second-to-last in both.
        contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

        # Each contour has shape (n_points, 1, 2); flatten to [[x, y], ...]
        polygons[val] = [c.reshape(-1, 2).tolist() for c in contours]
    return polygons
# Smoke run: synthetic tiles -> label map -> per-class polygons.
test = converter(test_post_process_optimized())
for i in range(5):
    # Two lines per class: the polygon list, then how many polygons it holds.
    print(test[i], len(test[i]), sep="\n")
# At this point test[val] holds the list of polygons for each class value.


[[[896, 960]], [[703, 960]], [[193, 960], [192, 961], [192, 1023], [319, 1023], [319, 960]], [[383, 959]], [[0, 896]], [[769, 895], [768, 896], [704, 896], [767, 896], [768, 897], [768, 1023], [832, 1023], [832, 960], [833, 959], [894, 959], [832, 959], [831, 958], [831, 897], [832, 896], [831, 896], [830, 895]], [[960, 832], [960, 895], [959, 896], [960, 897], [960, 958], [959, 959], [960, 960], [1023, 960], [1023, 832]], [[896, 831], [958, 831]], [[320, 768], [320, 830], [319, 831], [257, 831], [256, 830], [256, 769], [256, 830], [255, 831], [319, 831], [320, 832], [320, 896], [321, 895], [383, 895], [384, 896], [446, 896], [447, 897], [447, 958], [448, 959], [510, 959], [511, 960], [511, 1023], [575, 1023], [575, 961], [576, 960], [638, 960], [639, 961], [639, 1023], [639, 896], [576, 896], [576, 958], [574, 960], [512, 960], [511, 959], [511, 832], [512, 831], [574, 831], [575, 830], [575, 768], [513, 768], [512, 769], [512, 831], [511, 832], [448, 832], [447, 831], [447, 768]], [[