In [None]:
import os
import numpy as np
import math
import cv2
from PIL import Image
from matplotlib import pyplot as plt

In [None]:
# Locations of the raw training images and their ground-truth label arrays.
image_folder_path = '../data/raw/train/image'
label_folder_path = '../data/raw/train/label'

# os.listdir returns entries in arbitrary order, so sort the names to make
# positional indexing (e.g. jpg_files[1]) reproducible across runs and machines.
jpg_files = sorted(file for file in os.listdir(image_folder_path) if file.endswith('.JPG'))

# Derive each label file name from the image stem. splitext only strips the
# trailing extension, whereas str.replace would also rewrite a '.JPG' that
# happens to appear earlier in the file name.
npy_files = [os.path.splitext(jpg_file)[0] + '_gt.npy' for jpg_file in jpg_files]

In [None]:
# Index of the sample (image / label pair) to inspect.
idx = 1

jpg_file = jpg_files[idx]
# Open the image inside a context manager so the underlying file handle is
# released as soon as the pixels have been copied into the array.
with Image.open(os.path.join(image_folder_path, jpg_file)) as img:
    jpg_data = np.asarray(img)

# Load the label array that matches the selected image.
npy_file = npy_files[idx]
npy_data = np.load(os.path.join(label_folder_path, npy_file))

In [None]:
# Display the selected image on an explicit Axes and label it with its file
# name so the figure is identifiable when the notebook is skimmed.
fig, ax = plt.subplots()
ax.imshow(jpg_data)
ax.set_title(jpg_file)
plt.show()

In [None]:
def get_num_tiles(tile_size=384, row_size=1024, col_size=1360):
    """Compute how many square tiles cover an image and how much they overlap.

    Along each axis the number of tiles is the smallest count whose combined
    length reaches the image size. The surplus length is then spread over the
    gaps between consecutive tiles, rounded up to whole pixels.

    Args:
        tile_size: Side length of a tile, in pixels. Must be positive.
        row_size: Image extent along the row axis (defaults to 1024).
        col_size: Image extent along the column axis (defaults to 1360).

    Returns:
        A tuple ``(num_tiles_row, overlap_row, num_tiles_col, overlap_col)``.

    Raises:
        ValueError: If ``tile_size`` is not positive.
    """
    if tile_size <= 0:
        raise ValueError(f'tile_size must be positive, got {tile_size}')

    def _axis_layout(size):
        # Number of tiles needed to reach `size`; never fewer than one.
        num_tiles = max(1, math.ceil(size / tile_size))

        # A single tile has no neighbour to overlap with. This also covers the
        # case where the tile is larger than the image, which previously
        # divided by zero.
        if num_tiles == 1:
            return 1, 0

        # Surplus length to absorb, shared between the (num_tiles - 1) gaps.
        # Rounding up may over-absorb by a few pixels; the coordinate
        # computation is expected to compensate at the image border.
        excess = math.ceil(num_tiles * tile_size - size)
        overlap = math.ceil(excess / (num_tiles - 1))
        return num_tiles, overlap

    num_tiles_row, overlap_row = _axis_layout(row_size)
    num_tiles_col, overlap_col = _axis_layout(col_size)

    return num_tiles_row, overlap_row, num_tiles_col, overlap_col

In [None]:
# Tile counts and overlaps along the row and column axes, using the default tile size.
num_tiles_row, overlap_row, num_tiles_col, overlap_col = get_num_tiles()

In [None]:
def get_coords_tile(num_tiles_row, overlap_row, num_tiles_col, overlap_col, tile_size=384,
                    row_size=1024, col_size=1360):
    """Build the bounding boxes of every tile of the grid.

    Tiles are laid out with a stride of ``tile_size - overlap`` along each
    axis and clipped to the image extent. Because the overlap is a whole
    number of pixels that was rounded up, the last tile of an axis can stop a
    few pixels short of the border; it is then shifted so that it ends exactly
    on the border and the whole image is covered.

    Args:
        num_tiles_row: Number of tiles along the row axis.
        overlap_row: Overlap between consecutive tiles along the row axis.
        num_tiles_col: Number of tiles along the column axis.
        overlap_col: Overlap between consecutive tiles along the column axis.
        tile_size: Side length of a tile, in pixels.
        row_size: Image extent along the row axis (defaults to 1024).
        col_size: Image extent along the column axis (defaults to 1360).

    Returns:
        A list of ``(x0, y0, x1, y1)`` tuples in row-major order, where ``x``
        runs along the row axis and ``y`` along the column axis, and the end
        coordinates are exclusive slice bounds.
    """

    def _axis_spans(num_tiles, overlap, size):
        # (start, stop) pairs of the tiles along one axis.
        stride = tile_size - overlap
        spans = []
        for k in range(num_tiles):
            start = max(0, k * stride)
            stop = min(size, start + tile_size)
            shortfall = size - stop

            # A tile ending one pixel short is always pushed onto the border.
            # For the last tile, the rounding residue of the overlap can be as
            # large as num_tiles - 2 pixels, so absorb that whole range instead
            # of leaving a strip of the image uncovered.
            is_last = k == num_tiles - 1
            if shortfall == 1 or (is_last and 0 < shortfall < num_tiles - 1):
                start += shortfall
                stop += shortfall

            spans.append((start, stop))
        return spans

    row_spans = _axis_spans(num_tiles_row, overlap_row, row_size)
    col_spans = _axis_spans(num_tiles_col, overlap_col, col_size)

    # Row-major order: all columns of the first row of tiles, then the next row.
    return [(x0, y0, x1, y1) for x0, x1 in row_spans for y0, y1 in col_spans]

In [None]:
# Bounding boxes (x0, y0, x1, y1) of every tile of the grid computed above.
coords_tile = get_coords_tile(num_tiles_row, overlap_row, num_tiles_col, overlap_col)

In [None]:
# Draw on a copy so the loaded image itself stays untouched.
jpg_data_rect = jpg_data.copy()

outline_color = (255, 0, 0)
outline_width = 3

# Outline the tiles one at a time and show the image after each addition, so
# the figures build up the full grid progressively.
for x0, y0, x1, y1 in coords_tile:
    # OpenCV points are (horizontal, vertical), hence the column coordinate first.
    top_left = (y0, x0)
    bottom_right = (y1, x1)
    jpg_data_rect = cv2.rectangle(jpg_data_rect, top_left, bottom_right, outline_color, outline_width)
    plt.imshow(jpg_data_rect)
    plt.show()

In [None]:
# Count how many tiles, over all label files, contain more than one distinct
# label value (i.e. are not uniformly a single value).
count = 0
count_all = 0

for npy_file in npy_files:
    npy_data = np.load(os.path.join(label_folder_path, npy_file))

    for x0, y0, x1, y1 in coords_tile:
        cropped_npy_data = npy_data[x0:x1, y0:y1]

        # More than one distinct value means the crop holds some annotation.
        if np.unique(cropped_npy_data).size > 1:
            count += 1

        count_all += 1

# Guard against an empty dataset (no label files or no tiles), which would
# otherwise raise ZeroDivisionError.
percentage = count / count_all * 100 if count_all else 0.0
print(f'On {count_all} tiles, there are {percentage:.2f}% with annotations ({count} tiles).')