In [1]:
import torch
import albumentations as A
from albumentations.pytorch import ToTensorV2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image
import os
import json
from sklearn.model_selection import train_test_split
import shutil

In [2]:
def polygon_to_bounding_box(json_data, file):
    """Turn every polygon annotation stored for ``file`` into a COCO-style box.

    ``json_data[file]`` is expected to be a mapping. Each of its non-empty
    dict values is scanned for region entries shaped like
    ``{'shape_attributes': {'all_points_x': [...], 'all_points_y': [...]}}``
    (this looks like a VGG Image Annotator export -- confirm against the
    annotation tool actually used).

    Regions are read by iterating the dict's values rather than by indexing
    ``'0' .. 'n-1'``, so gaps in the region keys no longer raise ``KeyError``.
    Entries that are not polygon regions (other metadata dicts, shapes
    without point lists, empty point lists) are skipped.

    Args:
        json_data: Parsed annotation JSON, keyed by file name.
        file: Key of the image whose polygons should be converted.

    Returns:
        A list of ``[x_min, y_min, width, height]`` lists with ``int`` values,
        one per polygon, in the order the regions appear in the JSON.

    Raises:
        KeyError: If ``file`` is not a key of ``json_data``.
    """
    bounding_box_list = []
    for item in json_data[file].values():
        # Scalars (file name, size, ...) and empty dicts cannot hold regions.
        if not isinstance(item, dict) or not item:
            continue
        for region in item.values():
            shape = region.get('shape_attributes') if isinstance(region, dict) else None
            if not isinstance(shape, dict):
                continue
            poly_x = shape.get('all_points_x')
            poly_y = shape.get('all_points_y')
            # min()/max() need at least one point on each axis.
            if not poly_x or not poly_y:
                continue
            x_min, x_max = min(poly_x), max(poly_x)
            y_min, y_max = min(poly_y), max(poly_y)
            # Match format: [x, y, width, height]
            bounding_box_list.append(
                [int(x_min), int(y_min), int(x_max - x_min), int(y_max - y_min)]
            )
    return bounding_box_list

In [3]:
# Augmentation pipeline for an image plus its bounding boxes.
# Boxes are declared as COCO ([x_min, y_min, width, height]) and the class
# names are passed through the `labels` field named in `label_fields`.
transform = A.Compose(
    [
        # --- Geometric stage: one of the three candidates below ---
        A.OneOf(
            [
                A.HorizontalFlip(p=1),
                A.VerticalFlip(p=1),
                A.RandomRotate90(p=1),
            ],
            p=1,
        ),
        # --- Photometric stage ---
        A.RandomBrightnessContrast(
            brightness_limit=0.2,
            contrast_limit=0.2,
            p=0.5,
        ),
        A.HueSaturationValue(
            hue_shift_limit=10,
            sat_shift_limit=15,
            val_shift_limit=10,
            p=0.5,
        ),
        A.GaussNoise(std_range=(0.1, 0.2), p=1.0),
        # --- Occlusion stage: five 20x20 holes, then a sparse grid mask ---
        A.CoarseDropout(
            num_holes_range=(5, 5),
            hole_height_range=(20, 20),
            hole_width_range=(20, 20),
            fill="random_uniform",
            p=0.5,
        ),
        A.GridDropout(ratio=0.05, p=0.5),
        # ToTensorV2() is not part of the pipeline; the output image stays a NumPy array.
    ],
    bbox_params=A.BboxParams(format='coco', label_fields=['labels']),
)

In [4]:
def visualization(bboxes, labels, ax):
    """Outline each box on ``ax`` and print its label just above it.

    Args:
        bboxes: Iterable of ``(x_min, y_min, width, height)`` boxes.
        labels: Class name for each box, in the same order as ``bboxes``.
        ax: Matplotlib axes (anything offering ``add_patch`` and ``text``).

    Boxes labelled 'Thyrocyte'/'Thyrocytes' get a thin blue outline; every
    other label gets a thicker red one. The label text is always blue.
    """
    thyrocyte_names = ('Thyrocyte', 'Thyrocytes')
    for (left, top, width, height), name in zip(bboxes, labels):
        # Pick the outline style first so the patch is built in one place.
        if name in thyrocyte_names:
            stroke, colour = .2, "blue"
        else:
            stroke, colour = .5, "red"
        outline = plt.Rectangle(
            (left, top),
            width, height,
            linewidth=stroke, edgecolor=colour, facecolor="none"
        )
        ax.add_patch(outline)
        # Label sits 5 units above the box's top-left corner.
        ax.text(
            left, top - 5, name,
            color="blue", fontsize=2, weight="bold"
        )

In [5]:
# Values of the 'File' column of the summary CSV, as a plain list
# (presumably the training-split image names, going by the CSV name -- confirm).
train_df = pd.read_csv('/home/Special_Problem/train_df_summary.csv')['File'].to_list()

In [6]:
def get_normalize_bounding_box(x_min, y_min, bbox_width, bbox_height, img_width, img_height):
    """Convert a top-left/size box into centre/size fractions of the image.

    Args:
        x_min, y_min: Top-left corner of the box.
        bbox_width, bbox_height: Box extent along x and y.
        img_width, img_height: Image dimensions used as divisors.

    Returns:
        ``(x_center, y_center, width, height)``, each divided by the matching
        image dimension.
    """
    # Box centre, still in the input units.
    center_x = x_min + bbox_width / 2
    center_y = y_min + bbox_height / 2
    return (
        center_x / img_width,
        center_y / img_height,
        bbox_width / img_width,
        bbox_height / img_height,
    )

In [7]:
def save(bboxes, labels, image, file):
    """Convert one augmented sample's boxes to YOLO rows (disk writes are disabled).

    For every ``(box, label)`` pair the label is mapped to a class id and the
    box ``(x_min, y_min, width, height)`` is normalised with
    ``get_normalize_bounding_box`` using the image's height and width. The
    statements that would write the image and the label file are commented
    out, so at the moment the function only validates that every label is
    known and every box can be converted.

    Args:
        bboxes: Iterable of ``(x_min, y_min, width, height)`` boxes.
        labels: Class name for each box ('Cluster(s)' or 'Thyrocyte(s)').
        image: Array-like exposing ``.shape`` as ``(height, width, ...)``.
        file: Image file name; its stem names the label file.

    Returns:
        None. Any failure is reported on stdout together with its cause
        instead of being raised, so a batch run keeps going.
    """
    try:
        # splitext keeps dots inside the stem ("a.b.jpg" -> "a.b"), so the
        # label file name always matches the image file name.
        basename = os.path.splitext(file)[0]
        img_height, img_width = image.shape[:2]   # NumPy shape gives (H, W, C)

        label_map = {
            'Cluster': 0, 'Clusters': 0,
            'Thyrocyte': 1, 'Thyrocytes': 1
        }

        # Paths
        image_path = f"/home/Special_Problem/yolo_dataset/images/train/augmented/{file}"
        label_path = f"/home/Special_Problem/yolo_dataset/labels/train/augmented/{basename}.txt"

        # Save augmented image
        # Image.fromarray(image).save(image_path)

        # Save annotations
        # with open(label_path, "w") as label_file:
        for box, label in zip(bboxes, labels):
            class_id = label_map[label]
            x_min, y_min, bbox_width, bbox_height = box

            # Convert to YOLO format
            x_center, y_center, w_norm, h_norm = get_normalize_bounding_box(
                x_min, y_min, bbox_width, bbox_height, img_width, img_height
            )

            # Write line: class_id x_center y_center w h
            # label_file.write(
            #     f"{class_id} {x_center:.6f} {y_center:.6f} {w_norm:.6f} {h_norm:.6f}\n"
            # )
    except Exception as exc:
        # `Exception` (not a bare except) lets Ctrl-C / SystemExit propagate;
        # the f-string reports the real file name and the reason.
        print(f"{file} Bounding Box Error: {exc}")

In [8]:
def _drop_invalid_boxes(bboxes, labels, source_id, file):
    """Report and remove boxes whose width or height is not positive.

    Args:
        bboxes: List of ``[x, y, width, height]`` boxes.
        labels: Label for each box, same order and length as ``bboxes``.
        source_id: Number printed in the message (1 = JSON polygons, 2 = CSV).
        file: Image file name, used in the message.

    Returns:
        ``(kept_boxes, kept_labels)`` with the degenerate entries removed.
    """
    kept_boxes, kept_labels = [], []
    for box, label in zip(bboxes, labels):
        if box[2] <= 0 or box[3] <= 0:
            print(f"Invalid {source_id} bbox in {file}:", box)
        else:
            kept_boxes.append(box)
            kept_labels.append(label)
    return kept_boxes, kept_labels


# Walk the data tree; for each image listed in `train_df`, load its CSV boxes
# and JSON polygons from the sibling "<folder> - ANNOTATED FILES" directory,
# report degenerate boxes, and build the combined box/label lists consumed by
# the (currently disabled) augmentation step.
dataset = os.walk('/home/Special_Problem/Data')
for root, dirs, files in dataset:
    annotation_dir = root + " - ANNOTATED FILES"
    for file in files:
        if file not in train_df:
            continue
        # print(f"Processing image: {file}")
        # splitext handles names with several dots (or none) and avoids
        # shadowing the builtin `format` with the extension.
        stem = os.path.splitext(file)[0]
        try:
            # Unused while augmentation is disabled; a missing image still
            # surfaces here as FileNotFoundError.
            image = Image.open(os.path.join(root, file)).convert("RGB")

            path = os.path.join(annotation_dir, stem + 'A.csv')
            json_path = os.path.join(annotation_dir, stem + 'B.json')

            with open(json_path, 'r') as f:
                json_data = json.load(f)
            df = pd.read_csv(path)

            # One label and one [x, y, width, height] list per CSV row.
            # Going through to_numpy() also works for a CSV with no rows.
            df_labels = df['label_name'].tolist()
            df_bboxes = [
                list(row)
                for row in df[['bbox_x', 'bbox_y', 'bbox_width', 'bbox_height']].to_numpy()
            ]

            # From JSON polygons
            cluster_bboxes = polygon_to_bounding_box(json_data, file)
            cluster_labels = ["Cluster"] * len(cluster_bboxes)

            # Degenerate boxes are reported and dropped with their labels, so
            # they never reach the augmentation pipeline.
            cluster_bboxes, cluster_labels = _drop_invalid_boxes(
                cluster_bboxes, cluster_labels, 1, file
            )
            df_bboxes, df_labels = _drop_invalid_boxes(df_bboxes, df_labels, 2, file)

            # Combine both
            all_bboxes = df_bboxes + cluster_bboxes
            all_labels = df_labels + cluster_labels

            # Augment
            # augmented = transform(image=np.array(image), bboxes=all_bboxes, labels=all_labels)
            # augmented_image = augmented['image']
            # augmented_bboxes = augmented['bboxes']
            # augmented_labels = augmented['labels']

            # save(augmented_bboxes, augmented_labels, augmented_image, file)

        except FileNotFoundError as exc:
            # Say which image was being processed and which path was missing.
            print(f"File not found for {file}: {exc}")

Invalid 2 bbox in LS-107.jpg: [np.int64(1304), np.int64(896), np.int64(1), np.int64(0)]
Invalid 2 bbox in LS-104.jpg: [np.int64(1304), np.int64(896), np.int64(1), np.int64(0)]
Invalid 2 bbox in LS-103.jpg: [np.int64(1304), np.int64(896), np.int64(1), np.int64(0)]
Invalid 2 bbox in LS-105.jpg: [np.int64(1304), np.int64(896), np.int64(1), np.int64(0)]
Invalid 2 bbox in LS-108.jpg: [np.int64(1304), np.int64(896), np.int64(1), np.int64(0)]
