In [1]:
import os
import numpy as np
import pandas as pd
import json
import shutil

from copy import deepcopy


In [15]:
# Label-generation settings and module-level placeholders.
# NOTE: despite the YOLO notes below, the functions in this cell build COCO-style
# dictionaries whose "bbox" entry is [x_top_left, y_top_left, width, height].
# YOLO format (for reference only): <class> <x_center> <y_center> <width> <height>
# with values normalized to [0, 1]
# Example of another per-image record layout, kept for reference:
# {"file_name": "0001.png", "objects": {"bbox": [[302.0, 109.0, 73.0, 52.0]], "categories": [0]}}
BBOXS_SIZE = 70  # side of the square box; used as both width and height in transform_labels_to_coco
trust = 2  # passed to transform_labels_to_coco: slices z-trust .. z+trust receive the annotation


# Same values as splittung()'s defaults; the main block calls splittung() without
# passing them, so these two globals are not read by the visible code.
val_split = 0.1
test_split = 0.15

# Not read by the visible code: the main block passes copy_files=True literally.
copy_files = True

# Empty COCO-style skeleton; the main block rebinds `detections` to the result of
# transform_labels_to_coco().
detections = {
    "images": [],  # list of all images in the dataset
    "annotations": [],  # list of all annotations in the dataset
    "categories": []  # list of all categories
}
count_labl = 0  # not read by the visible code

# Not read by the visible code; get_coco_meta_file() builds local dicts with the same names.
meta_train = []
meta_test = []
meta_val = []


# Not read by the visible code; get_coco_meta_file() uses local variables with the same names.
source = []
destination = []


def splittung(data, val_split=0.1, test_split=0.15, seed=None):
    """Randomly partition ids into train, test and validation sets.

    The test set is drawn first from ``data``. The validation set is then drawn
    from what is left, so ``val_split`` is a fraction of the non-test items,
    not of the whole input.

    Args:
        data: Sequence (list or 1-D array) of hashable ids, e.g. tomogram names.
        val_split: Fraction of the non-test items used for validation.
        test_split: Fraction of all items used for testing.
        seed: Optional integer seed. When given, a private
            ``np.random.RandomState(seed)`` is used so the split is
            reproducible. When ``None`` (default) NumPy's global random state
            is used, exactly as before.

    Returns:
        Tuple ``(train_tomos, test_tomos, val_tomos)`` where ``train_tomos`` is
        a list and the other two are NumPy arrays.
    """
    # np.random (module) and RandomState expose the same ``choice`` signature.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Test split
    test_split_len = int(len(data) * test_split)
    test_tomos = rng.choice(data, replace=False, size=test_split_len)
    # Set lookup instead of scanning the array once per candidate.
    test_ids = set(test_tomos.tolist())
    remaining = [tomo for tomo in data if tomo not in test_ids]

    # Validation split, taken from the non-test remainder
    val_split_len = int(len(remaining) * val_split)
    val_tomos = rng.choice(remaining, replace=False, size=val_split_len)
    val_ids = set(val_tomos.tolist())
    train_tomos = [tomo for tomo in remaining if tomo not in val_ids]

    return train_tomos, test_tomos, val_tomos


def transform_labels_to_coco(labels, trust, bbox_size=None):
    """Turn per-motor label rows into a COCO-style detection dictionary.

    Each row is indexed positionally: ``label[1]`` is the tomogram id (a string
    of the form ``<prefix>_<hex>``), ``label[2]`` the slice index of the motor,
    ``label[3]`` / ``label[4]`` the y / x centre, and ``label[-4]`` /
    ``label[-3]`` the image height / width.

    For every row, one annotation is emitted on each slice from
    ``z - trust`` to ``z + trust`` (negative slice indices are skipped). The
    box is a ``bbox_size`` square centred on the motor and clipped to the image
    bounds, so it never extends past any border.

    Args:
        labels: Iterable of label rows laid out as described above.
        trust: Integer number of slices on each side of the motor slice that
            also receive the annotation.
        bbox_size: Side length of the square box. Defaults to the module-level
            ``BBOXS_SIZE`` when omitted.

    Returns:
        dict with keys ``"images"``, ``"annotations"`` and ``"categories"``
        (the latter is left empty). Each image appears once, even when several
        rows annotate the same slice.
    """
    if bbox_size is None:
        # Keep the historical behaviour of reading the notebook-wide constant.
        bbox_size = BBOXS_SIZE
    half = float(bbox_size) / 2

    detections = {
        "images": [],  # list of all images in the dataset
        "annotations": [],  # list of all annotations in the dataset
        "categories": []  # list of all categories
    }

    detec_count = 0
    # Ids of images already registered; a set keeps the membership test O(1).
    seen_image_ids = set()

    for label in labels:
        tomo_name = label[1]
        tomo_number = int(tomo_name.split('_')[1], 16)
        z = int(label[2])
        x_center = float(label[4])
        y_center = float(label[3])
        img_width = label[-3]
        img_height = label[-4]

        # The box does not depend on the slice, so compute it once per row.
        # Clip each edge to the image instead of shrinking around the centre.
        x_min = max(0.0, x_center - half)
        y_min = max(0.0, y_center - half)
        x_max = min(float(img_width), x_center + half)
        y_max = min(float(img_height), y_center + half)
        width = max(0.0, x_max - x_min)
        height = max(0.0, y_max - y_min)
        x_top_left = int(x_min)
        y_top_left = int(y_min)

        for b in range(z - trust, z + trust + 1):
            if b < 0:
                continue

            # Image id = decimal tomogram number followed by the 4-digit slice
            # index; get_coco_meta_file() strips the last 4 digits to recover it.
            img_id = int(f"{tomo_number}{b:04d}")
            img_name = f"{tomo_name}_slice_{b:04d}.jpg"

            if img_id not in seen_image_ids:
                seen_image_ids.add(img_id)
                detections["images"].append({"id": img_id,
                                             "width": img_width,
                                             "height": img_height,
                                             "file_name": img_name
                                             })

            detections["annotations"].append({"id": detec_count,
                                              "image_id": img_id,  # the id of the image that the annotation belongs to
                                              "category_id": 0,  # the id of the category that the annotation belongs to
                                              "area": float(width*height),
                                              "bbox": [x_top_left, y_top_left, width, height],
                                              "iscrowd": 0
                                              })

            detec_count += 1

    return detections


def get_coco_meta_file(detections, tomo_splits, coco_dir_path, source_path, copy_files=False):
    """Write one COCO annotation file per split and optionally copy the images.

    Images and annotations from ``detections`` are routed to the ``train``,
    ``test`` or ``valid`` split according to the tomogram they belong to.
    For every split, ``<coco_dir_path>/<split>/_annotations.coco.json`` is
    written (the folder is created when missing).

    Args:
        detections: dict with ``"images"`` and ``"annotations"`` lists as
            produced by ``transform_labels_to_coco``. Image file names are
            expected to look like ``<prefix>_<hex>_<name>_<NNNN>.<ext>`` and
            image ids to end with a 4-digit slice index.
        tomo_splits: ``(train, test, val)`` sequences of tomogram names of the
            form ``<prefix>_<hex>``.
        coco_dir_path: Root folder of the generated dataset.
        source_path: Folder holding one sub-folder per tomogram with the
            source images.
        copy_files: When true, copy each image from
            ``<source_path>/<tomogram>/<name>_<NNNN>.<ext>`` into its split
            folder. When false, only the annotation files are written.

    Returns:
        None.
    """
    train_split, test_split, val_split = tomo_splits
    split_names = ("train", "test", "valid")
    splits = (train_split, test_split, val_split)

    # Per-split lookup tables: tomogram names for images, decimal tomogram
    # numbers for annotations (whose image id starts with that number).
    split_tomos = [set(split) for split in splits]
    split_numbers = [{int(i.split('_')[1], 16) for i in split} for split in splits]

    metas = {
        name: {
            "images": [],
            "annotations": [],
            "categories": [{"id": 0, "name": "Motor", "supercategory": None}]
        }
        for name in split_names
    }

    # Create every split folder up front so that both the image copies and the
    # annotation files have somewhere to go, even for an empty split.
    for name in split_names:
        os.makedirs(os.path.join(coco_dir_path, name), exist_ok=True)

    for image in detections["images"]:
        split_list = image["file_name"].split("_")
        tomo_name = f"{split_list[0]}_{split_list[1]}"

        for name, tomos in zip(split_names, split_tomos):
            if tomo_name not in tomos:
                continue

            if copy_files:
                source = os.path.join(
                    source_path, tomo_name, f"{split_list[2]}_{split_list[-1]}")
                destination = os.path.join(coco_dir_path, name, image["file_name"])
                shutil.copyfile(source, destination)

            # Registered exactly once per split.
            metas[name]["images"].append(image)

    for label in detections["annotations"]:
        # Drop the trailing 4-digit slice index to recover the tomogram number.
        tomo_number = int(str(label["image_id"])[:-4])
        for name, numbers in zip(split_names, split_numbers):
            if tomo_number in numbers:
                metas[name]["annotations"].append(label)
                break  # first matching split wins (train, then test, then valid)

    for name in split_names:
        annotation_path = os.path.join(coco_dir_path, name, "_annotations.coco.json")
        with open(annotation_path, 'w') as json_output:
            json.dump(metas[name], json_output)
            json_output.write('\n')


if __name__ == "__main__":
    # Build the COCO dataset: read the labels, split the tomograms, create the
    # detections and write images + annotation files.

    # splittung() draws from NumPy's random state. Seeding it makes every run
    # produce the same split, so re-running the cell does not leave images from
    # a previous, different split behind in the coco/<split>/ folders.
    np.random.seed(42)

    df = pd.read_csv("train_labels.csv")

    # Label rows as a 2-D array; rows whose "Motor axis 0" is -1 are dropped
    # (presumably the marker for "no motor" -- confirm against the dataset docs).
    labels = df[df["Motor axis 0"] != -1].values

    # Only tomograms that contain at least one motor take part in the split.
    tomot_with_motor_list = df[df["Number of motors"] > 0]["tomo_id"].unique()

    data = splittung(tomot_with_motor_list)
    # Sizes of the train / test / validation splits.
    print(len(data[0]), len(data[1]), len(data[2]))
    detections = transform_labels_to_coco(labels, trust)

    dest_path = "coco/"
    source_path = "train/"

    get_coco_meta_file(detections, data, dest_path, source_path, copy_files=True)


278 54 30


In [None]:

def normalize_slice(slice_data):
    """
    Normalize slice data using 2nd and 98th percentiles

    Values are clipped to the [p2, p98] percentile range and rescaled linearly
    to [0, 255].

    Args:
        slice_data: Array-like of numeric values (any shape).

    Returns:
        ``np.uint8`` array with the same shape as the input. When the two
        percentiles coincide (e.g. a constant slice) there is no contrast to
        stretch and an all-zero array is returned.
    """
    # Calculate percentiles
    p2 = np.percentile(slice_data, 2)
    p98 = np.percentile(slice_data, 98)
    value_range = p98 - p2

    # A zero (or NaN) range would divide by zero and cast NaN to uint8.
    if not value_range > 0:
        return np.zeros(np.shape(slice_data), dtype=np.uint8)

    # Clip the data to the percentile range
    clipped_data = np.clip(slice_data, p2, p98)

    # Normalize to [0, 255] range
    normalized = 255 * (clipped_data - p2) / value_range

    return np.uint8(normalized)

In [None]:
import albumentations as A
import cv2 as cv
import matplotlib.pyplot as plt

# Augmentation pipeline applied to each training image:
#   1. a square-symmetry transform, always applied (p=1);
#   2. with p=0.7, one of three brightness/contrast, hue/saturation or gamma changes;
#   3. with p=0.5, one of three blurs;
#   4. with p=0.5, one of four noise models.
# NOTE(review): SquareSymmetry changes the image geometry while bbox_params is
# left disabled below, so bounding boxes are NOT transformed with the image --
# confirm the original annotations are not reused for the augmented files.
train_pipeline = A.Compose(
    [
        A.SquareSymmetry(p=1),
        A.OneOf([
            A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.7),
            A.HueSaturationValue(hue_shift_limit=20, sat_shift_limit=30, val_shift_limit=20, p=0.7),
            A.RandomGamma(gamma_limit=(80, 120), p=0.8),
        ], p=0.7),
        A.OneOf([
            A.GaussianBlur(blur_limit=(3, 7), p=0.5),
            A.MedianBlur(blur_limit=5, p=0.5),
            A.MotionBlur(blur_limit=(3, 7), p=0.5),
        ], p=0.5),
        A.OneOf([
            # GaussNoise takes `std_range`; the former `std_limit` keyword is
            # not a valid argument (it triggered the warning shown in this
            # cell's output), so the requested noise level was not applied.
            A.GaussNoise(std_range=(0.1, 0.2), p=0.5),
            A.ISONoise(color_shift=(0.01, 0.05), intensity=(0.1, 0.5), p=0.5),
            A.MultiplicativeNoise(multiplier=(0.9, 1.1), per_channel=True, p=0.5),
            A.SaltAndPepper(p=0.5)
        ], p=0.5)
        # Add bbox_params or keypoint_params if dealing with bounding boxes or keypoints
    ],
    #bbox_params=A.BboxParams(format='coco', label_fields=[])
)

# Write one augmented, percentile-normalized copy of every training image
# into coco/train/aug/.
# Remember to visualize the output!

aug_dir = "coco/train/aug/"
# cv.imwrite does not create missing folders; make sure the target exists.
os.makedirs(aug_dir, exist_ok=True)

# Sorted so that the numeric prefix `p` is the same on every run.
img_list_dir = sorted(os.listdir("coco/train"))

for p, img in enumerate(img_list_dir):
    img_path = "coco/train/" + img

    # Skip the "_annotations..." file and anything that is not a regular file
    # (for example the aug/ output folder itself on a second run).
    if img[0] == "_" or not os.path.isfile(img_path):
        continue

    image = cv.imread(img_path)
    if image is None:
        # cv.imread returns None for files it cannot decode.
        continue
    image = cv.cvtColor(image, cv.COLOR_BGR2RGB)

    augmented = train_pipeline(image=image)
    augmented_norm = normalize_slice(augmented["image"])

    # The pipeline works on RGB, but cv.imwrite expects BGR channel order;
    # convert back so colour-changing augmentations are not channel-swapped.
    augmented_bgr = cv.cvtColor(augmented_norm, cv.COLOR_RGB2BGR)

    # NOTE(review): the same image is saved under two names (with and without
    # the index prefix) -- confirm whether both are needed.
    cv.imwrite(f"{aug_dir}{p:04d}" + img, augmented_bgr)
    cv.imwrite(aug_dir + img, augmented_bgr)
    


  A.GaussNoise(std_limit=(0.1, 0.2), p=0.5),
