# Data Preprocessing

## Experiments with CECT & NCCT

In [None]:
import os
import json

def generate_directory_structure(main_directory):
    """Index a dataset tree laid out as round / control / category / case.

    Args:
        main_directory: Root folder whose immediate sub-directories are the
            rounds.

    Returns:
        A nested dict ``{round: {control: {category: {case: [file names]}}}}``.
        Every control always carries both the ``"Contrast"`` and the
        ``"Non-Contrast"`` key; a category whose folder does not exist maps to
        an empty dict. Plain files found at the round, control or case level
        are ignored; the innermost lists hold whatever ``os.listdir`` reports
        for the case folder, in the order it reports it.
    """

    def subdirectories(parent):
        # Yield (name, full path) for every directory directly under `parent`.
        for entry in os.listdir(parent):
            full_path = os.path.join(parent, entry)
            if os.path.isdir(full_path):
                yield entry, full_path

    tree = {}

    for round_name, round_path in subdirectories(main_directory):
        controls = tree[round_name] = {}

        for control_name, control_path in subdirectories(round_path):
            per_category = {}

            for category in ("Contrast", "Non-Contrast"):
                category_path = os.path.join(control_path, category)
                if os.path.exists(category_path):
                    per_category[category] = {
                        case_id: os.listdir(case_path)
                        for case_id, case_path in subdirectories(category_path)
                    }
                else:
                    # Missing category folder: keep the key, with no cases.
                    per_category[category] = {}

            controls[control_name] = per_category

    return tree


if __name__ == "__main__":
    # "[path]" looks like a redacted placeholder -- point it at the dataset
    # root (the folder that contains the round directories) before running.
    main_directory = r"[path]" 
    # Index the tree once; later cells read `directory_structure` as a global.
    directory_structure = generate_directory_structure(main_directory)
    # Pretty-print the nested mapping so the layout can be checked by eye.
    print(json.dumps(directory_structure, indent=4))


## Creating the entire dataset structure

In [None]:
# Bare expression: shows the mapping returned by generate_directory_structure
# as this cell's output.
directory_structure

In [None]:
import shutil

def _split_label_and_images(files):
    """Partition one case's file names into ``(label_file, image_files)``.

    ``.json`` files are skipped. A ``.nii.gz`` file whose name contains no
    underscore is taken as the label (the last such file wins if there are
    several); every other file is treated as an image.
    """
    label_file = None
    image_files = []
    for file in files:
        if file.endswith(".json"):
            continue
        if file.endswith(".nii.gz") and "_" not in file:
            label_file = file
        else:
            image_files.append(file)
    return label_file, image_files


def extract_images(main_directory, directory_structure, output_dir, flag="both"):
    """Flatten the nested dataset tree into ``imagesTr`` / ``labelsTr`` folders.

    Each case found in ``directory_structure`` is copied from
    ``main_directory/<round>/<control>/<category>/<case>/`` to
    ``output_dir/imagesTr`` (images) and ``output_dir/labelsTr`` (label). The
    copy is named ``<round>_<control>_<category>_<case>.nii.gz``, with spaces
    in each component replaced by underscores, so an image and its label share
    the same file name.

    Args:
        main_directory: Root folder the keys of ``directory_structure`` are
            relative to.
        directory_structure: Nested mapping
            ``{round: {control: {category: {case: [file names]}}}}``.
        output_dir: Destination root; created if missing.
        flag: Category to export, matched case-insensitively against the
            category names (e.g. ``"contrast"``, ``"non-contrast"``), or
            ``"both"`` (also case-insensitive) to export every category.

    Returns:
        None. One line is printed per copied file.

    Note:
        The destination name does not include the source file name, so when a
        case folder holds more than one image file, each copy overwrites the
        previous one and only the last file listed survives.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Normalise once so "Both"/"BOTH" behave like "both"; previously only the
    # category comparison ignored case and such a flag silently copied nothing.
    wanted = flag.lower()

    for round_name, controls in directory_structure.items():
        for control_name, categories in controls.items():
            for category, images in categories.items():
                if wanted != "both" and category.lower() != wanted:
                    continue

                for image_id, files in images.items():
                    image_dir = os.path.join(
                        main_directory, round_name, control_name, category, image_id
                    )
                    # Shared by the image and the label copy of this case.
                    flat_name = "_".join(
                        part.replace(" ", "_")
                        for part in (round_name, control_name, category, image_id)
                    ) + ".nii.gz"

                    label_file, image_files = _split_label_and_images(files)

                    if label_file:
                        label_dest_dir = os.path.join(output_dir, "labelsTr")
                        os.makedirs(label_dest_dir, exist_ok=True)

                        # An (empty) imagesTs folder is created alongside;
                        # nothing is copied into it by this function.
                        os.makedirs(os.path.join(output_dir, "imagesTs"), exist_ok=True)

                        src_label = os.path.join(image_dir, label_file)
                        dest_label = os.path.join(label_dest_dir, flat_name)
                        shutil.copy(src_label, dest_label)
                        print(f"Copied label: {src_label} to {dest_label}", end="\n\n")

                    for image_file in image_files:
                        image_dest_dir = os.path.join(output_dir, "imagesTr")
                        os.makedirs(image_dest_dir, exist_ok=True)

                        src_image = os.path.join(image_dir, image_file)
                        dest_image = os.path.join(image_dest_dir, flat_name)
                        shutil.copy(src_image, dest_image)
                        print(f"Copied image: {src_image} to {dest_image}", end="\n\n")


### JSON data list

In [None]:
def generate_json(output_dir):
    """Print the image/label pairs found under ``output_dir`` as a JSON list.

    Every file in ``output_dir/imagesTr`` is matched to a file in
    ``output_dir/labelsTr`` whose name is the image name with
    ``"_image.nii.gz"`` replaced by ``"_label.nii.gz"`` (names without that
    suffix are matched as-is). Images without a matching label are left out.
    Entries are listed in sorted image-name order so that repeated runs print
    the same list.

    Args:
        output_dir: Folder expected to contain ``imagesTr`` and ``labelsTr``.
            If either is missing, an empty list is reported.

    Returns:
        None. The pair count and the JSON list are printed to stdout.
    """
    image_dir = os.path.join(output_dir, "imagesTr")
    label_dir = os.path.join(output_dir, "labelsTr")

    json_data = []

    if os.path.exists(image_dir) and os.path.exists(label_dir):
        # A set gives O(1) membership tests instead of scanning a list per image.
        label_files = set(os.listdir(label_dir))

        # os.listdir returns names in arbitrary order; sort for stable output.
        for image_file in sorted(os.listdir(image_dir)):
            corresponding_label = image_file.replace("_image.nii.gz", "_label.nii.gz")

            if corresponding_label in label_files:
                json_data.append({
                    "image": f"./imagesTr/{image_file}",
                    "label": f"./labelsTr/{corresponding_label}"
                })

    print(f"Total Length: {len(json_data)}", end="\n\n")
    print(json.dumps(json_data, indent=4))
    # Writing the list to disk is currently disabled; uncomment to save it.
#     with open(os.path.join(output_dir, "dataset.json"), "w") as json_file:
#         json.dump(json_data, json_file, indent=4)
#     print(f"JSON dataset saved to {os.path.join(output_dir, 'dataset.json')}")

### Creating a global combined dataset

In [None]:
# Source root and destination root. "[path]" looks like a redacted
# placeholder -- replace both with real paths before running.
main_dir = r"[path]"
main_output_dir = r"[path]"

# Export every category. This reuses the `directory_structure` global from an
# earlier cell; extract_images joins its keys onto main_dir, so main_dir must
# be the same root that structure was generated from.
extract_images(main_dir, directory_structure, main_output_dir, "both")

### Creating Liming Data Structure

#### Contrast

In [None]:
# Destination for the Contrast-only export ("[path]" looks like a redacted
# placeholder).
contrast_output_dir = r"[path]"

# Source dataset root.
main_dir = r"[path]"

# Re-index the source tree; this overwrites the `directory_structure` global.
directory_structure = generate_directory_structure(main_dir)

# Copy only the cases filed under the "Contrast" category.
extract_images(main_dir, directory_structure, contrast_output_dir, "contrast")

In [None]:
# Print the image/label pairs found under the Contrast export.
generate_json(contrast_output_dir)

#### Non-Contrast

In [None]:

# Destination for the Non-Contrast-only export ("[path]" looks like a
# redacted placeholder).
non_contrast_output_dir =  r"[path]"

# Source dataset root.
main_dir = r"[path]"

# Re-index the source tree; this overwrites the `directory_structure` global.
directory_structure = generate_directory_structure(main_dir)

# Copy only the cases filed under the "Non-Contrast" category.
extract_images(main_dir, directory_structure, non_contrast_output_dir, "non-contrast")

In [None]:
# Print the image/label pairs found under the Non-Contrast export.
generate_json(non_contrast_output_dir)

#### Contrast & Non-Contrast

In [None]:


# Destination for the combined export ("[path]" looks like a redacted
# placeholder).
combined_output_dir =  r"[path]"

# Source dataset root.
main_dir = r"[path]"

# Re-index the source tree; this overwrites the `directory_structure` global.
directory_structure = generate_directory_structure(main_dir)

# "both" disables the category filter, so Contrast and Non-Contrast cases are
# all copied into the same output folders.
extract_images(main_dir, directory_structure, combined_output_dir, "both")

In [None]:
# Print the image/label pairs found under the combined export.
generate_json(combined_output_dir)

### Test cases

In [None]:
# Root of the test-case export ("[path]" looks like a redacted placeholder).
testcases_dir = "[path]"

# NOTE(review): generate_json only reads the imagesTr/ and labelsTr/
# sub-folders of the directory it is given -- confirm the test-case folder
# uses that layout (and not imagesTs/), otherwise an empty list is printed.
generate_json(testcases_dir)

## The End!