# Image processing

In [1]:
# Root directory scanned by batch_convert_nifti_to_h5: each sub-directory is treated as one case.
input_folder  = "input-folder"

In [None]:
import nibabel as nib
import numpy as np
import h5py
import os

def nifti_to_h5(nifti_file, h5_file, dataset_name):
    """Store one NIfTI volume as a gzip-compressed dataset inside an HDF5 file.

    The volume is read as floating-point data, its last axis is moved to the
    front (a ``(2, 0, 1)`` transpose) and axes 1 and 2 are centre-cropped to
    at most 240 x 240. Axis 0 is never cropped.

    Args:
        nifti_file: Path of the ``.nii`` / ``.nii.gz`` file to read.
        h5_file: Path of the HDF5 file to append to; created if missing.
        dataset_name: Name of the dataset to create. When it is ``'label'``
            the value 4 is remapped to 3; any other dataset is stored with
            its values untouched.

    If ``dataset_name`` already exists in ``h5_file`` a message is printed
    and nothing is read or written.
    """
    target_height, target_width = 240, 240

    # Cheap early exit: look for the dataset before paying for loading and
    # transforming the whole volume. The file is opened read-only here so a
    # missing HDF5 file is not created as a side effect of the check.
    if os.path.isfile(h5_file):
        with h5py.File(h5_file, 'r') as hf:
            if dataset_name in hf:
                print(f"Dataset {dataset_name} already exists in {h5_file}. Skipping.")
                return

    image_data = nib.load(nifti_file).get_fdata()
    # Move the last axis to the front — presumably (H, W, D) -> (D, H, W);
    # confirm against the input data.
    image_data = np.transpose(image_data, (2, 0, 1))

    # Only segmentation masks carry class ids. Remapping an intensity image
    # would silently alter every voxel whose intensity happens to equal 4.
    if dataset_name == 'label':
        image_data[image_data == 4] = 3

    # Centre crop on axes 1 and 2. A volume smaller than the target would give
    # a negative start, which slicing treats as an offset from the end of the
    # axis; clamping to 0 keeps such volumes whole instead.
    start_h = max((image_data.shape[1] - target_height) // 2, 0)
    start_w = max((image_data.shape[2] - target_width) // 2, 0)
    image_data = image_data[:, start_h:start_h + target_height, start_w:start_w + target_width]

    with h5py.File(h5_file, 'a') as hf:
        hf.create_dataset(dataset_name, data=image_data, compression="gzip")
        print(f"Added {nifti_file} as {dataset_name} to {h5_file}")

def batch_convert_nifti_to_h5(input_folder):
    """Convert every case sub-directory of ``input_folder`` into ``<case>.h5``.

    Each sub-directory is scanned for ``.nii`` / ``.nii.gz`` files. A file
    whose name contains ``t1c`` is stored as dataset ``'image'``; otherwise a
    file whose name contains ``seg`` is stored as dataset ``'label'``. Other
    files are ignored. The HDF5 file is written next to the case directory,
    directly inside ``input_folder``.

    Args:
        input_folder: Directory containing one sub-directory per case.
    """
    nifti_suffixes = ('.nii.gz', '.nii')

    for case_name in os.listdir(input_folder):
        case_dir = os.path.join(input_folder, case_name)
        # Plain files at the top level are not cases.
        if not os.path.isdir(case_dir):
            continue

        h5_target = os.path.join(input_folder, f"{case_name}.h5")

        for entry in os.listdir(case_dir):
            if not entry.endswith(nifti_suffixes):
                continue

            source = os.path.join(case_dir, entry)
            # 't1c' takes precedence when a name contains both markers.
            if 't1c' in entry:
                nifti_to_h5(source, h5_target, 'image')
            elif 'seg' in entry:
                nifti_to_h5(source, h5_target, 'label')

# Run the conversion over every case sub-directory of input_folder.
batch_convert_nifti_to_h5(input_folder)

In [None]:
import os

import h5py

# Path of the HDF5 file to inspect; point this at one of the converted case files.
# Built with os.path.join so the separators are valid on every platform
# (a hard-coded "\\" only resolves on Windows).
file_path = os.path.join("PKG - BraTS-Africa", "BraTS-Africa", "95_Glioma", "BraTS-SSA-00228-000.h5")

# Open the file in read mode: this cell only inspects it
with h5py.File(file_path, 'r') as h5_file:
    # List all the top-level groups/datasets in the file
    print("Keys in the file:", list(h5_file.keys()))

    # Report the shape of the 'image' and 'label' datasets when they are present
    for key, caption in (('image', "Image shape:"), ('label', "Label shape:")):
        if key in h5_file:
            print(caption, h5_file[key].shape)

# Data split preparation

In [3]:
import os
import random
# Every entry of the "Africa-BraTS" directory, in the arbitrary order os.listdir returns them.
all_files = os.listdir("Africa-BraTS")

In [5]:
# Shuffle in place so the train/val/test split taken from this list is random.
# NOTE(review): no random.seed() call is visible, so the split changes on every run — confirm that is intended.
random.shuffle(all_files)

In [7]:
# Turn "<case>.h5" file names into bare case names.
# Only ".h5" entries are kept: the directory can also hold other files (e.g. the
# train/val/test .txt lists written further down), and blindly dropping the last
# three characters would turn those into bogus case names on a re-run.
h5_suffix = ".h5"
files = [
    filename[:-len(h5_suffix)]
    for filename in all_files
    if filename.endswith(h5_suffix)
]

In [9]:
# First 60 names go to training, the next 20 to validation, the remainder to testing.
train_set, val_set, test_set = files[:60], files[60:80], files[80:]

In [11]:
# Optional: save each split as a text file, one case name per line.
# os.path.join keeps the paths valid on every platform; the hard-coded "\\"
# separator only resolved to a file inside the directory on Windows.
for split_name, split_cases in (("train", train_set), ("val", val_set), ("test", test_set)):
    with open(os.path.join("Africa-BraTS", f"{split_name}.txt"), "w") as f:
        for name in split_cases:
            f.write(name + "\n")