# Copying all images to the images directory and all labels to the labels directory

In [None]:
import os
import shutil
import jsonlines

In [None]:
# Directory tree that is walked to find the raw images and labels.
root_dir = "[path]"
# Destination that receives the flat `images` and `labels` folders.
data_dir = "[path]"
# Parent folder of the `train`, `val` and `test` split folders.
split_dir = "[path]"

## This code copies images to the images folder and copies each label, renamed to match its image's file name, to the labels folder

In [None]:
def prepare_training_data(root_dir, output_dir):
    """Copy images and labels found under ``root_dir`` into ``output_dir``.

    Two folders are created (if missing): ``output_dir/images`` and
    ``output_dir/labels``. Every directory below ``root_dir`` is scanned and
    each file is classified by name (case-insensitively):

    * ``*.json`` files are ignored.
    * A file whose name starts with ``im`` and contains no underscore is
      treated as a label. It is copied to the labels folder under the name of
      the image that lives in the same directory, so image and label share one
      file name. If the directory holds several images, the last one listed
      wins; if it holds none, the label keeps its own name.
    * Any other ``*.nii.gz`` file is treated as an image and copied to the
      images folder under its own name.

    Args:
        root_dir: Directory tree to scan.
        output_dir: Directory that receives the ``images`` and ``labels``
            folders. It may be located inside ``root_dir``.
    """
    images_dir = os.path.join(output_dir, 'images')
    labels_dir = os.path.join(output_dir, 'labels')
    os.makedirs(images_dir, exist_ok=True)
    os.makedirs(labels_dir, exist_ok=True)

    def _canonical(path):
        # Comparable form of a path (absolute, case-normalised on Windows).
        return os.path.normcase(os.path.abspath(path))

    def _is_label(name):
        # Labels are recognised purely by name: "im..." without any underscore.
        lowered = name.lower()
        return lowered.startswith('im') and '_' not in lowered

    output_dirs = {_canonical(images_dir), _canonical(labels_dir)}

    for root, dirs, files in os.walk(root_dir):
        # Never descend into our own output folders. Without this, a nested
        # output_dir (or a second run) would re-classify the already renamed
        # labels as images and overwrite real images with label data, or fail
        # with shutil.SameFileError when a file is copied onto itself.
        dirs[:] = [
            d for d in dirs
            if _canonical(os.path.join(root, d)) not in output_dirs
        ]

        # The image name used to rename labels is the same for every label in
        # this directory, so work it out once instead of once per label.
        image_names = [
            name for name in files
            if name.lower().endswith('.nii.gz') and not _is_label(name)
        ]
        paired_image = image_names[-1] if image_names else None

        for file in files:
            if file.lower().endswith('.json'):
                continue

            file_path = os.path.join(root, file)

            if _is_label(file):
                label_name = paired_image if paired_image is not None else file
                shutil.copy(file_path, os.path.join(labels_dir, label_name))
                print(f"Copied label: {file_path} to {labels_dir}")
            elif file.lower().endswith('.nii.gz'):
                shutil.copy(file_path, os.path.join(images_dir, file))
                print(f"Copied image: {file_path} to {images_dir}")

In [None]:
# Gather the images and labels found under root_dir into data_dir/images and data_dir/labels.
prepare_training_data(root_dir, data_dir)

### Listing the images that will be split.

In [None]:
# os.listdir returns entries in arbitrary order; sorting makes the listing
# (and therefore the seeded train/val/test split built from it) reproducible.
data = sorted(os.listdir(os.path.join(data_dir, "images")))
data

### Count contrast and non-contrast images.

In [None]:
def count_constrast(data):
    """Print how many names in ``data`` are contrast vs. non-contrast.

    A name counts as non-contrast when it contains the substring
    ``"Non-Contrast"``; every other name is reported as contrast.

    Args:
        data: Sized collection of file-name strings.
    """
    total = len(data)
    non_contrast = sum(1 for name in data if "Non-Contrast" in name)
    print(f"Total Images are {total}")
    print(f"Contrast images are {total - non_contrast}")
    print(f"Non-Contrast images are {non_contrast}")

## Splitting data into train, validation, and test sets in a 70:15:15 ratio, with proportional representation of contrast and non-contrast instances.

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Partition the file names by the tag embedded in each name.
contrast_files = [file for file in data if '_Contrast' in file]
non_contrast_files = [file for file in data if 'Non-Contrast' in file]

# Names carrying neither tag end up in no split; report them instead of
# dropping them silently.
unclassified_files = [
    file for file in data
    if '_Contrast' not in file and 'Non-Contrast' not in file
]
if unclassified_files:
    print(f"Warning: {len(unclassified_files)} file(s) match neither "
          f"'_Contrast' nor 'Non-Contrast' and are excluded from the split: "
          f"{unclassified_files}")

# Single-column frames; the column is named 0, which is why later cells read
# `train_df[0]` and the JSONL records use the key '0'.
c_df = pd.DataFrame(contrast_files)

nc_df = pd.DataFrame(non_contrast_files)

# Contrast files: hold out 30%, then halve the hold-out -> 70/15/15.
train_c_df, temp_df = train_test_split(c_df, test_size=0.3, random_state=42)
val_c_df, test_c_df = train_test_split(temp_df, test_size=0.5, random_state=42)

# Non-contrast files: same 70/15/15 procedure, split independently so each
# subset keeps the contrast / non-contrast proportions.
train_df, temp_df = train_test_split(nc_df, test_size=0.3, random_state=42)
val_df, test_df = train_test_split(temp_df, test_size=0.5, random_state=42)

# Merge the two groups per subset (non-contrast rows first, then contrast).
train_df = pd.concat([train_df, train_c_df])
val_df = pd.concat([val_df, val_c_df])
test_df = pd.concat([test_df, test_c_df])

# Persist each subset as a spreadsheet ...
train_df.to_excel('train_images.xlsx', index=False)
val_df.to_excel('val_images.xlsx', index=False)
test_df.to_excel('test_images.xlsx', index=False)

# ... and as JSON Lines (one {"0": "<file name>"} record per line).
train_df.to_json('train_images.jsonl', orient='records', lines=True)
val_df.to_json('val_images.jsonl', orient='records', lines=True)
test_df.to_json('test_images.jsonl', orient='records', lines=True)

print("Data has been split and saved into Excel and JSONL files.")


### Contrast and Non-contrast count.

In [None]:
# Contrast / non-contrast breakdown for the full listing ...
print("Total Instances Split:")
count_constrast(data)

# ... and for each subset; the file names live in column 0 of each frame.
for title, frame in (("Training", train_df),
                     ("Validation", val_df),
                     ("Test", test_df)):
    print(f"\n{title} Data Split:")
    count_constrast(list(frame[0]))

## Copying files as per the train, val, and test instance splits.

In [None]:

# Build every path with os.path.join so the notebook works on any OS; a
# hard-coded "\\" is only a path separator on Windows.
source_img_dir = os.path.join(data_dir, 'images')
source_label_dir = os.path.join(data_dir, 'labels')
train_dir = os.path.join(split_dir, 'train')
val_dir = os.path.join(split_dir, 'val')
test_dir = os.path.join(split_dir, 'test')

# Each split folder gets its own `images` and `labels` sub-folder.
for split_root in (train_dir, val_dir, test_dir):
    for subfolder in ('images', 'labels'):
        os.makedirs(os.path.join(split_root, subfolder), exist_ok=True)


def copy_images(file_list, target_dir):
    """Copy every image/label pair listed in a JSONL file into ``target_dir``.

    Each record's ``'0'`` entry is a file name that is looked up in both the
    module-level ``source_img_dir`` and ``source_label_dir``. When both files
    exist they are copied to ``target_dir/images`` and ``target_dir/labels``;
    otherwise a warning is printed and the record is skipped.

    Args:
        file_list: Path of the JSONL file to read.
        target_dir: Split folder that already contains ``images`` and
            ``labels`` sub-folders.
    """
    with jsonlines.open(file_list) as records:
        for record in records:
            name = record['0']
            image_src = os.path.join(source_img_dir, name)
            label_src = os.path.join(source_label_dir, name)

            # Only complete pairs are copied.
            if not (os.path.exists(image_src) and os.path.exists(label_src)):
                print(f"Warning: {image_src} or {label_src} does not exist.")
                continue

            image_dst = os.path.join(target_dir, "images", name)
            label_dst = os.path.join(target_dir, "labels", name)

            print(f"Copying image from {image_src} to {image_dst}")
            print(f"Copying label from {label_src} to {label_dst}", end="\n\n")

            shutil.copy(image_src, image_dst)
            shutil.copy(label_src, label_dst)


# Populate each split folder from the JSONL file written for that split.
copy_images('train_images.jsonl', train_dir)
copy_images('val_images.jsonl', val_dir)
copy_images('test_images.jsonl', test_dir)

# Printed once all three splits have been processed; records skipped with a
# warning above do not change this message.
print("Image copying completed.")
