<a href="https://colab.research.google.com/github/apham2-del/Liver-segmentationND/blob/main/CT_patch_preprocessing_script_with_JSON_placeholders.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Step 1: Mount Google Drive
from google.colab import drive
drive.mount('/content/my_drive')  # change if you want a different folder

# Step 2: Import libraries
import os
import nibabel as nib
import numpy as np
import json

# Step 3: Set paths
dataset_dir = "/content/my_drive/MyDrive/liver_datasetIRCAD1"
patch_output_dir = "/content/my_drive/MyDrive/liver_datasetIRCAD1_patches"
os.makedirs(patch_output_dir, exist_ok=True)

# Parameters
patch_size = 64   # side length of each square patch (64x64)
stride = 32       # step between patch origins; stride < patch_size means patches overlap


def export_volume_patches(volume, volume_name, output_dir, patch_size, stride):
    """Write every 2D patch of a volume, plus a JSON placeholder, to ``output_dir``.

    The volume is walked slice by slice along its third axis. On each slice a
    ``patch_size`` x ``patch_size`` window is moved in steps of ``stride`` along
    the first two axes. Only windows that fit entirely inside the slice are
    exported, so trailing rows/columns that cannot fill a whole patch are not
    written.

    For each window two files are created, sharing the stem
    ``{volume_name}_slice{z}_patch_{i}_{j}``:

    * ``.npy``  - the patch array, saved with ``np.save``;
    * ``.json`` - a record holding the patch file name, slice index and patch
      position, with the annotation fields set to ``None`` (to be filled by
      LLaVA-Med).

    Parameters
    ----------
    volume : array-like with at least three axes
        The loaded image data.
    volume_name : str
        Prefix used for every output file name.
    output_dir : str
        Existing directory that receives the files.
    patch_size : int
        Side length of the square patch.
    stride : int
        Step between consecutive patch origins along each in-plane axis.

    Returns
    -------
    int
        Number of patches written.
    """
    # The in-plane shape is the same for every slice, so the patch origins
    # only need to be computed once per volume rather than once per slice.
    row_starts = range(0, volume.shape[0] - patch_size + 1, stride)
    col_starts = range(0, volume.shape[1] - patch_size + 1, stride)

    n_saved = 0
    for z in range(volume.shape[2]):
        slice_2d = volume[:, :, z]
        for i in row_starts:
            for j in col_starts:
                # The patch and its JSON sidecar share one stem.
                stem = f"{volume_name}_slice{z}_patch_{i}_{j}"

                # Save patch as .npy
                patch_filename = f"{stem}.npy"
                patch = slice_2d[i:i+patch_size, j:j+patch_size]
                np.save(os.path.join(output_dir, patch_filename), patch)

                # Generate JSON placeholder for LLaVA-Med
                json_data = {
                    "patch_file": patch_filename,
                    "slice_index": z,
                    "patch_position": [i, j],
                    "lesion_present": None,     # to be filled by LLaVA-Med
                    "lesion_type": None,        # to be filled by LLaVA-Med
                    "confidence": None,         # to be filled by LLaVA-Med
                    "comments": None            # to be filled by LLaVA-Med
                }

                # Save JSON alongside patch (explicit encoding so the file
                # does not depend on the runtime's locale default).
                json_path = os.path.join(output_dir, f"{stem}.json")
                with open(json_path, 'w', encoding='utf-8') as jf:
                    json.dump(json_data, jf, indent=4)

                n_saved += 1
    return n_saved


# Step 4: List all NIfTI files in your dataset folder.
# os.listdir() returns names in arbitrary order, so sort them to make the
# processing order (and this log) reproducible between runs.
files = sorted(f for f in os.listdir(dataset_dir) if f.endswith(".nii"))
print("Found NIfTI files:", files)

# Step 5: Loop through each CT volume
for file in files:
    ct_path = os.path.join(dataset_dir, file)
    ct_name = os.path.splitext(file)[0]

    # Load CT volume.
    # NOTE(review): nibabel's get_fdata() returns floating-point data
    # (float64 unless a dtype is passed), so patches are stored as floats.
    ct = nib.load(ct_path).get_fdata()
    print(f"Processing {file}, shape: {ct.shape}")

    n_patches = export_volume_patches(ct, ct_name, patch_output_dir, patch_size, stride)
    if n_patches == 0:
        # Happens when the volume has no slices or a slice is smaller than
        # the patch; surface it instead of silently producing nothing.
        print(f"Warning: no patches written for {file} (patch_size={patch_size}, shape={ct.shape})")

print("✅ All patches and JSON placeholders created in:", patch_output_dir)


Drive already mounted at /content/my_drive; to attempt to forcibly remount, call drive.mount("/content/my_drive", force_remount=True).
Found NIfTI files: ['ircad_e17_orig.nii', 'ircad_e18_orig.nii', 'ircad_e19_orig.nii', 'ircad_e20_orig.nii', 'ircad_e13_orig.nii', 'ircad_e14_orig.nii', 'ircad_e15_orig.nii', 'ircad_e16_orig.nii', 'ircad_e12_orig.nii', 'ircad_e11_orig.nii', 'ircad_e10_orig.nii', 'ircad_e09_orig.nii', 'ircad_e08_orig.nii', 'ircad_e07_orig.nii', 'ircad_e06_orig.nii', 'ircad_e05_orig.nii', 'ircad_e04_orig.nii', 'ircad_e03_orig.nii', 'ircad_e02_orig.nii', 'ircad_e01_orig.nii']
Processing ircad_e17_orig.nii, shape: (512, 512, 119)


In [None]:
# Step 1: Mount Google Drive
from google.colab import drive
drive.mount('/content/my_drive')

# Step 2: Imports
import os
import nibabel as nib
import numpy as np
import json

# Step 3: Paths
dataset_dir = "/content/my_drive/MyDrive/liver_datasetIRCAD1"
patch_output_dir = "/content/my_drive/MyDrive/liver_datasetIRCAD1_patches"
os.makedirs(patch_output_dir, exist_ok=True)

# Parameters
patch_size = 64    # side length of each square patch
stride = 32        # step between patch origins; stride < patch_size means patches overlap
max_volumes = 2    # how many volumes this trial run processes

# Step 4: List all NIfTI files.
# os.listdir() returns names in arbitrary order, so the list is sorted before
# it is sliced; otherwise "the first N files" would be an arbitrary subset
# that can differ from run to run.
all_files = sorted(f for f in os.listdir(dataset_dir) if f.endswith(".nii"))

# Pick only the first `max_volumes` files (in sorted order)
files = all_files[:max_volumes]
print("Processing these files:", files)

# Step 5: Loop through selected volumes
for file in files:
    ct_path = os.path.join(dataset_dir, file)
    ct_name = os.path.splitext(file)[0]

    # Load CT volume
    ct = nib.load(ct_path).get_fdata()
    print(f"Processing {file}, shape: {ct.shape}")

    # Patch origins depend only on the in-plane shape, which is identical for
    # every slice of the volume, so compute them once per volume. Only windows
    # that fit entirely inside the slice are produced.
    row_starts = range(0, ct.shape[0] - patch_size + 1, stride)
    col_starts = range(0, ct.shape[1] - patch_size + 1, stride)

    # Loop through slices (z-axis)
    for z in range(ct.shape[2]):
        slice_2d = ct[:, :, z]

        # Slide the window over the full slice in x and y
        for i in row_starts:
            for j in col_starts:
                patch = slice_2d[i:i+patch_size, j:j+patch_size]

                # The .npy patch and its .json sidecar share one stem.
                stem = f"{ct_name}_slice{z}_patch_{i}_{j}"

                # Save patch as .npy
                patch_filename = f"{stem}.npy"
                patch_path = os.path.join(patch_output_dir, patch_filename)
                np.save(patch_path, patch)

                # JSON placeholder for LLaVA-Med; the None fields are meant
                # to be filled in later.
                json_data = {
                    "patch_file": patch_filename,
                    "slice_index": z,
                    "patch_position": [i, j],
                    "lesion_present": None,
                    "lesion_type": None,
                    "confidence": None,
                    "comments": None
                }
                json_filename = f"{stem}.json"
                json_path = os.path.join(patch_output_dir, json_filename)
                # Explicit encoding so the file does not depend on the
                # runtime's locale default.
                with open(json_path, 'w', encoding='utf-8') as jf:
                    json.dump(json_data, jf, indent=4)

print("✅ All patches and JSON placeholders created in:", patch_output_dir)


Mounted at /content/my_drive
Processing these files: ['ircad_e17_orig.nii', 'ircad_e18_orig.nii']
Processing ircad_e17_orig.nii, shape: (512, 512, 119)
Processing ircad_e18_orig.nii, shape: (512, 512, 74)
✅ All patches and JSON placeholders created in: /content/my_drive/MyDrive/liver_datasetIRCAD1_patches
