<a href="https://colab.research.google.com/github/amorenooya/VIU_masterthesis/blob/main/0_finetuning_preprocess.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ATTENTION: adjust drive_root below if the project folder lives at a different
# location inside your Drive.
mount='/content/gdrive'
drive_root = mount + "/My Drive/TFM"

# Detect whether the notebook runs on Google Colab: the google.colab package is
# only importable there. Catch ImportError specifically so that unrelated
# problems (KeyboardInterrupt, SystemExit, ...) are not silently swallowed.
try:
  from google.colab import drive
  IN_COLAB=True
except ImportError:
  IN_COLAB=False

In [None]:
# Mount Google Drive and expose the project folder to the import system.
# NOTE(review): an earlier cell sets mount='/content/gdrive' and a drive_root
# below it; the mount point and drive_root used here replace those values.
import sys
from google.colab import drive

drive.mount('/content/drive')

# Location of the TFM project folder inside the mounted Drive
drive_root = "/content/drive/My Drive/TFM"

# Extend the module search path so that packages stored in the project folder
# (e.g. `transformer_maskgit`) can be imported
sys.path.append(drive_root)

In [None]:
!pip install pydicom nibabel

In [None]:
import os
import numpy as np
import pydicom
import nibabel as nib

from pydicom.filereader import dcmread
from pydicom.errors import InvalidDicomError
from tqdm import tqdm

# Paths
# Input: folder with patient subdirectories.
input_root = f'{drive_root}/manifest-1751367906950'
# Output: the .nii.gz files are saved in a 'preprocessed' folder inside the input folder.
output_root = f'{input_root}/preprocessed'

# Create the output folder up front; a pre-existing folder is not an error.
os.makedirs(output_root, exist_ok=True)

In [None]:
# Helper to check if folder contains DICOM files
def is_dicom_folder(path):
    """Return True if at least one file directly inside `path` parses as DICOM.

    Only regular files in `path` itself are probed (sub-directories are not
    descended into). Each candidate is opened with ``stop_before_pixels=True``
    so that only the header is read.

    A file that raises ``InvalidDicomError`` is skipped and the next file is
    tried, so stray non-DICOM files in the folder no longer cause a false
    negative.

    Args:
        path: Directory to inspect.

    Returns:
        True as soon as one file is read successfully, False if the folder
        holds no readable DICOM file (including an empty folder).

    Raises:
        OSError: Propagated from ``os.listdir`` when `path` cannot be listed.
    """
    for entry in os.listdir(path):
        full_path = os.path.join(path, entry)
        if not os.path.isfile(full_path):
            continue
        try:
            dcmread(full_path, stop_before_pixels=True)
        except InvalidDicomError:
            # Not a DICOM file; keep looking at the remaining entries.
            continue
        return True
    return False

In [None]:
import os
import numpy as np
import nibabel as nib
from pydicom import dcmread

# Process each DICOM folder
# Every directory under input_root that holds at least three *.dcm files is
# treated as one series and converted to a single .nii.gz file whose location
# under output_root mirrors the directory's path relative to input_root.
# Failures are reported per directory and do not stop the walk.
for root, dirs, files in os.walk(input_root):
    dicom_files = [f for f in files if f.lower().endswith('.dcm')]
    if len(dicom_files) >= 3:  # Heuristic: folder with multiple slices
        try:
            # Construct output path
            rel_path = os.path.relpath(root, input_root)
            output_path = os.path.join(output_root, rel_path)
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
            output_file = output_path + '.nii.gz'

            # Skip if already processed
            if os.path.exists(output_file):
                print(f"Skipping (already processed): {output_file}")
                continue

            dicom_paths = [os.path.join(root, f) for f in dicom_files]

            # Read slices and pair each with a sort key: InstanceNumber when it
            # is available, otherwise the third component of ImagePositionPatient.
            # Slices offering neither are dropped.
            slices = []
            for path in dicom_paths:
                ds = dcmread(path)
                # The attribute may exist but be empty (None); such a key would
                # break the sort, so fall through to the position in that case.
                instance_number = getattr(ds, 'InstanceNumber', None)
                if instance_number is not None:
                    slices.append((instance_number, ds))
                elif hasattr(ds, 'ImagePositionPatient'):
                    slices.append((ds.ImagePositionPatient[2], ds))

            if len(slices) < 3:
                continue  # Not a valid 3D scan

            # Order the slices by their key
            slices.sort(key=lambda x: x[0])
            dicom_series = [s[1] for s in slices]

            # Convert stored pixel values with the rescale transform and clip
            # the result to [-1000, 1000].
            images = []
            for ds in dicom_series:
                image = ds.pixel_array.astype(np.float32)
                # Absent rescale attributes mean "no transform": use identity
                # instead of failing the whole series.
                slope = float(getattr(ds, 'RescaleSlope', 1.0))
                intercept = float(getattr(ds, 'RescaleIntercept', 0.0))
                hu = image * slope + intercept
                hu = np.clip(hu, -1000, 1000)
                images.append(hu)

            volume = np.stack(images, axis=0)  # Shape: [Z, Y, X]
            print(f"Volume shape for {rel_path}: {volume.shape}")

            # Compute affine from DICOM metadata
            first_ds = dicom_series[0]
            pixel_spacing = [float(sp) for sp in first_ds.PixelSpacing]
            # Missing or empty SliceThickness falls back to 1.0
            slice_thickness = float(getattr(first_ds, 'SliceThickness', None) or 1.0)

            # The diagonal must follow the array axis order [Z, Y, X]:
            # axis 0 = slices, axis 1 = image rows, axis 2 = image columns.
            # PixelSpacing is (row spacing, column spacing).
            # Only voxel sizes are encoded; orientation and origin are not.
            affine = np.diag([
                slice_thickness,            # z spacing (axis 0)
                pixel_spacing[0],           # y spacing (axis 1, between rows)
                pixel_spacing[1],           # x spacing (axis 2, between columns)
                1
            ])

            # Save NIfTI
            nifti_img = nib.Nifti1Image(volume, affine)
            nib.save(nifti_img, output_file)
            print(f"Saved NIfTI: {output_file}")

        except Exception as e:
            print(f"Failed: {root}\nReason: {e}")