In [None]:
import os
import numpy as np
import tifffile as tiff
import trimesh
from scipy.ndimage import distance_transform_edt, zoom
from pathlib import Path
import random

# ---------- CONFIG ----------
# Module-level settings read by the functions and the __main__ block below.
INPUT_DIR = "path/to/ct_tiff_volumes"        # Folder of grayscale CT .tiff volumes
OUTPUT_DIR = "datasets/my_ct_tsdf_dataset"   # Folder to save TSDF and partials
TSDF_RES = 32                                # Size of output TSDF grid (32x32x32)
THRESHOLD = 100                              # Intensity threshold for binarizing CT
TRUNC_DIST = 10                              # TSDF truncation distance (distances are divided by it, then clipped to [-1, 1])
HOLES_PER_VOLUME = 3                         # How many hole variants to make per volume
MAX_HOLES = 5                                # Upper bound on primitives subtracted per hole variant
MISSING_RANGE = (0.05, 0.4)                  # Acceptable missing-volume fraction (min, max), i.e. 5%-40%

# Primitives to use for hole cutting
PRIMITIVES = ["box", "cylinder", "cone", "capsule", "uv_sphere"]
# (low, high) of the uniform distribution each primitive dimension is drawn from.
# NOTE(review): these are absolute mesh units, not a fraction of the object size - confirm
# they match the scale of the meshes being cut.
PRIM_SIZE_RANGE = (0.4, 0.9)


# ---------- UTILS ----------

def segment_ct(volume, method="auto", manual_thresh=12000):
    """
    Binarize a CT volume by intensity thresholding.

    method:
        "manual" uses ``manual_thresh`` as the cut-off; "auto" places the
        cut-off 10% of the way from the minimum to the maximum intensity.
    Returns a uint8 array that is 1 where ``volume`` exceeds the cut-off.
    Raises ValueError for any other ``method``.
    """
    if method not in ("manual", "auto"):
        raise ValueError("method must be 'auto' or 'manual'")

    if method == "manual":
        threshold = manual_thresh
    else:
        lowest = volume.min()
        highest = volume.max()
        # ~10% into the intensity range
        threshold = lowest + (highest - lowest) * 0.1

    print(f"[INFO] Using threshold: {threshold}")
    foreground = volume > threshold
    return foreground.astype(np.uint8)

def binary_to_tsdf(mask, trunc=10):
    """
    Turn a binary occupancy mask into a truncated signed distance field.

    Values are negative inside the mask and positive outside; distances are
    divided by ``trunc`` and clipped to [-1, 1]. Returns float32.
    """
    solid = mask.astype(bool)
    # Distance from each empty voxel to the object, minus the distance from
    # each solid voxel to the background.
    signed = distance_transform_edt(~solid) - distance_transform_edt(solid)
    normalized = np.clip(signed / trunc, -1, 1)
    return normalized.astype(np.float32)

def resize_volume(vol, target_shape=(32,32,32)):
    """Resample ``vol`` to ``target_shape`` with linear interpolation; returns float32."""
    # Per-axis scale factor = desired size / current size.
    scale = [wanted / current for wanted, current in zip(target_shape, vol.shape)]
    resampled = zoom(vol, scale, order=1)
    return resampled.astype(np.float32)

def make_primitive(name, size_range):
    """
    Build a trimesh primitive with randomly drawn dimensions.

    name: one of "box", "cylinder", "cone", "capsule", "uv_sphere".
    size_range: (low, high) passed to ``np.random.uniform`` for every
        dimension (box extents, radius, height).
    Raises ValueError for an unrecognised name.
    """
    if name == "box":
        extents = np.random.uniform(*size_range, 3)
        return trimesh.creation.box(extents=extents)

    if name == "cylinder":
        # Radius is drawn before height, for every radius/height primitive.
        radius = np.random.uniform(*size_range)
        height = np.random.uniform(*size_range)
        return trimesh.creation.cylinder(radius=radius, height=height)

    if name == "cone":
        radius = np.random.uniform(*size_range)
        height = np.random.uniform(*size_range)
        return trimesh.creation.cone(radius=radius, height=height)

    if name == "capsule":
        radius = np.random.uniform(*size_range)
        height = np.random.uniform(*size_range)
        return trimesh.creation.capsule(radius=radius, height=height)

    if name == "uv_sphere":
        radius = np.random.uniform(*size_range)
        return trimesh.creation.uv_sphere(radius=radius)

    raise ValueError(f"Unknown primitive: {name}")

def randomly_place(mesh, target_bounds):
    """
    Give ``mesh`` a random orientation and move it to a random point in a box.

    mesh: a trimesh mesh; it is transformed in place and also returned.
    target_bounds: pair ``(bbmin, bbmax)`` of per-axis lower/upper limits from
        which the target position is drawn uniformly.

    The rotation is applied first. Rotating after translating (as this
    function used to) rotates about the world origin and swings the mesh away
    from the sampled position, possibly outside ``target_bounds``.
    """
    bbmin, bbmax = target_bounds

    # Orient first, while the position does not matter yet.
    mesh.apply_transform(trimesh.transformations.random_rotation_matrix())

    # Move the centre of the (rotated) bounding box onto the sampled point, so
    # primitives whose local origin is not their centre still land in bounds.
    target = np.random.uniform(bbmin, bbmax)
    current_center = np.asarray(mesh.bounds, dtype=float).mean(axis=0)
    mesh.apply_translation(target - current_center)
    return mesh

def subtract_holes(original_mesh, N=5):
    """
    Cut up to ``N`` random primitives out of a copy of ``original_mesh``.

    Each primitive is picked from ``PRIMITIVES``, sized from
    ``PRIM_SIZE_RANGE`` and placed inside the mesh's bounding box shrunk to
    80% about its centre. A primitive whose boolean difference fails is
    skipped (best effort). The input mesh is not modified.

    Returns the mesh after all successful subtractions.
    """
    # Shrink the bounding box about its own centre. Multiplying the raw bounds
    # by 0.8 would scale about the world origin and bias placement toward it.
    bounds = np.asarray(original_mesh.bounds, dtype=float)
    center = bounds.mean(axis=0)
    half_extent = (bounds[1] - bounds[0]) * 0.5 * 0.8
    placement_bounds = (center - half_extent, center + half_extent)

    # NOTE(review): PRIM_SIZE_RANGE is in absolute mesh units; confirm it is
    # appropriate for the scale of ``original_mesh``.
    diff_mesh = original_mesh.copy()
    for _ in range(N):
        prim = make_primitive(random.choice(PRIMITIVES), PRIM_SIZE_RANGE)
        prim = randomly_place(prim, placement_bounds)
        try:
            diff_mesh = diff_mesh.difference(prim, check_volume=False)
        except Exception as exc:  # boolean backends fail in many ways; keep going
            print(f"[WARN] Boolean difference failed, skipping primitive: {exc}")
            continue
    return diff_mesh


# ---------- MAIN ----------

def _mesh_to_mask(mesh, shape, pitch=1.0):
    """
    Rasterize ``mesh`` into a solid uint8 mask on a grid of the given shape.

    Assumes the mesh vertices are expressed in voxel-index coordinates of that
    grid (as for a marching-cubes mesh built from it at unit pitch) - TODO
    confirm against the trimesh version in use. Cells outside the grid are
    dropped.
    """
    # ``voxelized`` only marks surface cells; ``fill`` closes the interior.
    grid = mesh.voxelized(pitch=pitch).fill()
    mask = np.zeros(shape, dtype=np.uint8)

    cells = np.rint(np.asarray(grid.points, dtype=float) / pitch).astype(int)
    if cells.size == 0:
        return mask

    inside = np.all((cells >= 0) & (cells < np.asarray(shape)), axis=1)
    cells = cells[inside]
    mask[cells[:, 0], cells[:, 1], cells[:, 2]] = 1
    return mask


def process_ct_file(tiff_path, output_dir):
    """
    Build the ground-truth TSDF and several "holed" TSDF variants for one CT.

    tiff_path: path of the CT TIFF volume to read.
    output_dir: dataset root. The ground truth is saved to
        ``<output_dir>/<TSDF_RES>/gt/<name>.npy`` and each accepted variant to
        ``<output_dir>/<TSDF_RES>/<missing rounded to 0.1>/<name>_<i>.npy``.

    For each of ``HOLES_PER_VOLUME`` variants, up to 30 random cuts are tried
    until the missing-volume fraction falls inside ``MISSING_RANGE``; a
    variant that never succeeds is reported and skipped.
    """
    max_attempts = 30
    target_shape = (TSDF_RES, TSDF_RES, TSDF_RES)

    volume = tiff.imread(tiff_path)  # original note: shape (50,50,50)
    name = Path(tiff_path).stem

    # THRESHOLD must go in as the manual threshold; passing it positionally
    # lands in ``method`` and makes segment_ct raise ValueError.
    binary = segment_ct(volume, method="manual", manual_thresh=THRESHOLD)
    tsdf_gt = binary_to_tsdf(binary, TRUNC_DIST)
    tsdf_gt = resize_volume(tsdf_gt, target_shape)

    # Save ground truth TSDF
    gt_path = Path(output_dir) / f"{TSDF_RES}" / "gt"
    gt_path.mkdir(parents=True, exist_ok=True)
    np.save(gt_path / f"{name}.npy", tsdf_gt)

    # Reference occupied volume; constant across attempts.
    vol_gt = np.count_nonzero(tsdf_gt <= 0)
    if vol_gt == 0:
        # Nothing to cut and the missing ratio would divide by zero.
        print(f"[WARN] {name}: segmentation is empty; no hole variants generated")
        print(f"✅ Processed {name}")
        return

    # matrix_to_marching_cubes already returns a Trimesh (not verts/faces).
    full_mesh = trimesh.voxel.ops.matrix_to_marching_cubes(binary)

    for i in range(HOLES_PER_VOLUME):
        for _ in range(max_attempts):
            try:
                mesh_hole = subtract_holes(full_mesh, N=random.randint(1, MAX_HOLES))
                # Rasterize onto the SAME grid as the ground truth so the two
                # TSDFs stay aligned. Cutting can only remove material, so
                # anything outside the original mask is voxelization noise.
                binary_hole = _mesh_to_mask(mesh_hole, binary.shape) & binary
                tsdf_hole = binary_to_tsdf(binary_hole, TRUNC_DIST)
                tsdf_hole = resize_volume(tsdf_hole, target_shape)
            except Exception as exc:  # mesh booleans/voxelization are best effort
                print(f"[WARN] {name}_{i}: hole attempt failed ({exc})")
                continue

            # Check missing volume ratio
            vol_hole = np.count_nonzero(tsdf_hole <= 0)
            missing = (vol_gt - vol_hole) / vol_gt
            if missing < MISSING_RANGE[0] or missing > MISSING_RANGE[1]:
                continue

            missing_bucket = round(missing, 1)
            save_dir = Path(output_dir) / f"{TSDF_RES}" / f"{missing_bucket}"
            save_dir.mkdir(parents=True, exist_ok=True)
            np.save(save_dir / f"{name}_{i}.npy", tsdf_hole)
            break  # success!
        else:
            print(f"[WARN] {name}_{i}: no valid cut after {max_attempts} attempts")

    print(f"✅ Processed {name}")


if __name__ == "__main__":
    # Process every TIFF volume found directly inside INPUT_DIR.
    Path(OUTPUT_DIR).mkdir(parents=True, exist_ok=True)

    # Accept both common TIFF extensions, not only ".tiff".
    input_dir = Path(INPUT_DIR)
    tiff_files = sorted([*input_dir.glob("*.tif"), *input_dir.glob("*.tiff")])
    if not tiff_files:
        print(f"[WARN] No .tif/.tiff files found in {input_dir}")

    for file in tiff_files:
        process_ct_file(file, OUTPUT_DIR)


In [None]:
import matplotlib.pyplot as plt
# Visual sanity check: overlay the segmentation outline on one CT slice.
# NOTE(review): `volume` and `binary_mask` are not defined in this cell; they
# are presumably left over from an earlier interactive session - confirm.
slice_index = 25
ct_slice = volume[slice_index]
mask_slice = binary_mask[slice_index]
plt.imshow(ct_slice, cmap='gray')
plt.contour(mask_slice, colors='red')
plt.title("Check Segmentation")
plt.show()

In [None]:
import os
import numpy as np
import tifffile as tiff
from scipy.ndimage import distance_transform_edt, zoom

def segment_ct(volume, threshold=12000):
    """Return a uint8 mask that is 1 where ``volume`` is strictly above ``threshold``."""
    above = volume > threshold
    return above.astype(np.uint8)

def binary_to_tsdf(mask, trunc=10):
    """
    Convert a binary occupancy mask into a truncated signed distance field.

    mask: array whose non-zero entries mark the object (bool or integer).
    trunc: distances are divided by this value and clipped to [-1, 1].
    Returns a float32 array, negative inside the object and positive outside.
    """
    # Cast to bool before inverting: ``~`` on a uint8 mask is a bitwise NOT
    # (0 -> 255, 1 -> 254), which leaves no zero voxels and breaks the outside
    # distance transform.
    mask = np.asarray(mask).astype(bool)
    dist_out = distance_transform_edt(~mask)
    dist_in = distance_transform_edt(mask)
    tsdf = dist_out - dist_in
    tsdf = np.clip(tsdf / trunc, -1, 1)
    return tsdf.astype(np.float32)

def resize_to_shape(volume, shape=(32, 32, 32)):
    """Resample ``volume`` to ``shape`` using linear (order-1) interpolation."""
    # One zoom factor per axis: requested length over current length.
    per_axis = [new_len / old_len for new_len, old_len in zip(shape, volume.shape)]
    return zoom(volume, per_axis, order=1)

def create_random_hole(volume, hole_size=10):
    """
    Return a copy of a binary mask with a random cube zeroed out.

    volume: occupancy mask (non-zero = object); it is not modified.
    hole_size: edge length of the removed cube, in voxels. On an axis shorter
        than ``hole_size`` the hole spans the whole axis.
    """
    # ``randint`` excludes its upper bound, so add 1 to allow the last valid
    # start; clamp at 0 so axes no longer than the hole do not raise.
    starts = [np.random.randint(0, max(s - hole_size, 0) + 1) for s in volume.shape]
    region = tuple(slice(start, start + hole_size) for start in starts)

    volume_with_hole = volume.copy()
    # 0 marks empty space in the mask; writing 1 here would add material
    # instead of removing it.
    volume_with_hole[region] = 0
    return volume_with_hole

def preprocess_volume(tiff_path, save_path, name):
    """
    Turn one CT TIFF into a (ground truth, partial) TSDF pair on disk.

    Reads ``tiff_path``, segments it, and writes ``<save_path>/<name>.npz``
    with two arrays: ``gt`` (TSDF of the full mask) and ``cond`` (TSDF of the
    mask after ``create_random_hole``), both resized by ``resize_to_shape``.
    """
    ct_volume = tiff.imread(tiff_path)
    solid_mask = segment_ct(ct_volume)

    # Complete shape first, then the damaged copy (same order of RNG use).
    complete_tsdf = resize_to_shape(binary_to_tsdf(solid_mask))
    damaged_mask = create_random_hole(solid_mask)
    damaged_tsdf = resize_to_shape(binary_to_tsdf(damaged_mask))

    destination = os.path.join(save_path, f"{name}.npz")
    np.savez_compressed(destination, gt=complete_tsdf, cond=damaged_tsdf)

if __name__ == "__main__":
    # Convert every .tif/.tiff in ``tiff_folder`` and record the processed names.
    tiff_folder = "/path/to/tiffs"
    output_folder = "/path/to/tsdf_data"
    os.makedirs(output_folder, exist_ok=True)
    filelist = []

    for fname in sorted(os.listdir(tiff_folder)):
        if not fname.endswith((".tiff", ".tif")):
            continue  # not a TIFF volume
        name = os.path.splitext(fname)[0]
        source = os.path.join(tiff_folder, fname)
        preprocess_volume(source, output_folder, name)
        filelist.append(name)

    # Write the file list, one sample name per line
    list_path = os.path.join(output_folder, "filelist.txt")
    with open(list_path, "w") as f:
        f.writelines(f"{entry}\n" for entry in filelist)


In [None]:
import os
import numpy as np
import tifffile as tiff
from scipy.ndimage import distance_transform_edt, zoom
from skimage.filters import threshold_otsu

def segment_ct(volume):
    """Binarize ``volume`` at its Otsu threshold; returns a uint8 mask (1 = above threshold)."""
    otsu_level = threshold_otsu(volume)
    foreground = volume > otsu_level
    return foreground.astype(np.uint8)

def binary_to_tsdf(mask, trunc=10):
    """
    Build a truncated signed distance field from a 0/1 occupancy mask.

    The result is negative inside the object and positive outside, scaled by
    ``1 / trunc`` and clipped to [-1, 1]; returned as float32.
    """
    # ``1 - mask`` flips a 0/1 mask so the transform measures distance to the object.
    outside_distance = distance_transform_edt(1 - mask)
    inside_distance = distance_transform_edt(mask)
    scaled = (outside_distance - inside_distance) / trunc
    return np.clip(scaled, -1, 1).astype(np.float32)

def resize_to_shape(volume, shape=(32, 32, 32)):
    """Linearly resample ``volume`` so that its shape becomes ``shape``."""
    scale_per_axis = []
    for wanted, current in zip(shape, volume.shape):
        scale_per_axis.append(wanted / current)
    return zoom(volume, scale_per_axis, order=1)

def create_random_hole(mask, hole_size=10):
    """
    Return a copy of ``mask`` with a randomly placed cube set to 0.

    mask: binary occupancy mask; the input array is left untouched.
    hole_size: edge length of the cube in voxels. On an axis shorter than
        ``hole_size`` the hole covers the whole axis instead of raising.
    """
    mask = mask.copy()
    # ``randint`` excludes its upper bound: add 1 so the last valid start can
    # be drawn, and clamp at 0 so small volumes do not trigger
    # "low >= high" errors.
    starts = [np.random.randint(0, max(s - hole_size, 0) + 1) for s in mask.shape]
    region = tuple(slice(start, start + hole_size) for start in starts)
    mask[region] = 0
    return mask

def preprocess_volume(tiff_path, save_path, name, output_size=(32, 32, 32)):
    """
    Convert one CT TIFF into a compressed (gt, cond) TSDF pair.

    The volume at ``tiff_path`` is segmented; the TSDF of the full mask is
    stored as ``gt`` and the TSDF of the mask after ``create_random_hole`` as
    ``cond``, both resized to ``output_size``, in ``<save_path>/<name>.npz``.
    """
    ct_volume = tiff.imread(tiff_path)
    solid_mask = segment_ct(ct_volume)

    # Full shape first, then the version with a hole (keeps RNG use in order).
    complete_tsdf = resize_to_shape(binary_to_tsdf(solid_mask), shape=output_size)
    holed_mask = create_random_hole(solid_mask)
    partial_tsdf = resize_to_shape(binary_to_tsdf(holed_mask), shape=output_size)

    np.savez_compressed(
        os.path.join(save_path, f"{name}.npz"),
        gt=complete_tsdf,
        cond=partial_tsdf,
    )

if __name__ == "__main__":
    # Convert the "r02" TIFF volumes in ``tiff_folder`` and list what was written.
    tiff_folder = "/path/to/your/tiff/files"  # ← Change this
    save_folder = "./tsdf_dataset"
    os.makedirs(save_folder, exist_ok=True)

    filelist = []
    for fname in sorted(os.listdir(tiff_folder)):
        # Only files whose name contains "r02" and that end in .tiff/.tif.
        if "r02" not in fname or not fname.endswith((".tiff", ".tif")):
            continue
        name = os.path.splitext(fname)[0]
        print(f"✅ Processing: {name}")
        source = os.path.join(tiff_folder, fname)
        preprocess_volume(source, save_folder, name)
        filelist.append(name)

    list_path = os.path.join(save_folder, "filelist.txt")
    with open(list_path, "w") as f:
        f.writelines(f"{entry}\n" for entry in filelist)

    print(f"\n✅ Done! Saved {len(filelist)} files in: {save_folder}")