In [1]:
pip install juliacall

Collecting juliacall
  Using cached juliacall-0.9.31-py3-none-any.whl.metadata (4.5 kB)
Collecting juliapkg<0.2,>=0.1.21 (from juliacall)
  Using cached juliapkg-0.1.22-py3-none-any.whl.metadata (6.8 kB)
Collecting filelock<4.0,>=3.16 (from juliapkg<0.2,>=0.1.21->juliacall)
  Downloading filelock-3.20.3-py3-none-any.whl.metadata (2.1 kB)
Collecting semver<4.0,>=3.0 (from juliapkg<0.2,>=0.1.21->juliacall)
  Using cached semver-3.0.4-py3-none-any.whl.metadata (6.8 kB)
Collecting tomlkit<0.14,>=0.13.3 (from juliapkg<0.2,>=0.1.21->juliacall)
  Using cached tomlkit-0.13.3-py3-none-any.whl.metadata (2.8 kB)
Using cached juliacall-0.9.31-py3-none-any.whl (12 kB)
Using cached juliapkg-0.1.22-py3-none-any.whl (21 kB)
Downloading filelock-3.20.3-py3-none-any.whl (16 kB)
Using cached semver-3.0.4-py3-none-any.whl (17 kB)
Using cached tomlkit-0.13.3-py3-none-any.whl (38 kB)
Installing collected packages: tomlkit, semver, filelock, juliapkg, juliacall
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
pip uninstall curl

[0mNote: you may need to restart the kernel to use updated packages.


In [6]:
pip install SimpleITK

Collecting SimpleITK
  Using cached simpleitk-2.5.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (7.4 kB)
Using cached simpleitk-2.5.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (52.6 MB)
Installing collected packages: SimpleITK
Successfully installed SimpleITK-2.5.3
Note: you may need to restart the kernel to use updated packages.


In [9]:
pip install tqdm

Collecting tqdm
  Downloading tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Downloading tqdm-4.67.1-py3-none-any.whl (78 kB)
Installing collected packages: tqdm
Successfully installed tqdm-4.67.1
Note: you may need to restart the kernel to use updated packages.


In [4]:
# Bind Julia's `Main` module to `jl`; Julia code below is evaluated through it.
from juliacall import Main as jl
# Add the "Radiomics" package through Julia's package manager (Pkg).
jl.seval('import Pkg; Pkg.add("Radiomics")')

   Resolving package versions...
     Project No packages added to or removed from `~/.venv/julia_env/Project.toml`
    Manifest No packages added to or removed from `~/.venv/julia_env/Manifest.toml`


In [10]:
import json
import time
from pathlib import Path

import numpy as np
import SimpleITK as sitk
from juliacall import Main as jl

# --- Input folders (adjust to your layout) ---
# Folder holding the CT volumes; the name is used verbatim, so keep it
# spelled exactly like the directory on disk.
CT_DIR = Path("nlst_representive_sample_nifti")
# Folder holding the MOOSE segmentation outputs.
MOOSE_DIR = Path("moose_segmentation_outputs")

# --- Output files ---
OUT_JSONL, OUT_JSON = (
    Path("moose_radiomics_features.jsonl"),
    Path("moose_radiomics_features.json"),
)

# --- Load the Julia package once for the whole session ---
jl.seval("using Radiomics")

# Sanity check: report whether each input folder exists and where it resolves to.
for _label, _folder in (("CT_DIR", CT_DIR), ("MOOSE_DIR", MOOSE_DIR)):
    print(f"{_label} exists:", _folder.exists(), _folder.resolve())


CT_DIR exists: True /home/jupyter/nlst_representive_sample_nifti
MOOSE_DIR exists: True /home/jupyter/moose_segmentation_outputs


In [None]:
from tqdm import tqdm

def read_sitk_array(img_path: Path):
    """Load an image through SimpleITK.

    Returns a ``(array, spacing_zyx)`` pair: the voxel array produced by
    ``sitk.GetArrayFromImage`` and the image spacing re-ordered to z, y, x
    so that it follows the array's axis order.
    """
    image = sitk.ReadImage(str(img_path))
    voxels = sitk.GetArrayFromImage(image)  # axes are z, y, x
    # GetSpacing() reports x, y, z; exactly three values are expected here.
    spacing_x, spacing_y, spacing_z = image.GetSpacing()
    return voxels, [spacing_z, spacing_y, spacing_x]

def binarize_mask(mask_arr: np.ndarray) -> np.ndarray:
    """Ensure mask is binary {0,1}."""
    return (mask_arr > 0).astype(np.uint8)

def extract_features(ct_arr: np.ndarray, mask_arr: np.ndarray, spacing_zyx):
    """Extract radiomic features for one image/mask pair via Radiomics.jl.

    Parameters
    ----------
    ct_arr : np.ndarray
        Image voxels.
    mask_arr : np.ndarray
        Mask voxels; must have the same shape as ``ct_arr``.
    spacing_zyx : sequence
        Voxel spacing, passed through unchanged to the Julia function.

    Returns
    -------
    dict
        ``dict(...)`` of whatever ``Radiomics.extract_radiomic_features``
        returns.

    Raises
    ------
    ValueError
        If the image and the mask do not have the same shape.
    """
    # A mask on a different voxel grid cannot be applied to the image; fail
    # with a readable message before crossing into Julia.
    ct_shape, mask_shape = np.shape(ct_arr), np.shape(mask_arr)
    if ct_shape != mask_shape:
        raise ValueError(
            f"CT shape {ct_shape} does not match mask shape {mask_shape}"
        )
    feats = jl.Radiomics.extract_radiomic_features(ct_arr, mask_arr, spacing_zyx)
    return dict(feats)

def find_ct_for_moose_folder(ct_dir: Path, moose_folder_name: str) -> Path | None:
    """
    Match CT file to MOOSE subfolder by prefix:
      MOOSE folder: nlst__218284__...9004
      CT file:      CT_nlst__218284__...9004.<more>.nii(.gz)
    """
    pattern = f"CT_{moose_folder_name}*.nii*"
    matches = sorted(ct_dir.glob(pattern))
    if not matches:
        return None
    return matches[0]  # deterministic

def iter_masks(moose_case_dir: Path):
    """Yield the ``*.nii`` and ``*.nii.gz`` files directly inside
    ``moose_case_dir``, in sorted path order."""
    nifti_paths = [
        *moose_case_dir.glob("*.nii"),
        *moose_case_dir.glob("*.nii.gz"),
    ]
    nifti_paths.sort()
    yield from nifti_paths

# --- Batch extraction: one JSONL record per (case, mask) pair ---
# Walks every MOOSE case folder, pairs it with its CT file, extracts features
# for each non-empty mask and writes them to OUT_JSONL (streamed) and
# OUT_JSON (written once at the end).

t0 = time.perf_counter()  # monotonic clock; only used for the elapsed report

case_dirs = sorted(p for p in MOOSE_DIR.iterdir() if p.is_dir())
print(f"Found {len(case_dirs)} MOOSE case folders")

# Bookkeeping for the summary printed at the end.
missing_ct = []   # case folders with no matching CT file
empty_masks = []  # case folders that contain no mask files
errors = []       # one dict per failed CT read / mask read / extraction

n_cases_done = 0  # cases that reached the mask loop (even if every mask failed)
n_masks_done = 0  # masks whose features were written

results_for_big_json = []  # optional (can be large)

MAX_ERRORS_SHOWN = 10  # cap on error details printed in the summary

with OUT_JSONL.open("w", encoding="utf-8") as f_jsonl:
    for case_dir in tqdm(case_dirs, desc="CT cases"):
        moose_case_name = case_dir.name
        ct_path = find_ct_for_moose_folder(CT_DIR, moose_case_name)

        if ct_path is None:
            missing_ct.append(moose_case_name)
            continue

        # List the masks before touching the CT so that a case without any
        # mask does not pay for loading a full CT volume.
        mask_files = list(iter_masks(case_dir))
        if not mask_files:
            empty_masks.append(moose_case_name)
            continue

        # Read CT once per case
        try:
            ct_arr, spacing_zyx = read_sitk_array(ct_path)
        except Exception as e:
            errors.append({"case": moose_case_name, "ct": str(ct_path), "error": str(e)})
            continue

        for mask_path in mask_files:
            # A failure on one mask is recorded and must not stop the run.
            try:
                mask_arr_raw, _ = read_sitk_array(mask_path)
                mask_arr = binarize_mask(mask_arr_raw)

                if not mask_arr.any():
                    # skip empty masks
                    continue

                feats = extract_features(ct_arr, mask_arr, spacing_zyx)

                record = {
                    "ct_file": ct_path.name,
                    "ct_path": str(ct_path),
                    "moose_case_folder": moose_case_name,
                    "mask_file": mask_path.name,
                    "mask_path": str(mask_path),
                    "spacing_zyx": spacing_zyx,
                    "features": feats,
                }

                # Serialize first: if the record cannot be encoded, nothing
                # partial ends up in the file.
                line = json.dumps(record)
                f_jsonl.write(line + "\n")
                # Flush per record so finished work survives an interrupted run.
                f_jsonl.flush()

                results_for_big_json.append(record)
                n_masks_done += 1

            except Exception as e:
                errors.append({"case": moose_case_name, "mask": str(mask_path), "error": str(e)})
                continue

        n_cases_done += 1

# Optional: big JSON (nice for small runs; for large runs JSONL is better)
with OUT_JSON.open("w", encoding="utf-8") as f_json:
    json.dump(results_for_big_json, f_json)

print("\n--- SUMMARY ---")
print("Cases processed:", n_cases_done)
print("Masks processed:", n_masks_done)
print("Missing CT matches:", len(missing_ct))
print("Cases with no masks:", len(empty_masks))
print("Errors:", len(errors))
# Show a bounded preview of what went wrong; the full list stays in `errors`.
for err in errors[:MAX_ERRORS_SHOWN]:
    print("  ", err)
if len(errors) > MAX_ERRORS_SHOWN:
    print(f"   ... {len(errors) - MAX_ERRORS_SHOWN} more (see `errors`)")
print("JSONL:", OUT_JSONL.resolve())
print("JSON :", OUT_JSON.resolve())
print(f"Elapsed: {time.perf_counter() - t0:.1f}s")


Found 42 MOOSE case folders


└ @ Radiomics ~/.julia/packages/Radiomics/MvOYc/src/utils.jl:246


Island sizes (in voxels):
  Island 1: 28861 voxels
  Island 2: 2519 voxels


└ @ Radiomics ~/.julia/packages/Radiomics/MvOYc/src/utils.jl:246
└ @ Radiomics ~/.julia/packages/Radiomics/MvOYc/src/utils.jl:246


Island sizes (in voxels):
  Island 1: 2193916 voxels
  Island 2: 2169379 voxels
Island sizes (in voxels):
  Island 1: 252131 voxels
  Island 2: 241229 voxels
