In [0]:
# %pip install numpy==1.26.4 pydicom==3.0.1 SimpleITK monai monailabel nvidia-nvimgcodec-cu12[all] highdicom

In [0]:
# Restart the notebook's Python process. Presumably paired with the %pip
# install cell above (currently commented out) so that freshly installed
# package versions are the ones imported below -- confirm before removing.
dbutils.library.restartPython()

In [0]:
    # Mode 2 re-imports modules before each cell executes, so edits to local
    # .py helpers are picked up without restarting the kernel.
    %load_ext autoreload
    %autoreload 2

In [0]:
import yaml

# Parse the notebook configuration. The context manager closes the file handle
# as soon as parsing finishes instead of leaving it open for the kernel's lifetime.
with open('config.yaml') as config_file:
    cfg = yaml.safe_load(config_file)

# Codec label: substituted into the output path below and reused as the MLflow
# run name / "compression" parameter in the logging cell.
compression = "nvImage_HTJ2K_progressive"
input_path = cfg.get("input_path")
# The configured output path may carry a literal "{compression}" placeholder;
# replace it with the codec label so each codec writes to its own directory.
output_path = cfg.get("output_path").replace("{compression}", compression)
input_path, output_path

In [0]:
%sh 
# Show the contents of the multi-frame DICOM directory and of the compressed
# directory before transcoding.
# NOTE(review): these paths are hard-coded here rather than derived from
# config.yaml -- confirm they match input_path / output_path.
ls -alH /Volumes/vdm-classic-q6xfh3_catalog/random/pixels_volume/large-multi-frame-dicoms
echo "---"
ls -alH /Volumes/vdm-classic-q6xfh3_catalog/random/pixels_volume/compressed

### Signature of `transcode_dicom_to_htj2k`
```python
def transcode_dicom_to_htj2k(
    input_dir: str,
    output_dir: str = None,
    num_resolutions: int = 6,
    code_block_size: tuple = (64, 64),
    max_batch_size: int = 256,
    add_basic_offset_table: bool = True,
) -> str:
```

In [0]:
import logging
# Configure the root logger at INFO so that INFO-level messages emitted through
# the standard logging module by the cells below are shown in the cell output.
logging.basicConfig(level=logging.INFO)

In [0]:
# Make the local transcoding helpers (htj2k_convert.py) importable from this notebook.
import sys
import os

# NOTE(review): absolute, user-specific workspace path -- consider moving it to config.yaml.
TRANSCODING_DIR = os.path.abspath('/Workspace/Users/douglas.moore@databricks.com/pixels-jpeg2000/notebooks/transcoding')

# Guard the append so re-running this cell does not keep adding duplicate entries.
if TRANSCODING_DIR not in sys.path:
    sys.path.append(TRANSCODING_DIR)


In [0]:
from htj2k_convert import transcode_dicom_to_htj2k

In [0]:
import time

In [0]:
# Time the transcode of the whole input directory. perf_counter() is a
# monotonic, high-resolution clock, so the interval cannot be skewed by
# wall-clock adjustments the way time.time() differences can.
start = time.perf_counter()
transcode_dicom_to_htj2k(
  input_dir=input_path,
  output_dir=output_path
)
# Elapsed seconds; read again by the MLflow logging cell ("duration_seconds").
duration = time.perf_counter() - start
duration

In [0]:
# test decode: read every transcoded file back and time a full pixel decode
import glob
import os
import pydicom
import pylibjpeg  # not referenced directly; presumably imported to fail fast if the decoder plugin is missing -- confirm
import time

# (file path, seconds to read + decode) for each transcoded file
decode_durations = []

# Sorted so the result list has a stable order regardless of filesystem listing order.
for dcm_file in sorted(glob.glob(os.path.join(output_path, "*.dcm"))):
    start = time.perf_counter()
    ds = pydicom.dcmread(dcm_file)
    # Accessing pixel_array is what triggers the actual decompression.
    pixel_array = ds.pixel_array
    decode_duration_s = time.perf_counter() - start
    decode_durations.append((dcm_file, decode_duration_s))
    # pixel_array.shape

decode_durations

In [0]:
%sh ls -alH /Volumes/vdm-classic-q6xfh3_catalog/random/pixels_volume/compressed
# Lists the compressed directory again, after the transcode step has run.

In [0]:
# Import here as well: this cell runs before the logging cell's own
# `import mlflow`, so on a fresh kernel the call below would raise NameError.
import mlflow

# Close any run left active by an earlier (possibly interrupted) execution so
# the logging cell starts from a clean state.
mlflow.end_run()

In [0]:
import glob
import os
import subprocess
import sys

import mlflow

#mlflow.set_experiment(cfg.get("experiment_name"))

# Everything is logged inside this single `with` block so that parameters,
# metrics and artifacts all land in the same run and the run is closed on exit.
with mlflow.start_run(run_name=f"compress_{compression}") as run:

    # Log parameters
    mlflow.log_param("compression", compression)
    mlflow.log_param("input_path", input_path)
    mlflow.log_param("output_path", output_path)
    # `table` is not defined by any visible cell; log it only when an earlier
    # cell has set it, instead of failing the whole run with NameError.
    if "table" in globals():
        mlflow.log_param("table", table)
    mlflow.log_param("encoder", "nvimgcodec")
    # mlflow.log_param("sop_class_uid", ds.SOPClassUID)
    # mlflow.log_param("transfer_syntax_uid", ds.file_meta.TransferSyntaxUID)
    # mlflow.log_param("enc_params.jpeg2k_params.ht", enc_params.jpeg2k_params.ht)
    # mlflow.log_param("enc_params.jpeg2k_params.num_resolutions", enc_params.jpeg2k_params.num_resolutions)

    # Log metrics
    # Input and output files are paired by sorted position.
    input_files = sorted(glob.glob(os.path.join(input_path, "*")))
    output_files = sorted(glob.glob(os.path.join(output_path, "*")))
    if len(input_files) != len(output_files):
        # zip() below silently truncates to the shorter list; make that visible.
        print(
            f"WARNING: {len(input_files)} input files vs {len(output_files)} output files; "
            "only the leading pairs in sorted order are compared"
        )

    total_input_size = 0
    total_output_size = 0

    for in_file, out_file in zip(input_files, output_files):
        input_size = os.stat(in_file).st_size
        output_size = os.stat(out_file).st_size
        compression_ratio = round(input_size / output_size, 2) if output_size else 0
        savings_percent = round(100 * (input_size - output_size) / input_size, 2) if input_size else 0

        total_input_size += input_size
        total_output_size += output_size

        # Per-file metrics, keyed by file name.
        mlflow.log_metric(f"{os.path.basename(in_file)}_input_size_bytes", input_size)
        mlflow.log_metric(f"{os.path.basename(out_file)}_output_size_bytes", output_size)
        mlflow.log_metric(f"{os.path.basename(out_file)}_compression_ratio", compression_ratio)
        mlflow.log_metric(f"{os.path.basename(out_file)}_savings_percent", savings_percent)

        print(f"{in_file}")
        print(f"{out_file}")
        print(f"Input  size: {input_size:>15,}")
        print(f"Output size: {output_size:>15,}")
        print(f"Reduction:   {compression_ratio:>18.2f}x")
        print(f"Savings:     {savings_percent:>18.2f}%")

    # Run-level metrics, logged once over all compared files rather than once
    # per loop iteration; both divisions are guarded against empty totals.
    total_ratio = round(total_input_size / total_output_size, 2) if total_output_size else 0
    total_savings = (
        round(100 * (total_input_size - total_output_size) / total_input_size, 2)
        if total_input_size
        else 0
    )
    mlflow.log_metric("input_size_bytes", total_input_size)
    mlflow.log_metric("output_size_bytes", total_output_size)
    mlflow.log_metric("compression_ratio", total_ratio)
    mlflow.log_metric("savings_percent", total_savings)
    mlflow.log_metric("duration_seconds", round(duration, 2))
    # Total decode time over every file timed by the decode-test cell (0 when
    # nothing was decoded), rather than only the last file's duration.
    total_decode_s = sum(seconds for _path, seconds in decode_durations)
    mlflow.log_metric("decode_duration_seconds", round(total_decode_s, 2))

    # Log artifacts (source DICOM, compressed DICOM, config)
    mlflow.log_artifact("config.yaml", artifact_path="config")

    # Check for a GPU. An argument list (no shell) is used, so a missing
    # nvidia-smi binary surfaces as FileNotFoundError and a failing one as
    # CalledProcessError; both mean "no usable GPU" here.
    try:
        gpu_info = subprocess.check_output(["nvidia-smi", "-L"]).decode().strip()
    except (subprocess.CalledProcessError, FileNotFoundError):
        gpu_info = "No GPU detected"
    mlflow.log_param("gpu_info", gpu_info)

    # Log the full requirements.txt file of the current environment. Invoking
    # pip through the running interpreter targets this kernel's environment
    # rather than whichever `pip` happens to be first on PATH.
    with open("full_requirements.txt", "w") as f:
        subprocess.run([sys.executable, "-m", "pip", "freeze"], stdout=f)
    mlflow.log_artifact("full_requirements.txt")

    # Optionally log code snapshot
    #mlflow.log_artifact("notebook.py", artifact_path="source_code")