In [None]:
# DiCube: Medical Image Compression Library

High-performance medical image storage with advanced compression codecs:
- **JPEG-XL (dcba)**: Next-generation compression with excellent quality
- **HTJ2K (dcbs)**: High-performance JPEG 2000 for medical imaging


In [None]:
import numpy as np
import pandas as pd
import time
import tempfile
from pathlib import Path

import pydicom
import nibabel as nib
from dicube.core.image import DicomCubeImage
import dicube.codecs

# Print the codec names reported by dicube.codecs.list_codecs();
# the benchmark cells below request 'dcba' and 'dcbs' by name.
print(f"Available codecs: {dicube.codecs.list_codecs()}")


In [None]:
## Load Real DICOM Data


In [None]:
# Read the DICOM series from disk into a DicomCubeImage
dicom_folder = "testdata/dicom/sample_150"
image = DicomCubeImage.from_dicom_folder(dicom_folder)

# Report the basic properties of the loaded pixel data, one per line
print(
    "Loaded DICOM series:",
    f"  Shape: {image.shape}",
    f"  Dtype: {image.raw_image.dtype}",
    f"  Size: {image.raw_image.nbytes:,} bytes",
    f"  Value range: {image.raw_image.min()} - {image.raw_image.max()}",
    sep="\n",
)


In [None]:
## Compression Performance Comparison


In [None]:
def benchmark_codec(image_data, codec_name, runs=3):
    """Benchmark the in-memory encode/decode performance of one codec.

    One untimed warm-up round trip is performed first. Then ``runs`` encodes
    are timed, followed by ``runs`` decodes of the last encoded payload.

    Args:
        image_data: NumPy array to compress; its ``nbytes`` is used as the
            uncompressed size.
        codec_name: Name passed to ``dicube.codecs.get_codec``. The returned
            object is indexed with ``'encode'`` and ``'decode'``.
        runs: Number of timed repetitions per direction. Must be >= 1.

    Returns:
        dict with the keys ``codec``, ``original_size``, ``compressed_size``,
        ``compression_ratio``, ``encode_time``, ``decode_time`` (mean seconds
        per call), ``encode_speed_mb_s`` and ``decode_speed_mb_s`` (MiB of
        uncompressed data per second).

    Raises:
        ValueError: If ``runs`` is smaller than 1.
        AssertionError: If ``codec_name`` is ``'dcbs'`` and the decoded array
            differs from ``image_data``.
    """
    if runs < 1:
        # Without at least one timed run the means below would be NaN.
        raise ValueError(f"runs must be >= 1, got {runs}")

    codec = dicube.codecs.get_codec(codec_name)
    encode = codec['encode']
    decode = codec['decode']

    # Warm up: one untimed round trip so one-off setup cost is not measured
    decode(encode(image_data))

    # Measure encoding time. perf_counter() is the high-resolution clock
    # intended for measuring short durations; time.time() is a wall clock.
    encode_times = []
    for _ in range(runs):
        start = time.perf_counter()
        encoded = encode(image_data)
        encode_times.append(time.perf_counter() - start)

    # Measure decoding time on the payload produced by the last encode
    decode_times = []
    for _ in range(runs):
        start = time.perf_counter()
        decoded = decode(encoded)
        decode_times.append(time.perf_counter() - start)

    # Verify lossless compression (only enforced for 'dcbs', i.e. HTJ2K)
    if codec_name == 'dcbs':
        assert np.array_equal(image_data, decoded), \
            "dcbs round trip did not reproduce the input exactly"

    mean_encode = np.mean(encode_times)
    mean_decode = np.mean(decode_times)
    size_mb = image_data.nbytes / 1024 / 1024

    return {
        'codec': codec_name,
        'original_size': image_data.nbytes,
        'compressed_size': len(encoded),
        'compression_ratio': image_data.nbytes / len(encoded),
        'encode_time': mean_encode,
        'decode_time': mean_decode,
        'encode_speed_mb_s': size_mb / mean_encode,
        'decode_speed_mb_s': size_mb / mean_decode
    }

# Benchmark both codecs on the slice at index 0 along the first axis
single_slice = image.raw_image[0]
print(f"Testing single slice: {single_slice.shape}, {single_slice.dtype}")

results = []
for codec_name in ('dcba', 'dcbs'):
    print(f"Benchmarking {codec_name}...")
    result = benchmark_codec(single_slice, codec_name)
    results.append(result)

# Tabulate the per-codec measurements, rounded to three decimals
report_columns = [
    'codec',
    'original_size',
    'compressed_size',
    'compression_ratio',
    'encode_time',
    'decode_time',
    'encode_speed_mb_s',
    'decode_speed_mb_s',
]
df = pd.DataFrame(results).round(3)
print("\n🔥 Performance Comparison (Single Slice):")
print(df[report_columns].to_string(index=False))


In [None]:
## File I/O Performance


In [None]:
def benchmark_file_io(image, output_prefix, runs=3):
    """Benchmark saving and loading ``image`` in several on-disk formats.

    All files are written inside a temporary directory that is removed
    before the function returns. The uncompressed DICOM folder is written
    and read once; each compressed format ('dcba', 'dcbs') is written and
    read ``runs`` times and the mean time is reported.

    Args:
        image: Object providing ``raw_image.nbytes``, ``to_dicom_folder(path)``
            and ``to_file(path, codec=...)`` (a ``DicomCubeImage`` in this
            notebook).
        output_prefix: Prefix used for the file and folder names that are
            created in the temporary directory.
        runs: Number of timed repetitions for each compressed format.
            Must be >= 1.

    Returns:
        list of dicts, one per format, with the keys ``format``,
        ``file_size`` (bytes), ``save_time`` and ``load_time`` (seconds) and
        ``compression_ratio`` (raw pixel bytes divided by file size).

    Raises:
        ValueError: If ``runs`` is smaller than 1.
    """
    if runs < 1:
        # Without at least one timed run the means below would be NaN.
        raise ValueError(f"runs must be >= 1, got {runs}")

    results = []

    with tempfile.TemporaryDirectory() as tmp_name:
        workdir = Path(tmp_name)

        # Test uncompressed DICOM (single save and single load).
        # perf_counter() is the high-resolution clock intended for measuring
        # short durations; time.time() is a wall clock.
        dicom_path = workdir / f"{output_prefix}_uncompressed"
        start = time.perf_counter()
        image.to_dicom_folder(str(dicom_path))
        save_time = time.perf_counter() - start

        start = time.perf_counter()
        DicomCubeImage.from_dicom_folder(str(dicom_path))
        load_time = time.perf_counter() - start

        # NOTE(review): only files matching *.dcm are counted. This assumes
        # to_dicom_folder writes files with that extension; if it does not,
        # dicom_size is 0 and the ratio below divides by zero. TODO confirm.
        dicom_size = sum(f.stat().st_size for f in dicom_path.rglob("*.dcm"))

        results.append({
            'format': 'DICOM_uncompressed',
            'file_size': dicom_size,
            'save_time': save_time,
            'load_time': load_time,
            'compression_ratio': image.raw_image.nbytes / dicom_size
        })

        # Test compressed formats
        for codec_name in ['dcba', 'dcbs']:
            ext = '.jxl' if codec_name == 'dcba' else '.j2k'
            compressed_path = workdir / f"{output_prefix}_{codec_name}{ext}"

            # Save to compressed file (the same path is overwritten each run)
            save_times = []
            for _ in range(runs):
                start = time.perf_counter()
                image.to_file(str(compressed_path), codec=codec_name)
                save_times.append(time.perf_counter() - start)

            # Load from compressed file
            load_times = []
            for _ in range(runs):
                start = time.perf_counter()
                DicomCubeImage.from_file(str(compressed_path))
                load_times.append(time.perf_counter() - start)

            file_size = compressed_path.stat().st_size

            results.append({
                'format': f'{codec_name}_{ext[1:]}',
                'file_size': file_size,
                'save_time': np.mean(save_times),
                'load_time': np.mean(load_times),
                'compression_ratio': image.raw_image.nbytes / file_size
            })

    return results

# Run the file-level benchmark on the loaded DICOM image
print("Benchmarking file I/O performance...")
io_results = benchmark_file_io(image, "test")

# Build the comparison table (one row per storage format), rounded to three decimals
io_df = pd.DataFrame(io_results).round(3)
print(
    "\n💾 File I/O Performance Comparison:",
    io_df.to_string(index=False),
    sep="\n",
)


In [None]:
## NIfTI Data Test


In [None]:
# Read the NIfTI file through DicomCubeImage.from_nifti
nifti_path = "testdata/nifti/s0000.nii.gz"
nifti_image = DicomCubeImage.from_nifti(nifti_path)

# Report the basic properties of the loaded data, one per line
print(
    "Loaded NIfTI data:",
    f"  Shape: {nifti_image.shape}",
    f"  Dtype: {nifti_image.raw_image.dtype}",
    f"  Size: {nifti_image.raw_image.nbytes:,} bytes",
    sep="\n",
)

# Take the middle index along the third axis as the test slice.
# NOTE(review): the DICOM cell slices along the first axis (raw_image[0]);
# confirm which axis from_nifti uses as the slice axis.
middle_index = nifti_image.shape[2] // 2
nifti_slice = nifti_image.raw_image[:, :, middle_index]
print(f"\nTesting NIfTI slice: {nifti_slice.shape}")

# Benchmark both codecs on that slice
nifti_results = [
    benchmark_codec(nifti_slice, name)
    for name in ('dcba', 'dcbs')
]

# Tabulate sizes, ratio and timings, rounded to three decimals
nifti_columns = [
    'codec',
    'original_size',
    'compressed_size',
    'compression_ratio',
    'encode_time',
    'decode_time',
]
nifti_df = pd.DataFrame(nifti_results).round(3)
print("\n🧠 NIfTI Compression Performance:")
print(nifti_df[nifti_columns].to_string(index=False))


In [None]:
## Summary

**JPEG-XL (dcba)**: Better compression ratios, especially for 16-bit medical images. Higher computational cost but excellent quality.

**HTJ2K (dcbs)**: Ultra-fast compression with good ratios. Perfect for real-time applications requiring lossless compression.

Both codecs integrate seamlessly with `DicomCubeImage`, so they fit easily into a medical imaging workflow.
