# Cutana Backend Demo

This notebook demonstrates how to use the Cutana backend for astronomical image cutout processing.

## Setup and Configuration

First, let's import the necessary modules and configure logging to see what's happening:

In [None]:
import pandas as pd
import sys
import json
from pathlib import Path
from loguru import logger

# Make the repository root importable. The relative paths used throughout this
# notebook ("../tests", "../logs") are resolved against the kernel's working
# directory, so the root is taken to be that directory's parent.
# `Path().parent` evaluates to "." (the working directory itself), which is why
# the parent is derived from the absolute working directory instead.
repo_root = Path.cwd().resolve().parent
if str(repo_root) not in sys.path:  # avoid stacking duplicates when the cell is re-run
    sys.path.insert(0, str(repo_root))
import cutana

# Configure logging for Jupyter: INFO and above to the cell output, DEBUG and
# above to the log file. Both sinks share one format string.
LOG_FORMAT = "{time:YYYY-MM-DD HH:mm:ss} | {level} | {name}:{function}:{line} - {message}"
logger.remove()  # Drop every handler registered so far, including loguru's default one
logger.add(sys.stdout, level="INFO", format=LOG_FORMAT)
logger.add("../logs/cutana_demo.log", level="DEBUG", format=LOG_FORMAT)

print("✅ Imports and logging configured successfully")

## Load Test Data

We'll use the mock test data that includes proper file paths:

In [None]:
# Generate the Euclid-compliant mock data by running generate_test_data.py
# with the same interpreter that backs this kernel.

import subprocess
import sys

print("📦 Generating test data...")
try:
    result = subprocess.run(
        [sys.executable, "../tests/test_data/generate_test_data.py", "--size", "small"],
        capture_output=True,
        text=True,
        cwd=Path.cwd(),
    )
except OSError as e:
    # Raised when the child process could not be started at all
    print(f"❌ Error running generate_test_data.py: {e}")
    raise

# A non-zero exit code is reported once and then aborts the cell. Raising
# outside the try block keeps the failure from being caught and re-reported
# as if the script could not be launched.
if result.returncode != 0:
    print(f"❌ Error generating test data: {result.stderr}")
    raise RuntimeError(f"Test data generation failed: {result.stderr}")

print("✅ Test data generated successfully")
if result.stdout:
    print(result.stdout)

import ast

# Load the source catalogue written by the data-generation step
catalogue_path = "../tests/test_data/euclid_cutana_catalogue_small.csv"
print(f"Loading catalogue from: {catalogue_path}")

try:
    catalogue = pd.read_csv(catalogue_path)
    print(f"✅ Loaded {len(catalogue)} sources")
    print(f"Columns: {list(catalogue.columns)}")

    # Show first few rows
    print("\nFirst 3 sources:")
    display(catalogue.head(3))

    # Check that the first FITS file referenced by the catalogue exists
    if catalogue.empty:
        print("\nFITS file check skipped: catalogue has no rows")
    else:
        # The column holds a Python list literal stored as text. literal_eval
        # parses literals only, so arbitrary expressions in the CSV are
        # rejected instead of executed (unlike eval).
        # NOTE(review): assumes the entries are plain string paths - confirm
        # against generate_test_data.py.
        first_fits_path = ast.literal_eval(catalogue.iloc[0]['fits_file_paths'])[0]
        fits_exists = Path(first_fits_path).exists()
        print(f"\nFITS file check: {first_fits_path}")
        print(f"File exists: {fits_exists}")

except Exception as e:
    print(f"❌ Error loading catalogue: {e}")
    raise

## Configure Processing Parameters

Let's create a robust configuration for processing:

In [None]:
# Create output directories (parents included, so a fresh checkout works too)
output_dir = Path("../examples/output")
logs_dir = Path("../logs")
output_dir.mkdir(parents=True, exist_ok=True)
logs_dir.mkdir(parents=True, exist_ok=True)

# Start from the package defaults and override what the demo needs.
# NOTE(review): get_default_config was never imported in this notebook; it is
# accessed through the already-imported `cutana` package here - confirm that
# the package exposes it at top level.
config = cutana.get_default_config()

# Image parameters. Each value is assigned without a trailing comma: a trailing
# comma would store a 1-tuple such as (64,) instead of the value itself.
config.fits_extensions = ["PRIMARY"]  # Use PRIMARY extension for mock data
config.target_resolution = 64
config.file_type = "float32"
config.stretch = "linear"

# Workflow parameters - conservative for demo
config.max_workers = 1  # Single worker for clearer debugging

# Output configuration
config.output_format = "zarr"
config.output_dir = str(output_dir)

# Logging and tracking
config.log_level = "INFO"
config.workflow_file = str(output_dir / "workflow_state.json")
config.tracking_file = str(output_dir / "tracking.json")

print("📋 Configuration:")
# default=str keeps the dump from failing on values JSON cannot encode natively
print(json.dumps({k: v for k, v in config.items()}, indent=2, default=str))
print(f"\n✅ Output directory: {output_dir.absolute()}")

## Initialize and Run Processing

Now let's create the orchestrator and start processing with comprehensive error handling:

In [None]:
# Run the demo on a small slice of the catalogue. Any failure is reported
# together with the tail of the DEBUG log file and then re-raised.
try:
    # Initialize orchestrator
    # NOTE(review): Orchestrator was never imported in this notebook; it is
    # accessed through the already-imported `cutana` package here - confirm
    # that the package exposes it at top level.
    print("🚀 Initializing Cutana orchestrator...")
    orchestrator = cutana.Orchestrator(config)

    # Use only first 5 sources for demo
    demo_catalogue = catalogue.head(5).copy()
    print(f"\n📊 Processing {len(demo_catalogue)} sources for demo...")

    # Start processing with detailed monitoring
    print("⏳ Starting cutout processing...")
    print("This may take a few minutes. Watch the logs below for progress.")

    results = orchestrator.start_processing(demo_catalogue)

    print("\n🎉 Processing completed!")
    print(f"Status: {results['status']}")

except Exception as e:
    print(f"\n❌ Error during processing: {e}")
    logger.exception("Detailed error information:")

    # Print last few lines from log file for debugging
    log_file = Path("../logs/cutana_demo.log")
    if log_file.exists():
        print("\n📋 Last 20 lines from log file:")
        # Explicit encoding so the tail reads the same on every platform;
        # undecodable bytes are replaced rather than masking the real error.
        with open(log_file, 'r', encoding="utf-8", errors="replace") as f:
            for line in f.readlines()[-20:]:
                print(line.strip())

    raise

## Analyze Results

Let's examine what was produced:

In [None]:
def path_size_bytes(path):
    """Return the on-disk size of ``path`` in bytes.

    A regular file reports its own size. A directory reports the sum of the
    sizes of all regular files beneath it, because ``stat().st_size`` on a
    directory describes the directory entry rather than its contents.

    Args:
        path: A ``pathlib.Path`` pointing at an existing file or directory.

    Returns:
        Total size in bytes as an int.
    """
    if path.is_dir():
        return sum(child.stat().st_size for child in path.rglob("*") if child.is_file())
    return path.stat().st_size


# Check processing results
if 'results' in locals() and results.get('status') == 'completed':
    print("📈 Processing Summary:")
    print(f"- Total sources: {results['total_sources']}")
    print(f"- Completed batches: {results['completed_batches']}")

    # Check output files
    zarr_files = list(output_dir.glob("*.zarr"))
    fits_files = list(output_dir.glob("*.fits"))
    json_files = list(output_dir.glob("*.json"))

    print("\n📁 Output files created:")
    print(f"- Zarr archives: {len(zarr_files)}")
    print(f"- FITS files: {len(fits_files)}")
    print(f"- JSON metadata: {len(json_files)}")

    if zarr_files:
        print("\n📦 Zarr files:")
        for zarr_file in zarr_files:
            # A *.zarr match may be a directory, so measure its contents
            size_mb = path_size_bytes(zarr_file) / (1024*1024)
            print(f"  - {zarr_file.name} ({size_mb:.1f} MB)")

    # Check progress tracking
    progress = orchestrator.get_progress()
    print(f"\n📊 Final progress: {progress.get('progress_percent', 0):.1f}%")

else:
    print("❌ Processing did not complete successfully")
    if 'results' in locals():
        print(f"Status: {results.get('status', 'unknown')}")
        if 'error' in results:
            print(f"Error: {results['error']}")

## Debug Information

If something went wrong, let's gather debug information:

In [None]:
# Snapshot of the environment: paths first, then leftover process files,
# then machine resources.
print("üîç Debug Information:")
print(f"- Working directory: {Path.cwd()}")
print(f"- Output directory exists: {output_dir.exists()}")
print(f"- Log file exists: {Path('../logs/cutana_demo.log').exists()}")

# Anything in the output directory whose name starts with "cutout"
temp_files = list(output_dir.glob("cutout*"))
if len(temp_files) > 0:
    print(f"\nüìÑ Temporary/process files found:")
    for temp_file in temp_files:
        print(f"  - {temp_file.name}")

# Machine resources as reported by psutil
import psutil
memory = psutil.virtual_memory()
bytes_per_gb = 1024**3
total_gb = memory.total / bytes_per_gb
available_gb = memory.available / bytes_per_gb
print(f"\nüíª System Resources:")
print(f"- CPU cores: {psutil.cpu_count()}")
print(f"- Memory: {total_gb:.1f} GB total, {available_gb:.1f} GB available")
print(f"- Python version: {sys.version}")

print("\n‚úÖ Demo completed! Check the logs and output files for detailed results.")

In [None]:
import images_to_zarr as i2z
import zarr
from matplotlib import pyplot as plt

# Print the archive summary produced by images_to_zarr
i2z.inspect("output/images.zarr")

# Open the same archive read-only and pull out the image stored at index 3
file = zarr.open("output/images.zarr", mode='r')
image = file['images'][3]

# Render it in grayscale on an explicit axes object, with the axes hidden
fig, ax = plt.subplots()
ax.imshow(image, cmap='gray')
ax.axis('off')
plt.show()

In [None]:
from pathlib import Path

import images_to_zarr as i2z

# The archive name below is tied to one particular benchmark run
# (NOTE(review): it appears to embed that run's process id and timestamp).
# It is still preferred when present; otherwise the most recently modified
# cutouts_*.zarr archive in the same directory is inspected instead, so the
# cell keeps working after the benchmark is re-run.
pinned_archive = "../benchmarking/output/cutouts_cutout-process-15584_1754487403884154100.zarr/"
if Path(pinned_archive).exists():
    archive_to_inspect = pinned_archive
else:
    candidates = sorted(
        Path("../benchmarking/output").glob("cutouts_*.zarr"),
        key=lambda candidate: candidate.stat().st_mtime,
    )
    archive_to_inspect = str(candidates[-1]) if candidates else None

if archive_to_inspect is None:
    print("No cutouts_*.zarr archive found in ../benchmarking/output - run the benchmark first")
else:
    print(f"Inspecting: {archive_to_inspect}")
    i2z.inspect(archive_to_inspect)

In [None]:
# Display a reproducible random sample of images in a grid
import zarr
from matplotlib import pyplot as plt
import numpy as np

SAMPLE_SEED = 42  # fixed so Restart & Run All shows the same images every time
GRID_ROWS, GRID_COLS = 4, 4

file = zarr.open("../benchmarking/output/batch_003/images.zarr", mode='r')
images = file['images']
n_images = images.shape[0]

# Sample without replacement so no image appears twice, and never ask for
# more images than the archive holds.
rng = np.random.default_rng(SAMPLE_SEED)
n_shown = min(GRID_ROWS * GRID_COLS, n_images)
sample_indices = rng.choice(n_images, size=n_shown, replace=False)

fig, axes = plt.subplots(GRID_ROWS, GRID_COLS, figsize=(8, 8))
for ax, idx in zip(axes.flatten(), sample_indices):
    image = images[int(idx)]  # plain int index for the zarr array
    ax.imshow(image, cmap='gray')
# Hide the axes on every panel, including any left empty by a small archive
for ax in axes.flatten():
    ax.axis('off')
plt.tight_layout()
plt.show()