In [None]:
# 🌟 V1.3: Pre-Trained Segmentation Configuration
# Every tunable of the notebook lives in this cell; later cells only read these names.

# --- Label CSV (provides the FileID column that drives the download) ---
CSV_FOLDER = "../../data/Paradise_CSV/"
CSV_LABELS_FILE = "Labeled_Data_RAW_Sample.csv"
CSV_SEPARATOR = ";"

# --- Download / conversion targets ---
DOWNLOAD_PATH = '../../data/Paradise_Test_DICOMs'   # raw DICOM files, flat folder
IMAGES_PATH = '../../data/Paradise_Test_Images'     # converted PNG images
EXPORT_METADATA = True
CONVERT = True                                      # run the DICOM -> PNG conversion cell

# --- V1.3 segmentation switches ---
USE_LUNG_SEGMENTATION = True
SEGMENTATION_MODEL = 'torchxrayvision'  # Options: 'torchxrayvision', 'lungs_segmentation'

# --- 🎛️ Parameters meant to be tweaked while testing ---
LUNG_SEGMENTATION_THRESHOLD = 0.1  # cut-off applied to the model output (lower = more sensitive, larger lung area)
LUNG_CROP_PADDING = 120            # pixels added around the detected lungs (higher = less zoom)

# --- Quality control: crops are rejected when the lung/image area ratio is outside this window ---
MIN_LUNG_AREA_RATIO = 0.02  # deliberately permissive (previous value: 0.10)
MAX_LUNG_AREA_RATIO = 0.90
SAVE_SEGMENTATION_MASKS = True
MASKS_PATH = '../../data/Paradise_Masks'

# --- Output image format ---
TARGET_SIZE = (518, 518)
PRESERVE_ASPECT_RATIO = True
BIT_DEPTH = 8
MONOCHROME = 1

# Echo the effective settings so that a run's output documents its configuration.
print("\n".join((
    "🌟 V1.3 Pre-trained segmentation configuration loaded!",
    f"🫁 Using model: {SEGMENTATION_MODEL}",
    f"📐 Target size: {TARGET_SIZE}",
    f"🎛️ Segmentation threshold: {LUNG_SEGMENTATION_THRESHOLD} (lower = more sensitive)",
    f"🔧 Lung padding: {LUNG_CROP_PADDING} pixels (higher = less zoom)",
    f"📊 Min area ratio: {MIN_LUNG_AREA_RATIO} (much lower for better detection)",
    "🚀 Ready for optimized lung segmentation!",
)))


# 🌟 V1.3: Enhanced ArchiMed Download with Pre-trained Lung Segmentation
**Professional lung segmentation using TorchXRayVision and lungs-segmentation models**

**<h1 align="center">Download ArchiMed Images V1.3 - PRE-TRAINED LUNG SEGMENTATION</h1>**

## 🌟 **V1.3: Professional Chest X-Ray Segmentation**
- **TorchXRayVision**: Pre-trained segmentation models from medical imaging library
- **Proven Performance**: Trained on large chest X-ray datasets (NIH, CheXpert, MIMIC)
- **No More Issues**: No tensor mismatches, proper lung detection
- **Multiple Fallbacks**: Includes alternative models for maximum reliability

## 🚀 **Key Improvements:**
- **Professional Models**: Uses medically-validated segmentation
- **Better Cropping**: Accurate lung boundary detection with proper padding
- **Robust Pipeline**: Multiple fallback options
- **Quality Validation**: Automatic detection quality checks

## 🔧 **V1.3.2 Major Update (Reference Image Matching):**
- **Increased Padding**: 120px padding around lungs (user feedback: less zoom)
- **Clean Binary Masks**: Simple contours instead of "terrain maps" 
- **Reference-Style Output**: BLUE contours + ORANGE crop box (matches user's reference)
- **Better Morphology**: Cleaner lung shapes with hole filling
- **Flat DICOM Storage**: Files saved directly to main folder (no subfolders)
- **Ready for Step 3**: Zone division implementation prepared


In [None]:
# 📋 Mask Interpretation Guide
# Prints a short legend for the files written to the masks folder and reminds the
# reader which configuration values to adjust when the result looks wrong.

print("\n".join([
    "🎯 UPDATED MASK INTERPRETATION GUIDE:",
    "• mask.png files: Clean binary lung masks (white = lung tissue, black = background)",
    "• overlay.png files: Shows detection like your reference image",
    "  - BLUE contours = Detected lung boundaries (Step 1 in your reference)",
    "  - ORANGE rectangle = Final crop region (Step 2 in your reference)",
    "• 🎛️ CONFIGURABLE PARAMETERS (set at top of notebook):",
    f"  - LUNG_SEGMENTATION_THRESHOLD = {LUNG_SEGMENTATION_THRESHOLD} (lower = more sensitive)",
    f"  - LUNG_CROP_PADDING = {LUNG_CROP_PADDING}px (higher = less zoom)",
    "• If lung detection misses areas: DECREASE LUNG_SEGMENTATION_THRESHOLD",
    "• If crop rectangle too zoomed in: INCREASE LUNG_CROP_PADDING",
    "• Ready for Step 3: Zone division (to be implemented later)",
]))


In [None]:
# Core dependencies
import ArchiMedConnector.A3_Connector as A3_Conn
import pandas as pd
import os
import pydicom
import numpy as np
from PIL import Image
import glob
from tqdm import tqdm
import warnings
import cv2
import io
import subprocess
import sys

# ANSI escape sequences used to colourise console messages throughout the notebook.
# Keys are one-letter colour codes; 'W' resets the terminal to its default colour.
ANSI = dict(
    R='\033[91m',
    G='\033[92m',
    B='\033[94m',
    Y='\033[93m',
    W='\033[0m',
    M='\033[95m',
    C='\033[96m',
)

print(ANSI['G'] + "✅ Core dependencies loaded" + ANSI['W'])

# Initialize ArchiMed connector
# Single shared connector instance; the download cell calls getUserInfos() and
# downloadFile() on it.
# NOTE(review): presumably the connector reads its credentials/endpoint from its own
# configuration, since none are passed here — confirm.
a3conn = A3_Conn.A3_Connector()


In [None]:
# 🌟 Install and Import Pre-trained Segmentation Models
#
# Each backend is imported when already installed; otherwise one pip install into the
# kernel's own environment is attempted. The outcome is recorded in
# TORCHXRAY_AVAILABLE / LUNGS_SEG_AVAILABLE and summarised in `available_models`.
# NOTE(review): the runtime installs are unpinned; consider pinning versions for reproducibility.
import importlib  # only needed here, to refresh the import caches after a runtime pip install


def _pip_install(package_name):
    """Install ``package_name`` with pip into the interpreter running this kernel.

    Raises:
        subprocess.CalledProcessError: if pip exits with a non-zero status.
    """
    subprocess.check_call([sys.executable, "-m", "pip", "install", package_name])
    # A package installed after interpreter start-up may stay invisible to the
    # import system until the finder caches are invalidated.
    importlib.invalidate_caches()


# Install TorchXRayVision if not already installed
try:
    import torchxrayvision as xrv
    import torch
    TORCHXRAY_AVAILABLE = True
    print(f"{ANSI['G']}✅ TorchXRayVision loaded successfully{ANSI['W']}")
except ImportError:
    print(f"{ANSI['Y']}⚠️ Installing TorchXRayVision...{ANSI['W']}")
    try:
        _pip_install("torchxrayvision")
        import torchxrayvision as xrv
        import torch
        TORCHXRAY_AVAILABLE = True
        print(f"{ANSI['G']}✅ TorchXRayVision installed and loaded{ANSI['W']}")
    except Exception as e:
        # Installation or the post-install import failed: the backend is simply marked unavailable.
        TORCHXRAY_AVAILABLE = False
        print(f"{ANSI['R']}❌ Failed to install TorchXRayVision: {e}{ANSI['W']}")

# Try alternative: lungs-segmentation package
try:
    from lungs_segmentation.pre_trained_models import create_model
    LUNGS_SEG_AVAILABLE = True
    print(f"{ANSI['G']}✅ lungs-segmentation available{ANSI['W']}")
except ImportError:
    print(f"{ANSI['Y']}⚠️ Installing lungs-segmentation...{ANSI['W']}")
    try:
        _pip_install("lungs-segmentation")
        from lungs_segmentation.pre_trained_models import create_model
        LUNGS_SEG_AVAILABLE = True
        print(f"{ANSI['G']}✅ lungs-segmentation installed{ANSI['W']}")
    except Exception as e:
        LUNGS_SEG_AVAILABLE = False
        print(f"{ANSI['Y']}⚠️ lungs-segmentation not available: {e}{ANSI['W']}")

# Check what we have available (order = preference order used when picking a backend)
available_models = []
if TORCHXRAY_AVAILABLE:
    available_models.append('torchxrayvision')
if LUNGS_SEG_AVAILABLE:
    available_models.append('lungs_segmentation')

print(f"{ANSI['C']}🏥 Available pre-trained models: {available_models}{ANSI['W']}")

if not available_models:
    # USE_LUNG_SEGMENTATION is intentionally left untouched: when it is enabled the
    # segmenter falls back to classical thresholding on its own, and when the user
    # disabled it in the configuration cell that choice must not be overridden here.
    print(f"{ANSI['R']}❌ No pre-trained models available, falling back to enhanced thresholding{ANSI['W']}")


In [None]:
class PreTrainedLungSegmentation:
    """Lung segmentation for chest X-rays backed by a pre-trained model.

    Backends, selected through ``model_type``:

    * ``'torchxrayvision'``    - PSPNet from ``xrv.baseline_models.chestx_det``
    * ``'lungs_segmentation'`` - ``create_model("resnet34")`` from lungs-segmentation

    Any other value, a backend whose package is not available, or a model that
    fails to load leaves ``self.model`` as ``None``; :meth:`segment_lungs` then
    uses the classical OpenCV fallback.

    Attributes:
        model_type: The backend name requested by the caller.
        model: Backend tag (``'torchxray'``, ``'lungs_seg'``) or ``None`` for fallback.
        device: ``'cuda'`` or ``'cpu'``; only used to place the lungs-segmentation model.
        seg_model: The loaded network (only set when a backend was initialised).
    """

    def __init__(self, model_type='torchxrayvision'):
        """Load the requested backend, degrading to the fallback when it is unavailable."""
        self.model_type = model_type
        self.model = None
        # `torch` is only bound globally when the TorchXRayVision import succeeded, so
        # it must not be referenced unconditionally here (the pure fallback path would
        # otherwise die with a NameError before it could be used).
        self.device = self._detect_device()

        print(f"{ANSI['C']}🔧 Initializing {model_type} segmentation...{ANSI['W']}")

        if model_type == 'torchxrayvision' and TORCHXRAY_AVAILABLE:
            self._init_torchxray()
        elif model_type == 'lungs_segmentation' and LUNGS_SEG_AVAILABLE:
            self._init_lungs_seg()
        else:
            print(f"{ANSI['Y']}⚠️ Requested model not available, using enhanced fallback{ANSI['W']}")
            self.model = None

    @staticmethod
    def _detect_device():
        """Return ``'cuda'`` when PyTorch is importable and reports a GPU, else ``'cpu'``."""
        try:
            import torch as torch_module
        except (ImportError, OSError):
            # PyTorch missing (or its native libraries fail to load): CPU-only operation.
            return 'cpu'
        return 'cuda' if torch_module.cuda.is_available() else 'cpu'

    def _init_torchxray(self):
        """Initialize TorchXRayVision segmentation model.

        On success ``self.model`` becomes ``'torchxray'``; on any exception the error
        is printed and ``self.model`` is reset to ``None`` (fallback).
        """
        try:
            # Load pre-trained segmentation model from TorchXRayVision
            self.seg_model = xrv.baseline_models.chestx_det.PSPNet()
            print(f"{ANSI['G']}✅ TorchXRayVision PSPNet loaded{ANSI['W']}")
            print(f"{ANSI['B']}   Targets: {self.seg_model.targets}{ANSI['W']}")
            self.model = 'torchxray'
        except Exception as e:
            print(f"{ANSI['Y']}⚠️ TorchXRayVision init failed: {e}{ANSI['W']}")
            self.model = None

    def _init_lungs_seg(self):
        """Initialize lungs-segmentation model.

        The ResNet34 model is moved to ``self.device`` and put in eval mode. On any
        exception the error is printed and ``self.model`` is reset to ``None``.
        """
        try:
            self.seg_model = create_model("resnet34")
            self.seg_model = self.seg_model.to(self.device)
            self.seg_model.eval()
            print(f"{ANSI['G']}✅ lungs-segmentation ResNet34 loaded{ANSI['W']}")
            self.model = 'lungs_seg'
        except Exception as e:
            print(f"{ANSI['Y']}⚠️ lungs-segmentation init failed: {e}{ANSI['W']}")
            self.model = None

    def segment_lungs(self, image):
        """Segment the lungs in ``image`` with the loaded backend.

        Args:
            image: 2-D grayscale or 3-D colour array (the backends convert colour
                input with ``cv2.COLOR_RGB2GRAY`` where needed).

        Returns:
            ``(binary_mask, probability_like_mask)``. ``binary_mask`` is a uint8 array
            with 1 on lung pixels. A backend failure is reported and the classical
            fallback result is returned instead, so a pair is always produced.
        """
        if self.model is None:
            return self._enhanced_fallback_segmentation(image)

        try:
            if self.model == 'torchxray':
                return self._torchxray_segment(image)
            if self.model == 'lungs_seg':
                return self._lungs_seg_segment(image)
        except Exception as e:
            print(f"{ANSI['Y']}⚠️ Segmentation failed: {e}, using enhanced fallback{ANSI['W']}")
            return self._enhanced_fallback_segmentation(image)

        # Unrecognised backend tag: never return None, callers unpack a pair.
        return self._enhanced_fallback_segmentation(image)

    def _torchxray_segment(self, image):
        """Segment using TorchXRayVision.

        Returns:
            ``(binary_mask, binary_mask.astype(float))`` at the resolution of ``image``.
        """
        # Convert to proper format
        if len(image.shape) == 3:
            image_gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        else:
            image_gray = image

        # Normalize to [-1024, 1024] range as expected by TorchXRayVision
        # (the second argument is the maximum value of the input, i.e. 8-bit data).
        image_norm = xrv.datasets.normalize(image_gray, 255)
        image_norm = image_norm[None, ...]  # Add channel dimension

        # Resize to 512x512 as expected by the model
        transform = xrv.datasets.XRayResizer(512)
        image_resized = transform(image_norm)

        # Convert to tensor (adds the batch dimension)
        image_tensor = torch.from_numpy(image_resized).float().unsqueeze(0)

        # Inference
        with torch.no_grad():
            output = self.seg_model(image_tensor)

        # Accumulate the two lung channels; they are located by target name, not by index.
        # NOTE(review): torchxrayvision's own examples apply a sigmoid to this model's
        # output before thresholding. Here the raw outputs of both channels are summed
        # and compared with LUNG_SEGMENTATION_THRESHOLD directly — confirm this is intended.
        lung_targets = ['Left Lung', 'Right Lung']
        lung_mask = np.zeros((512, 512))

        for i, target in enumerate(self.seg_model.targets):
            if target in lung_targets:
                lung_mask += output[0, i].cpu().numpy()

        # Resize back to original size (cv2.resize takes (width, height))
        lung_mask = cv2.resize(lung_mask, (image.shape[1], image.shape[0]))

        # Create clean binary mask (like reference image)
        binary_mask = (lung_mask > LUNG_SEGMENTATION_THRESHOLD).astype(np.uint8)  # Use configurable threshold
        print(f"{ANSI['C']}🎛️ Using threshold: {LUNG_SEGMENTATION_THRESHOLD} (configurable: LUNG_SEGMENTATION_THRESHOLD){ANSI['W']}")

        # Clean up the mask with morphological operations for cleaner contours:
        # closing bridges small gaps, opening removes small isolated specks.
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (10, 10))
        binary_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_CLOSE, kernel)
        binary_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_OPEN, kernel)

        # Fill holes to create solid lung regions
        kernel_fill = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 15))
        binary_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_CLOSE, kernel_fill)

        return binary_mask, binary_mask.astype(float)

    def _lungs_seg_segment(self, image):
        """Segment using lungs-segmentation.

        Returns:
            ``(binary_mask, combined_mask)`` where ``combined_mask`` is the sum of the
            first two masks returned by the package and ``binary_mask`` marks values
            above 0.5. Falls back to the classical method when no mask is returned.
        """
        from lungs_segmentation import inference

        # Run inference
        # NOTE(review): the package's documented usage passes an image *path* as the
        # second argument; confirm an in-memory array is accepted. If the call raises,
        # segment_lungs() catches it and uses the fallback.
        processed_image, masks = inference.inference(self.seg_model, image, 0.2)

        # Combine left and right lung masks
        if len(masks) >= 2:
            combined_mask = masks[0] + masks[1]  # Left + Right lung
        elif len(masks) == 1:
            combined_mask = masks[0]
        else:
            return self._enhanced_fallback_segmentation(image)

        binary_mask = (combined_mask > 0.5).astype(np.uint8)
        return binary_mask, combined_mask

    def _enhanced_fallback_segmentation(self, image):
        """Enhanced fallback segmentation using multiple techniques.

        Pure OpenCV pipeline: CLAHE contrast enhancement, three threshold masks merged
        with per-mask weights, morphological clean-up, and selection of the largest
        connected regions.

        Returns:
            ``(binary_mask, combined / 255.0)`` where ``binary_mask`` is uint8 with 1
            wherever the merged mask is non-zero.
        """
        print(f"{ANSI['B']}🔄 Using enhanced professional fallback segmentation...{ANSI['W']}")

        if len(image.shape) == 3:
            gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        else:
            gray = image.copy()

        # Preprocessing: Apply CLAHE for better contrast
        clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8,8))
        gray = clahe.apply(gray)

        # NOTE(review): every threshold below uses THRESH_BINARY, i.e. it marks pixels
        # *brighter* than the cut-off. Confirm this polarity matches how lungs appear
        # in the images fed to this method.

        # Method 1: Otsu thresholding
        _, otsu_mask = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

        # Method 2: Adaptive threshold for local contrast
        adaptive_mask = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)

        # Method 3: global threshold at the 75th intensity percentile
        percentile_75 = np.percentile(gray, 75)
        _, high_thresh = cv2.threshold(gray, percentile_75, 255, cv2.THRESH_BINARY)

        # Combine masks with weighted approach. Because the element-wise maximum is
        # taken, the result is non-zero wherever at least one of the masks is set.
        combined = np.maximum(np.maximum(otsu_mask * 0.6, adaptive_mask * 0.3), high_thresh * 0.1)

        # Morphological operations to clean up and connect lung regions
        # Use larger kernel for chest X-rays
        kernel_open = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 15))
        kernel_close = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (25, 25))

        combined = cv2.morphologyEx(combined, cv2.MORPH_OPEN, kernel_open)
        combined = cv2.morphologyEx(combined, cv2.MORPH_CLOSE, kernel_close)

        # Remove small noise and keep only significant lung regions
        contours, _ = cv2.findContours(combined.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        if contours:
            # Calculate areas and keep largest contours (likely lung regions)
            contour_areas = [(cv2.contourArea(cnt), cnt) for cnt in contours]
            contour_areas.sort(key=lambda x: x[0], reverse=True)

            # Keep top contours that represent lungs
            mask_clean = np.zeros_like(combined)
            total_image_area = combined.shape[0] * combined.shape[1]
            min_area_threshold = total_image_area * 0.01  # At least 1% of image

            kept_contours = 0
            for area, contour in contour_areas:
                if area > min_area_threshold and kept_contours < 4:  # Max 4 regions (2 lungs possibly split)
                    cv2.fillPoly(mask_clean, [contour], 255)
                    kept_contours += 1
                elif kept_contours >= 2:  # Have at least 2 significant regions
                    break

            # Only replace the merged mask when at least one region survived the filter.
            if np.sum(mask_clean) > 0:
                combined = mask_clean

        # Final cleanup: Fill holes within lung regions
        kernel_fill = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (20, 20))
        combined = cv2.morphologyEx(combined, cv2.MORPH_CLOSE, kernel_fill)

        print(f"{ANSI['G']}✅ Enhanced fallback segmentation complete{ANSI['W']}")

        return (combined > 0).astype(np.uint8), combined / 255.0

# Initialize the segmentation pipeline
# Backend choice: honour SEGMENTATION_MODEL from the configuration cell when that
# backend is available; otherwise use the first available backend (torchxrayvision is
# listed before lungs_segmentation); with no backend at all the segmenter is built
# with the 'fallback' tag and uses classical thresholding.
if USE_LUNG_SEGMENTATION:
    if SEGMENTATION_MODEL in available_models:
        selected_backend = SEGMENTATION_MODEL
    elif available_models:
        selected_backend = available_models[0]
        print(f"{ANSI['Y']}⚠️ Configured model '{SEGMENTATION_MODEL}' not available, using '{selected_backend}' instead{ANSI['W']}")
    else:
        selected_backend = 'fallback'
    lung_segmenter = PreTrainedLungSegmentation(selected_backend)
    print(f"{ANSI['C']}🎯 Professional lung segmentation initialized{ANSI['W']}")
else:
    # Downstream code treats None as "return images unchanged".
    lung_segmenter = None
    print(f"{ANSI['Y']}⚠️ Lung segmentation disabled{ANSI['W']}")


In [None]:
def _save_segmentation_artifacts(image_array, binary_mask, crop_box, file_id):
    """Write ``<file_id>_mask.png`` and ``<file_id>_overlay.png`` into MASKS_PATH.

    Args:
        image_array: The uncropped image the mask was computed on.
        binary_mask: uint8 mask with 1 on lung pixels.
        crop_box: ``(x_min, y_min, x_max, y_max)`` of the final crop, in pixels.
        file_id: Identifier used as file-name prefix.

    Raises:
        IOError: if OpenCV reports that one of the files could not be written.
    """
    os.makedirs(MASKS_PATH, exist_ok=True)
    x_min, y_min, x_max, y_max = (int(value) for value in crop_box)

    # Save clean binary mask (like reference image - simple contours)
    mask_path = os.path.join(MASKS_PATH, f"{file_id}_mask.png")
    mask_image = (binary_mask * 255).astype(np.uint8)
    if not cv2.imwrite(mask_path, mask_image):
        raise IOError(f"could not write {mask_path}")

    # Save overlay like reference image (lung contours + crop box)
    overlay_path = os.path.join(MASKS_PATH, f"{file_id}_overlay.png")
    overlay = image_array.copy()
    if len(overlay.shape) == 2:
        overlay = cv2.cvtColor(overlay, cv2.COLOR_GRAY2RGB)

    # Colour tuples are interpreted as BGR by cv2.imwrite: (255, 0, 0) is written as
    # blue and (0, 165, 255) as orange.
    contours, _ = cv2.findContours(binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cv2.drawContours(overlay, contours, -1, (255, 0, 0), 2)
    cv2.rectangle(overlay, (x_min, y_min), (x_max, y_max), (0, 165, 255), 3)

    # Add clear legend
    cv2.putText(overlay, "BLUE = Lung contours", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)
    cv2.putText(overlay, "ORANGE = Crop region", (10, 55), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 165, 255), 2)
    if not cv2.imwrite(overlay_path, overlay):
        raise IOError(f"could not write {overlay_path}")

    print(f"{ANSI['C']}💾 Saved: {file_id}_mask.png (clean binary) & {file_id}_overlay.png (like reference){ANSI['W']}")


def process_image_with_segmentation(image_array, file_id):
    """Crop ``image_array`` to the lung region found by ``lung_segmenter``.

    Args:
        image_array: 2-D grayscale or 3-D colour image array.
        file_id: Identifier used in log messages and artifact file names.

    Returns:
        The cropped image. The unmodified ``image_array`` is returned when
        segmentation is disabled (``lung_segmenter is None``), when the detected
        lung area ratio is outside [MIN_LUNG_AREA_RATIO, MAX_LUNG_AREA_RATIO],
        when no lung pixel is found, or when segmentation/cropping raises.
        A failure while saving the optional mask/overlay files is only reported;
        the crop is still returned.
    """
    if lung_segmenter is None:
        return image_array

    try:
        print(f"{ANSI['B']}🫁 Segmenting lungs for {file_id}...{ANSI['W']}")

        # Get lung segmentation (the probability-like second element is not needed here)
        binary_mask, _ = lung_segmenter.segment_lungs(image_array)

        # Validate segmentation quality
        total_pixels = binary_mask.shape[0] * binary_mask.shape[1]
        lung_pixels = np.sum(binary_mask)
        lung_ratio = lung_pixels / total_pixels

        print(f"{ANSI['C']}📊 Lung area detected: {lung_ratio:.3f} of image{ANSI['W']}")

        # Quality check with improved thresholds
        if lung_ratio < MIN_LUNG_AREA_RATIO:
            print(f"{ANSI['Y']}⚠️ Detected area too small ({lung_ratio:.3f} < {MIN_LUNG_AREA_RATIO}), using original{ANSI['W']}")
            return image_array  # Return original

        if lung_ratio > MAX_LUNG_AREA_RATIO:
            print(f"{ANSI['Y']}⚠️ Detected area too large ({lung_ratio:.3f} > {MAX_LUNG_AREA_RATIO}), using original{ANSI['W']}")
            return image_array  # Return original

        # Find bounding box of lung regions; coords rows are (row, column) pairs
        coords = np.column_stack(np.where(binary_mask > 0))
        if len(coords) == 0:
            print(f"{ANSI['Y']}⚠️ No lung coordinates found{ANSI['W']}")
            return image_array

        y_min, x_min = coords.min(axis=0)
        y_max, x_max = coords.max(axis=0)

        # Add small safety margin to ensure all detected lung pixels are included
        # (this also compensates for the exclusive upper bound of the slice below).
        safety_margin = 10
        y_min = max(0, y_min - safety_margin)
        x_min = max(0, x_min - safety_margin)
        y_max = min(image_array.shape[0], y_max + safety_margin)
        x_max = min(image_array.shape[1], x_max + safety_margin)

        # Add generous padding to ensure lungs are fully included
        h, w = image_array.shape[:2]
        padding = LUNG_CROP_PADDING

        # Calculate padded boundaries, clamped to the image
        y_min_padded = max(0, y_min - padding)
        x_min_padded = max(0, x_min - padding)
        y_max_padded = min(h, y_max + padding)
        x_max_padded = min(w, x_max + padding)

        # Ensure minimum crop size to avoid over-cropping
        crop_height = y_max_padded - y_min_padded
        crop_width = x_max_padded - x_min_padded
        min_dimension = min(h, w) * 0.5  # At least 50% of smallest dimension

        if crop_height < min_dimension or crop_width < min_dimension:
            print(f"{ANSI['Y']}⚠️ Crop too small ({crop_height}x{crop_width}), using more conservative crop{ANSI['W']}")
            # Replace the crop by a square of side ~min_dimension centred on the lung box
            center_y, center_x = (y_min + y_max) // 2, (x_min + x_max) // 2
            half_size = int(min_dimension // 2)

            y_min_padded = max(0, center_y - half_size)
            x_min_padded = max(0, center_x - half_size)
            y_max_padded = min(h, center_y + half_size)
            x_max_padded = min(w, center_x + half_size)

        # Work with plain Python ints from here on: the values come from NumPy
        # reductions and are later handed to OpenCV drawing calls.
        y_min_padded, x_min_padded = int(y_min_padded), int(x_min_padded)
        y_max_padded, x_max_padded = int(y_max_padded), int(x_max_padded)

        # Crop the image
        if len(image_array.shape) == 3:
            cropped = image_array[y_min_padded:y_max_padded, x_min_padded:x_max_padded, :]
        else:
            cropped = image_array[y_min_padded:y_max_padded, x_min_padded:x_max_padded]

        # Fraction of the original area that is kept by the crop
        original_area = h * w
        cropped_area = (y_max_padded - y_min_padded) * (x_max_padded - x_min_padded)
        area_ratio = cropped_area / original_area

        print(f"{ANSI['G']}✅ Cropped to {area_ratio:.2f} of original area{ANSI['W']}")
        print(f"{ANSI['B']}   Original: {h}x{w} → Cropped: {y_max_padded-y_min_padded}x{x_max_padded-x_min_padded}{ANSI['W']}")
        print(f"{ANSI['C']}   Padding applied: {padding}px on each side (configurable: LUNG_CROP_PADDING){ANSI['W']}")

    except Exception as e:
        print(f"{ANSI['Y']}⚠️ Segmentation failed for {file_id}: {e}{ANSI['W']}")
        return image_array

    # Save segmentation mask if requested. This is a debugging aid only, so a failure
    # here must not throw away the crop that was just computed.
    if SAVE_SEGMENTATION_MASKS:
        try:
            _save_segmentation_artifacts(
                image_array,
                binary_mask,
                (x_min_padded, y_min_padded, x_max_padded, y_max_padded),
                file_id,
            )
        except Exception as e:
            print(f"{ANSI['Y']}⚠️ Could not save segmentation masks for {file_id}: {e}{ANSI['W']}")

    return cropped

print(f"{ANSI['G']}✅ Enhanced image processing functions loaded{ANSI['W']}")


In [None]:
# 🚀 Main Processing Pipeline
# Reads the label CSV, extracts the unique file IDs and downloads each DICOM into
# DOWNLOAD_PATH (flat layout, "<id>.dcm"). The paths of all files that are present
# afterwards end up in `downloaded_files`, which the conversion cell consumes.

# Load CSV data
try:
    user_info = a3conn.getUserInfos()
    print(f"{ANSI['G']}🔐 ArchiMed User Info{ANSI['W']}")
    print(f"User info: {user_info}")

    # Load CSV
    csv_path = os.path.join(CSV_FOLDER, CSV_LABELS_FILE)
    df = pd.read_csv(csv_path, sep=CSV_SEPARATOR)
    print(f"{ANSI['G']}✅ Loaded CSV with {len(df)} rows{ANSI['W']}")

    # Check for FileID column (handle different naming conventions)
    file_id_column = None
    for col in ['FileID', 'file_id', 'File_ID']:
        if col in df.columns:
            file_id_column = col
            break

    if file_id_column is None:
        print(f"{ANSI['R']}❌ No FileID column found in CSV{ANSI['W']}")
        raise ValueError("FileID column not found")

    print(f"{ANSI['C']}📊 Available columns: {list(df.columns)}{ANSI['W']}")

    # Get file IDs to download, normalised to plain ints.
    # When the column contains empty cells pandas reads it as float, so an ID comes
    # back as 12345.0; str() of that is "12345.0", which int() rejects. Normalising
    # up front keeps the API argument and the file name ("12345.dcm") consistent.
    file_ids = []
    seen_ids = set()
    for raw_id in df[file_id_column].dropna().unique():
        try:
            if isinstance(raw_id, (float, np.floating)):
                if not float(raw_id).is_integer():
                    raise ValueError("not a whole number")
                normalized_id = int(raw_id)
            else:
                normalized_id = int(str(raw_id).strip())
        except (TypeError, ValueError) as e:
            print(f"{ANSI['Y']}⚠️ Skipping invalid file ID {raw_id!r}: {e}{ANSI['W']}")
            continue
        if normalized_id not in seen_ids:
            seen_ids.add(normalized_id)
            file_ids.append(normalized_id)
    total_files = len(file_ids)

    print(f"{ANSI['M']}🚀 Starting enhanced download with pre-trained lung segmentation{ANSI['W']}")
    print(f"Total files to process: {total_files}")
    print(f"Destination: {DOWNLOAD_PATH}")
    print(f"🫁 Lung segmentation: {'ENABLED' if USE_LUNG_SEGMENTATION else 'DISABLED'}")

    # Download files
    downloaded_files = []

    # Create download directory once, before the loop
    os.makedirs(DOWNLOAD_PATH, exist_ok=True)

    for i, file_id in enumerate(file_ids):
        progress = ((i + 1) / total_files) * 100
        file_id_str = str(file_id)
        print(f"{ANSI['B']}⬇️ Downloading file {file_id_str} (Progress: {progress:.1f}% - {i+1}/{total_files}) from ArchiMed{ANSI['W']}")

        try:
            # Define output path - FLAT STRUCTURE (no subfolders)
            dicom_file_path = os.path.join(DOWNLOAD_PATH, f"{file_id_str}.dcm")

            # Check if the file already exists
            if os.path.exists(dicom_file_path):
                print(f"{ANSI['Y']}File {file_id_str} already exists, skipping download{ANSI['W']}")
                downloaded_files.append(dicom_file_path)
                continue

            # Download using the WORKING v1.1 pattern - MODIFIED for flat structure
            result = a3conn.downloadFile(
                file_id,  # the API is called with an int
                asStream=False,
                destDir=DOWNLOAD_PATH,  # Use main directory directly
                filename=f"{file_id_str}.dcm",
                inWorklist=False
            )

            # Success is only recorded when the file is actually present on disk.
            if result and os.path.exists(dicom_file_path):
                downloaded_files.append(dicom_file_path)
                print(f"{ANSI['G']}✅ Successfully downloaded: {dicom_file_path}{ANSI['W']}")
            else:
                print(f"{ANSI['Y']}⚠️ Download result unclear for {file_id_str}{ANSI['W']}")
        except Exception as e:
            # A single failed download must not abort the remaining ones.
            print(f"{ANSI['Y']}⚠️ Failed to download {file_id_str}: {e}{ANSI['W']}")

    print(f"{ANSI['G']}✅ Downloaded {len(downloaded_files)} files successfully{ANSI['W']}")

except Exception as e:
    # Connection, CSV or schema problems: report and leave nothing for the conversion cell.
    print(f"{ANSI['R']}❌ Setup failed: {e}{ANSI['W']}")
    downloaded_files = []


In [None]:
# 🏥 Enhanced DICOM Conversion with Pre-trained Lung Segmentation

def convert_dicom_to_image_with_segmentation(dicom_path, output_path, target_size=TARGET_SIZE):
    """Convert one DICOM file to an 8-bit image, cropped to the lungs and resized.

    Steps: read pixels, invert MONOCHROME1 images, scale to 0-255, crop with
    ``process_image_with_segmentation``, resize to ``target_size`` and save.

    Args:
        dicom_path: Path of the DICOM file; its base name (without extension) is
            used as the file ID in logs and mask file names.
        output_path: Destination image path; the parent directory is created if needed.
        target_size: ``(width, height)`` of the output. The default is bound to the
            value TARGET_SIZE has when this function is defined.

    Returns:
        True on success, False when any step raised (the error is printed).
    """
    try:
        file_id = os.path.splitext(os.path.basename(dicom_path))[0]

        # Read DICOM file
        dicom_data = pydicom.dcmread(dicom_path)

        # Extract image data
        image_array = dicom_data.pixel_array
        print(f"{ANSI['C']}📁 Processing {file_id}: {image_array.shape}{ANSI['W']}")

        # MONOCHROME1 stores the minimum value as white: invert so higher = brighter
        if getattr(dicom_data, 'PhotometricInterpretation', None) == 'MONOCHROME1':
            image_array = np.max(image_array) - image_array

        # Normalize to 0-255 range. Rescaling is needed when values exceed 8 bits and
        # also when they are negative (signed pixel data would wrap around on a plain
        # uint8 cast). The arithmetic is done in float64 so that subtracting the
        # minimum cannot overflow a narrow integer dtype.
        pixel_min = float(image_array.min())
        pixel_max = float(image_array.max())
        if pixel_max > 255 or pixel_min < 0:
            value_range = pixel_max - pixel_min
            if value_range > 0:
                image_array = ((image_array.astype(np.float64) - pixel_min) / value_range * 255).astype(np.uint8)
            else:
                # Constant image: nothing to stretch, avoid a division by zero.
                image_array = np.zeros(image_array.shape, dtype=np.uint8)
        else:
            image_array = image_array.astype(np.uint8)

        # Apply lung segmentation and cropping
        processed_image = process_image_with_segmentation(image_array, file_id)

        # Convert to PIL Image (a 2-D uint8 array is interpreted as mode 'L')
        pil_image = Image.fromarray(processed_image)

        # Resize while preserving aspect ratio
        if PRESERVE_ASPECT_RATIO:
            # thumbnail() only shrinks; smaller images keep their size and are padded
            pil_image.thumbnail(target_size, Image.Resampling.LANCZOS)

            # Create new black canvas of the target size and paste centered
            final_image = Image.new('L', target_size, 0)
            paste_x = (target_size[0] - pil_image.width) // 2
            paste_y = (target_size[1] - pil_image.height) // 2
            final_image.paste(pil_image, (paste_x, paste_y))
            pil_image = final_image
        else:
            pil_image = pil_image.resize(target_size, Image.Resampling.LANCZOS)

        # Save image; a bare file name has no directory part to create
        output_dir = os.path.dirname(output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        pil_image.save(output_path)

        return True

    except Exception as e:
        print(f"{ANSI['R']}❌ Failed to convert {dicom_path}: {e}{ANSI['W']}")
        return False

# Run the conversion over every DICOM collected by the download cell.
# Nothing happens when conversion is switched off or no file is available.
if not (CONVERT and downloaded_files):
    print(f"{ANSI['Y']}⚠️ No files to convert or conversion disabled{ANSI['W']}")
else:
    print(f"{ANSI['M']}🔄 Converting {len(downloaded_files)} DICOM files with lung segmentation{ANSI['W']}")

    # The PNG output folder must exist before the first image is written
    os.makedirs(IMAGES_PATH, exist_ok=True)

    converted_count = 0

    for dicom_path in tqdm(downloaded_files, desc="Converting DICOMs"):
        # "<folder>/<id>.dcm" -> "<id>" -> "<IMAGES_PATH>/<id>.png"
        file_id = os.path.splitext(os.path.split(dicom_path)[1])[0]
        output_path = os.path.join(IMAGES_PATH, file_id + ".png")

        # The converter reports success as a boolean; count the successes
        converted_count += 1 if convert_dicom_to_image_with_segmentation(dicom_path, output_path) else 0

    print(f"{ANSI['G']}✅ Successfully converted {converted_count}/{len(downloaded_files)} files{ANSI['W']}")
    print(f"{ANSI['C']}📂 Images saved to: {IMAGES_PATH}{ANSI['W']}")

    if SAVE_SEGMENTATION_MASKS:
        print(f"{ANSI['C']}🎯 Segmentation masks saved to: {MASKS_PATH}{ANSI['W']}")

    print(f"{ANSI['M']}🎉 V1.3 Pre-trained lung segmentation processing complete!{ANSI['W']}")
