In [None]:
# ---------------------------------------------------------------------------
# Pipeline configuration (V1.4 simplified)
# Every tunable value used by the later cells is defined here.
# ---------------------------------------------------------------------------

# Feature switches
CONVERT = True      # gate for the DICOM -> PNG conversion loop
SAVE_MASKS = True   # gate for writing mask / overlay PNGs

# Image processing parameters
CROP_MARGIN = 40          # Margin around lungs for final crop
LUNG_THRESHOLD = 0.1      # Single threshold for lung detection
TARGET_SIZE = (518, 518)  # final image size, unpacked as (width, height)

# Label sheet location and format
CSV_SEPARATOR = ";"
CSV_LABELS_FILE = "Labeled_Data_RAW_Sample.csv"
CSV_FOLDER = "/home/pyuser/data/Paradise_CSV/"

# Working directories
MASKS_PATH = "/home/pyuser/data/Paradise_Masks"
IMAGES_PATH = "/home/pyuser/data/Paradise_Test_Images"
DOWNLOAD_PATH = "/home/pyuser/data/Paradise_Test_DICOMs"

print("V1.4 Simplified configuration loaded!")
print(f"Target size: {TARGET_SIZE}")
print(f"Lung threshold: {LUNG_THRESHOLD}")


In [None]:
# Core dependencies
import ArchiMedConnector.A3_Connector as A3_Conn
import pandas as pd
import os
import pydicom
import numpy as np
from PIL import Image
import glob
from tqdm import tqdm
import cv2
import subprocess
import sys

# Reaching this line means every import above succeeded.
print("Core dependencies loaded")

# Initialize ArchiMed connector
# One shared instance; later cells call `getUserInfos()` and `downloadFile(...)`
# on it.
# NOTE(review): the constructor is called without arguments, so credentials and
# endpoint are presumably resolved inside ArchiMedConnector — confirm.
a3conn = A3_Conn.A3_Connector()


In [None]:
# Simple segmentation model setup
#
# Outcome of this cell (module-level names read by later cells):
#   segmentation_model -- the TorchXRayVision PSPNet instance, or None
#   model_type         -- 'torchxray' when the model loaded, otherwise 'fallback'
#   xrv, torch         -- imported at cell level because the segmentation
#                         function refers to them as globals
segmentation_model = None
model_type = None

# Try TorchXRayVision first (best option)
try:
    try:
        import torchxrayvision as xrv
        import torch
        _loaded_message = "✅ TorchXRayVision loaded"
    except ImportError:
        # Package missing from the kernel environment: install it, then retry.
        print("⚠️ Installing TorchXRayVision...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", "torchxrayvision"])
        import torchxrayvision as xrv
        import torch
        _loaded_message = "✅ TorchXRayVision installed and loaded"

    segmentation_model = xrv.baseline_models.chestx_det.PSPNet()
    model_type = 'torchxray'
    print(_loaded_message)
except Exception as exc:
    # `Exception` (not a bare `except`) so Ctrl-C / kernel interrupts still
    # propagate. Any other failure -- pip error, import error after install,
    # model construction error -- degrades to the OpenCV fallback instead of
    # aborting the cell with `model_type` left unset.
    segmentation_model = None
    model_type = 'fallback'
    print("❌ TorchXRayVision unavailable, using fallback")
    print(f"   Reason: {type(exc).__name__}: {exc}")

print(f"Segmentation method: {model_type}")


In [None]:
def simple_lung_segmentation(image):
    """Segment the lungs in a chest X-ray and return a cleaned binary mask.

    The TorchXRayVision model is used when the module-level ``model_type`` is
    ``'torchxray'``; any other value selects a CLAHE + Otsu fallback.

    Args:
        image: 2-D grayscale or 3-D RGB NumPy array. The model branch
            normalises against a maximum of 255, so an 8-bit image is expected.

    Returns:
        ``np.uint8`` array with the same height and width as ``image``, holding
        1 for pixels classified as lung and 0 elsewhere, after morphological
        closing and opening with a 10x10 elliptical kernel.
    """
    # Both branches operate on a single-channel image.
    if len(image.shape) == 3:
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    else:
        gray = image

    if model_type == 'torchxray':
        # Normalize, add a leading channel axis and resize for the model.
        model_input = xrv.datasets.normalize(gray, 255)[None, ...]
        model_input = xrv.datasets.XRayResizer(512)(model_input)
        batch = torch.from_numpy(model_input).float().unsqueeze(0)

        # Run inference
        with torch.no_grad():
            output = segmentation_model(batch)

        # Accumulate the two lung channels. The buffer is sized from the model
        # output instead of a hard-coded (512, 512).
        class_maps = output[0].cpu().numpy()
        lung_mask = np.zeros(class_maps.shape[-2:])
        for idx, target in enumerate(segmentation_model.targets):
            if target in ('Left Lung', 'Right Lung'):
                # NOTE(review): the raw channel values are summed and compared
                # with LUNG_THRESHOLD as-is. If the network emits logits rather
                # than probabilities, a strongly negative score in one lung
                # channel can cancel a positive score in the other — confirm
                # against the TorchXRayVision documentation and apply a sigmoid
                # per channel if so.
                lung_mask += class_maps[idx]

        # Resize back to the input resolution and threshold.
        lung_mask = cv2.resize(lung_mask, (image.shape[1], image.shape[0]))
        binary_mask = (lung_mask > LUNG_THRESHOLD).astype(np.uint8)

    else:
        # Fallback segmentation: contrast-equalise, then Otsu threshold.
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        equalized = clahe.apply(gray)

        _, otsu_mask = cv2.threshold(equalized, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

        # The previous version also computed an adaptive threshold and took
        # max(otsu * 0.7, adaptive * 0.3) > 128. Because 0.3 * 255 = 76.5 can
        # never exceed 128, that test was true exactly where the Otsu mask was
        # 255, so the adaptive pass had no effect and is dropped here. The
        # resulting mask is unchanged.
        binary_mask = (otsu_mask > 128).astype(np.uint8)

    # Clean up mask: close small holes, then remove small specks.
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (10, 10))
    binary_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_CLOSE, kernel)
    binary_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_OPEN, kernel)

    return binary_mask

def crop_to_clear_area(image, binary_mask):
    """Crop ``image`` to a TARGET_SIZE-shaped window centred on the lung mask.

    The bounding box of the non-zero mask pixels is padded by ``CROP_MARGIN``
    on each side, grown along one axis to match the aspect ratio of
    ``TARGET_SIZE``, centred on the lungs and shifted back inside the image
    where it would overflow. A window larger than the image is clipped.

    Args:
        image: 2-D or 3-D array; the first two axes are (rows, columns).
        binary_mask: 2-D array aligned with ``image``; values > 0 mark lung.

    Returns:
        ``(cropped, (x_min, y_min, x_max, y_max))`` where the bounds are plain
        Python ints with exclusive maxima. When the mask is empty the original
        image is returned together with bounds covering the whole image, so
        callers can always unpack two values.
    """
    h, w = image.shape[:2]

    # Get lung boundaries
    lung_rows, lung_cols = np.nonzero(binary_mask > 0)
    if lung_rows.size == 0:
        # No lungs found: keep the full frame but honour the 2-tuple contract
        # (the previous version returned the bare image here).
        return image, (0, 0, w, h)

    # Plain ints keep the returned bounds free of NumPy scalar types.
    y_min, y_max = int(lung_rows.min()), int(lung_rows.max())
    x_min, x_max = int(lung_cols.min()), int(lung_cols.max())

    lung_center_y = (y_min + y_max) // 2
    lung_center_x = (x_min + x_max) // 2

    # Calculate crop size with target aspect ratio. The max(1, ...) guard only
    # matters for a zero/negative margin on a one-pixel mask, where the ratio
    # below would otherwise divide by zero.
    target_aspect = TARGET_SIZE[0] / TARGET_SIZE[1]
    lung_width = max(1, x_max - x_min + 2 * CROP_MARGIN)
    lung_height = max(1, y_max - y_min + 2 * CROP_MARGIN)

    if lung_width / lung_height > target_aspect:
        crop_width = lung_width
        crop_height = int(crop_width / target_aspect)
    else:
        crop_height = lung_height
        crop_width = int(crop_height * target_aspect)

    # Center crop on lungs
    crop_x_min = max(0, lung_center_x - crop_width // 2)
    crop_y_min = max(0, lung_center_y - crop_height // 2)
    crop_x_max = min(w, crop_x_min + crop_width)
    crop_y_max = min(h, crop_y_min + crop_height)

    # When the window hits the right/bottom edge, slide it back so it keeps its
    # full size; clamp at 0 in case the window is larger than the image.
    if crop_x_max == w:
        crop_x_min = max(0, w - crop_width)
    if crop_y_max == h:
        crop_y_min = max(0, h - crop_height)

    # Slicing the first two axes works for both grayscale and colour images.
    cropped = image[crop_y_min:crop_y_max, crop_x_min:crop_x_max]

    return cropped, (crop_x_min, crop_y_min, crop_x_max, crop_y_max)

def save_simple_masks(image, binary_mask, crop_bounds, file_id):
    """Write the binary lung mask and a diagnostic overlay to ``MASKS_PATH``.

    Does nothing when ``SAVE_MASKS`` is false. Otherwise two PNGs are written:
    ``<file_id>_mask.png`` (mask scaled to 0/255) and ``<file_id>_overlay.png``
    (the image with everything outside the crop darkened, the mask contours
    drawn, and the crop rectangle outlined).

    Args:
        image: 2-D grayscale or 3-D colour array the mask was computed from.
        binary_mask: ``np.uint8`` mask aligned with ``image``.
        crop_bounds: ``(x_min, y_min, x_max, y_max)`` of the crop window.
        file_id: identifier used as the output file-name prefix.

    Returns:
        None. A failed write is reported with a warning rather than raised.
    """
    if not SAVE_MASKS:
        return

    os.makedirs(MASKS_PATH, exist_ok=True)

    # Save binary mask. cv2.imwrite signals failure through its return value,
    # so check it instead of assuming the file exists.
    mask_path = os.path.join(MASKS_PATH, f"{file_id}_mask.png")
    mask_image = (binary_mask * 255).astype(np.uint8)
    if not cv2.imwrite(mask_path, mask_image):
        print(f"⚠️ Could not write mask: {mask_path}")

    # Save simple overlay
    overlay_path = os.path.join(MASKS_PATH, f"{file_id}_overlay.png")
    overlay = image.copy()
    if len(overlay.shape) == 2:
        overlay = cv2.cvtColor(overlay, cv2.COLOR_GRAY2RGB)

    # Plain ints: the bounds may arrive as NumPy scalars, which some OpenCV
    # builds reject as point coordinates.
    crop_x_min, crop_y_min, crop_x_max, crop_y_max = (int(v) for v in crop_bounds)

    # Darken areas outside crop
    outside = np.ones(overlay.shape[:2], dtype=bool)
    outside[crop_y_min:crop_y_max, crop_x_min:crop_x_max] = False
    overlay[outside] = (overlay[outside] * 0.5).astype(np.uint8)

    # Add lung contours in red (cv2.imwrite treats channels as BGR).
    # Indexing with [-2] picks the contour list whether findContours returns
    # two values (OpenCV 2.x / 4.x) or three (OpenCV 3.x).
    contours = cv2.findContours(binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    cv2.drawContours(overlay, contours, -1, (0, 0, 255), 2)

    # Add crop rectangle in cyan
    cv2.rectangle(overlay, (crop_x_min, crop_y_min), (crop_x_max, crop_y_max), (255, 255, 0), 2)

    if not cv2.imwrite(overlay_path, overlay):
        print(f"⚠️ Could not write overlay: {overlay_path}")

print("Simple processing functions loaded")


In [None]:
# Load CSV and download files
#
# Produces `downloaded_files`: the list of local DICOM paths that exist after
# this cell (freshly downloaded or already present). It is initialised before
# the `try` so the conversion cell can rely on it, and paths collected before
# an unexpected failure are kept instead of being discarded.
downloaded_files = []

try:
    user_info = a3conn.getUserInfos()
    print(f"ArchiMed user: {user_info}")

    # Load CSV
    csv_path = os.path.join(CSV_FOLDER, CSV_LABELS_FILE)
    df = pd.read_csv(csv_path, sep=CSV_SEPARATOR)
    print(f"Loaded CSV with {len(df)} rows")

    # Find FileID column (first matching spelling wins)
    file_id_column = next(
        (col for col in ('FileID', 'file_id', 'File_ID') if col in df.columns),
        None,
    )
    if file_id_column is None:
        raise ValueError("FileID column not found")

    # Get file IDs
    file_ids = df[file_id_column].dropna().unique()
    total_files = len(file_ids)

    print(f"Starting download of {total_files} files")

    # Download files
    os.makedirs(DOWNLOAD_PATH, exist_ok=True)

    for i, file_id in enumerate(file_ids):
        progress = ((i + 1) / total_files) * 100

        # pandas parses an integer column containing blanks as float, turning
        # 12345 into 12345.0. Collapse whole-number floats back to "12345" so
        # int() below succeeds and the file name carries no ".0" suffix.
        if isinstance(file_id, float) and file_id.is_integer():
            file_id_str = str(int(file_id))
        else:
            file_id_str = str(file_id)

        print(f"Downloading {file_id_str} ({progress:.1f}% - {i+1}/{total_files})")

        dicom_file_path = os.path.join(DOWNLOAD_PATH, f"{file_id_str}.dcm")

        if os.path.exists(dicom_file_path):
            print(f"File {file_id_str} already exists, skipping")
            downloaded_files.append(dicom_file_path)
            continue

        # A failure for one file (including a non-numeric ID) is reported and
        # the loop moves on to the next file.
        try:
            result = a3conn.downloadFile(
                int(file_id_str),
                asStream=False,
                destDir=DOWNLOAD_PATH,
                filename=f"{file_id_str}.dcm",
                inWorklist=False
            )

            if result and os.path.exists(dicom_file_path):
                downloaded_files.append(dicom_file_path)
                print(f"✅ Downloaded: {dicom_file_path}")
            else:
                print(f"⚠️ Download unclear for {file_id_str}")
        except Exception as e:
            print(f"⚠️ Failed to download {file_id_str}: {e}")

    print(f"Downloaded {len(downloaded_files)} files successfully")

except Exception as e:
    print(f"Setup failed: {e}")


In [None]:
# Convert DICOM files with simplified processing
def _to_uint8(pixels):
    """Map a pixel array onto ``uint8``; rescale only when values fall outside 0-255."""
    if pixels.dtype == np.uint8:
        return pixels

    lo, hi = pixels.min(), pixels.max()
    if lo >= 0 and hi <= 255:
        # Already in range: plain cast, no contrast stretch.
        return pixels.astype(np.uint8)
    if hi == lo:
        # Constant out-of-range image: min-max scaling would divide by zero.
        return np.zeros(pixels.shape, dtype=np.uint8)

    # Work in float64 so subtracting a negative minimum cannot overflow a
    # narrow integer dtype.
    scaled = (pixels.astype(np.float64) - float(lo)) / (float(hi) - float(lo)) * 255
    return scaled.astype(np.uint8)


def convert_dicom_simple(dicom_path, output_path):
    """Convert one DICOM file to a lung-cropped image of size ``TARGET_SIZE``.

    Steps: read the pixel data, invert MONOCHROME1 images, bring the pixels to
    8-bit, segment the lungs, crop around them (or keep the full frame when the
    mask covers less than 1% or more than 80% of the image), save the mask and
    overlay, centre-crop to the target aspect ratio, resize and save.

    Args:
        dicom_path: path of the DICOM file to read.
        output_path: path of the image file to write.

    Returns:
        True on success. Any exception is caught, reported with ``print`` and
        turned into a False return value, so one bad file does not stop a batch.
    """
    try:
        file_id = os.path.splitext(os.path.basename(dicom_path))[0]

        # Read DICOM
        dicom_data = pydicom.dcmread(dicom_path)
        image_array = dicom_data.pixel_array

        # MONOCHROME1 stores inverted intensities; flip to the usual polarity.
        if getattr(dicom_data, 'PhotometricInterpretation', None) == 'MONOCHROME1':
            image_array = np.max(image_array) - image_array

        # Normalize to 0-255. Unlike the previous max()>255 test, this also
        # rescales images containing negative values instead of letting the
        # uint8 cast wrap them around.
        image_array = _to_uint8(image_array)

        print(f"Processing {file_id}: {image_array.shape}")

        # Lung segmentation
        binary_mask = simple_lung_segmentation(image_array)

        # Check if segmentation found reasonable lung area
        lung_ratio = np.sum(binary_mask) / binary_mask.size
        if lung_ratio < 0.01 or lung_ratio > 0.8:
            print(f"⚠️ Unusual lung ratio {lung_ratio:.3f}, using original image")
            processed_image = image_array
            crop_bounds = (0, 0, image_array.shape[1], image_array.shape[0])
        else:
            # Crop to clear area
            processed_image, crop_bounds = crop_to_clear_area(image_array, binary_mask)
            print(f"✅ Cropped to clear area: {processed_image.shape}")

        # Save masks (drawn on the uncropped image)
        save_simple_masks(image_array, binary_mask, crop_bounds, file_id)

        # Convert to PIL. The array is uint8, so the mode is inferred ('L' for
        # 2-D input) without the deprecated explicit `mode=` argument.
        pil_image = Image.fromarray(processed_image)

        # Maintain aspect ratio by cropping then resizing
        current_width, current_height = pil_image.size
        target_width, target_height = TARGET_SIZE

        current_ratio = current_width / current_height
        target_ratio = target_width / target_height

        if current_ratio > target_ratio:
            # Too wide: trim equally from left and right.
            new_width = int(current_height * target_ratio)
            left = (current_width - new_width) // 2
            pil_image = pil_image.crop((left, 0, left + new_width, current_height))
        else:
            # Too tall (or exact): trim equally from top and bottom.
            new_height = int(current_width / target_ratio)
            top = (current_height - new_height) // 2
            pil_image = pil_image.crop((0, top, current_width, top + new_height))

        # Final resize
        pil_image = pil_image.resize(TARGET_SIZE, Image.Resampling.LANCZOS)

        # Save. A bare file name has an empty dirname, which os.makedirs
        # rejects, so only create the directory when there is one.
        output_dir = os.path.dirname(output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        pil_image.save(output_path)

        return True

    except Exception as e:
        print(f"❌ Failed to convert {dicom_path}: {e}")
        return False

# Batch conversion: run every downloaded DICOM through convert_dicom_simple
# and report how many succeeded. Skipped when conversion is switched off or
# the download step produced no files.
if not (CONVERT and downloaded_files):
    print("⚠️ No files to convert or conversion disabled")
else:
    print(f"Converting {len(downloaded_files)} DICOM files")

    os.makedirs(IMAGES_PATH, exist_ok=True)

    converted_count = 0
    for dicom_path in tqdm(downloaded_files, desc="Converting"):
        # Output PNG is named after the DICOM file's stem.
        dicom_name = os.path.basename(dicom_path)
        file_id = os.path.splitext(dicom_name)[0]
        output_path = os.path.join(IMAGES_PATH, f"{file_id}.png")

        succeeded = convert_dicom_simple(dicom_path, output_path)
        if succeeded:
            converted_count = converted_count + 1

    print(f"✅ Successfully converted {converted_count}/{len(downloaded_files)} files")
    print(f"Images saved to: {IMAGES_PATH}")
    if SAVE_MASKS:
        print(f"Masks saved to: {MASKS_PATH}")
    print("🎉 V1.4 Simplified processing complete!")
