In [2]:
# ============================================================================
# CELL 0: AUTO JSON NORMALIZATION & VALIDATION
# ============================================================================
"""
Bu cell:
1. data/X/ klas√∂rlerindeki T√úM JSON dosyalarƒ±nƒ± bulur
2. Formatƒ± kontrol eder
3. Otomatik d√ºzeltir
4. data/X_normalized/ klas√∂r√ºne kaydeder
5. Hata raporu verir
"""

import json
from pathlib import Path
from copy import deepcopy

# ============================================================================
# NORMALIZATION RULES
# ============================================================================

# Template for a normalized entity: every output entity has exactly these
# keys, and any key missing from the input keeps the default shown here.
CANONICAL_ENTITY = {
    "abnormality": None,
    "finding": None,
    "presence": "unknown",
    "location": [],
    "degree": [],
    "measurement": None,
    "comparison": None
}


def normalize_entity(entity: dict) -> dict:
    """Return a copy of *entity* coerced to the canonical schema.

    Only the keys of ``CANONICAL_ENTITY`` are kept; unknown keys are dropped
    and missing keys keep their canonical default.

    Rules applied per key:
      * A string that is empty or equals "none" (case-insensitive, ignoring
        surrounding whitespace) becomes ``None``.
      * ``location`` and ``degree`` always become lists: ``None`` -> ``[]``,
        a string -> one-element list, any other non-list -> ``[str(value)]``.
        Elements that are ``None``, "none" or blank are removed.
      * ``presence`` becomes one of "present", "absent", "uncertain" or
        "unknown"; anything unrecognised (including non-string values)
        maps to "unknown".

    Args:
        entity: Raw entity mapping as loaded from JSON.

    Returns:
        A new dict; neither *entity* nor ``CANONICAL_ENTITY`` is mutated.
    """
    # deepcopy so the list defaults are never shared between entities.
    normalized = deepcopy(CANONICAL_ENTITY)

    for key in normalized:
        if key not in entity:
            continue

        value = entity[key]

        # Placeholder strings ("", "  ", "None", " none ") mean "no value".
        if isinstance(value, str) and value.strip().lower() in ("", "none"):
            value = None

        if key in ("location", "degree"):
            # These fields MUST be lists.
            if value is None:
                value = []
            elif isinstance(value, str):
                value = [value]
            elif not isinstance(value, list):
                value = [str(value)]
            # Drop placeholder elements so a list is cleaned the same way a
            # scalar is (None, "None" and blank strings carry no information).
            value = [
                v for v in value
                if str(v).strip().lower() not in ("", "none")
            ]
        elif key == "presence":
            if isinstance(value, str):
                value = value.strip().lower()
                if value not in ("present", "absent", "uncertain"):
                    value = "unknown"
            else:
                # None, booleans, numbers, ... are not valid presence labels.
                value = "unknown"

        normalized[key] = value

    return normalized

def normalize_json(data: dict) -> dict:
    """Normalize one loaded JSON document into the canonical record shape.

    Args:
        data: The parsed JSON. Either a record object, or a list of records,
            in which case only the FIRST element is used.

    Returns:
        A dict with exactly three keys:
          * ``instruction`` - stripped text, or the default extraction prompt
            when the field is missing, null or blank;
          * ``input`` - stripped text ("" when missing or null);
          * ``output`` - list of entities passed through ``normalize_entity``.

    Raises:
        ValueError: for an empty list, a non-object record, a missing or
            non-list ``output``, a non-object entity, or a non-string
            ``instruction`` / ``input``.
    """
    # Handle array format [{"instruction": ...}]
    if isinstance(data, list):
        if not data:
            raise ValueError("Empty array")
        data = data[0]

    if not isinstance(data, dict):
        raise ValueError(
            f"Expected a JSON object, got {type(data).__name__}"
        )

    # Ensure required fields
    if "output" not in data:
        raise ValueError("Missing 'output' field")

    raw_output = data["output"]
    if not isinstance(raw_output, list):
        raise ValueError(
            f"'output' must be a list, got {type(raw_output).__name__}"
        )

    def _text(field: str) -> str:
        # JSON null must behave like a missing field; calling .strip() on
        # None would otherwise raise AttributeError.
        value = data.get(field)
        if value is None:
            return ""
        if not isinstance(value, str):
            raise ValueError(
                f"'{field}' must be a string, got {type(value).__name__}"
            )
        return value.strip()

    normalized_output = []
    for index, ent in enumerate(raw_output):
        # Reject malformed entities instead of silently emitting an
        # all-default entity for them.
        if not isinstance(ent, dict):
            raise ValueError(
                f"output[{index}] must be an object, got {type(ent).__name__}"
            )
        normalized_output.append(normalize_entity(ent))

    return {
        "instruction": _text("instruction") or
                      "Extract medical entities from the given radiology report snippet and format them into the specified JSON schema. Pay attention to negations and normal anatomy.",
        "input": _text("input"),
        "output": normalized_output
    }

# ============================================================================
# AUTO-DISCOVER & NORMALIZE ALL DIRECTORIES
# ============================================================================

def auto_normalize_all(base_dir: str = "data"):
    """Find and normalize the JSON files of every numbered data directory.

    For each sub-directory of *base_dir* whose name is a decimal number
    (``0``, ``1``, ``2``, ...), every ``gt*.json`` and ``sample*.json`` file
    is loaded, passed through ``normalize_json`` and written under the same
    file name to ``<base_dir>/<N>_normalized/``. Progress and a final summary
    are printed.

    Args:
        base_dir: Directory that contains the numbered data directories.

    Returns:
        A ``(total_processed, total_errors)`` tuple. ``(0, 0)`` is returned
        when *base_dir* does not exist, so callers can always unpack it.
    """
    base_path = Path(base_dir)

    if not base_path.exists():
        print(f"‚ùå Base directory not found: {base_dir}")
        return 0, 0

    # Find all numbered directories (0, 1, 2, ...) and order them by numeric
    # value so that "10" comes after "2". isdecimal() (rather than isdigit())
    # guarantees that int() accepts the name.
    data_dirs = sorted(
        (d for d in base_path.iterdir()
         if d.is_dir() and d.name.isdecimal()),
        key=lambda d: (int(d.name), d.name),
    )

    print("="*70)
    print("AUTO JSON NORMALIZATION")
    print("="*70)
    print(f"\nBase directory: {base_dir}")
    print(f"Found {len(data_dirs)} data directories: {[d.name for d in data_dirs]}")

    total_processed = 0
    total_errors = 0

    for data_dir in data_dirs:
        print(f"\n{'='*70}")
        print(f"Processing: {data_dir}")
        print(f"{'='*70}")

        # Create normalized output directory
        output_dir = base_path / f"{data_dir.name}_normalized"
        output_dir.mkdir(exist_ok=True)

        # Find all JSON files (gt*.json and sample*.json)
        json_files = list(data_dir.glob("gt*.json")) + list(data_dir.glob("sample*.json"))

        if not json_files:
            print(f"‚ö† No JSON files found in {data_dir}")
            continue

        print(f"Found {len(json_files)} JSON files")

        for json_file in sorted(json_files):
            # One bad file must not abort the batch: report it, count it and
            # carry on with the next file.
            try:
                # Load
                with open(json_file, 'r', encoding='utf-8') as f:
                    raw_data = json.load(f)

                # Normalize
                normalized = normalize_json(raw_data)

                # Save
                output_file = output_dir / json_file.name
                with open(output_file, 'w', encoding='utf-8') as f:
                    json.dump(normalized, f, indent=2, ensure_ascii=False)

                print(f"  ‚úÖ {json_file.name} ‚Üí {output_dir.name}/{json_file.name}")
                total_processed += 1

            except Exception as e:
                print(f"  ‚ùå {json_file.name}: {e}")
                total_errors += 1

    print(f"\n{'='*70}")
    print("NORMALIZATION SUMMARY")
    print(f"{'='*70}")
    print(f"Total processed: {total_processed}")
    print(f"Total errors: {total_errors}")

    if total_errors == 0:
        print("\n‚úÖ ALL FILES NORMALIZED SUCCESSFULLY!")
    else:
        print(f"\n‚ö† {total_errors} files had errors")

    return total_processed, total_errors

# ============================================================================
# RUN AUTO-NORMALIZATION
# ============================================================================

print("üîÑ Starting auto-normalization...\n")
processed, errors = auto_normalize_all("data")

if errors == 0 and processed > 0:
    print("\n" + "="*70)
    print("‚úÖ READY FOR EVALUATION!")
    print("="*70)
    print("\nNormalized directories:")
    for i in range(10):  # Check 0-9
        norm_dir = Path(f"data/{i}_normalized")
        if norm_dir.exists():
            files = list(norm_dir.glob("*.json"))
            print(f"  data/{i}_normalized/: {len(files)} files")


üîÑ Starting auto-normalization...

AUTO JSON NORMALIZATION

Base directory: data
Found 10 data directories: ['1', '2', '3', '4', '5', '6', '0', '7', '8', '9']

Processing: data/0
Found 6 JSON files
  ‚úÖ gt0.json ‚Üí 0_normalized/gt0.json
  ‚úÖ sample0.0.json ‚Üí 0_normalized/sample0.0.json
  ‚úÖ sample0.1.json ‚Üí 0_normalized/sample0.1.json
  ‚úÖ sample0.2.json ‚Üí 0_normalized/sample0.2.json
  ‚úÖ sample0.3.json ‚Üí 0_normalized/sample0.3.json
  ‚úÖ sample0.4.json ‚Üí 0_normalized/sample0.4.json

Processing: data/1
‚ö† No JSON files found in data/1

Processing: data/2
‚ö† No JSON files found in data/2

Processing: data/3
‚ö† No JSON files found in data/3

Processing: data/4
‚ö† No JSON files found in data/4

Processing: data/5
‚ö† No JSON files found in data/5

Processing: data/6
‚ö† No JSON files found in data/6

Processing: data/7
‚ö† No JSON files found in data/7

Processing: data/8
‚ö† No JSON files found in data/8

Processing: data/9
‚ö† No JSON files found in data/9

NORMALI

In [1]:
# ============================================================================
# comprehensive_evaluation.ipynb - CELL 1
# ============================================================================

import os

from comprehensive_evaluation import ComprehensiveMultiModelEvaluator

# API keys
# SECURITY: keys are read from environment variables so that no secret is
# stored in the notebook. The keys that were previously hard-coded here must
# be treated as leaked and revoked/rotated with each provider.
_API_KEY_ENV_VARS = {
    "gemini": "GEMINI_API_KEY",
    "gemma": "GEMMA_API_KEY",
    "glm": "GLM_API_KEY",
    "deepseek": "DEEPSEEK_API_KEY",
}
# Every provider keeps an entry (empty string when unset) so the dict has the
# same keys as before.
API_KEYS = {
    provider: os.environ.get(env_name, "")
    for provider, env_name in _API_KEY_ENV_VARS.items()
}
_missing_env = [
    env_name for provider, env_name in _API_KEY_ENV_VARS.items()
    if not API_KEYS[provider]
]
if _missing_env:
    print(f"WARNING: API key environment variables not set: {', '.join(_missing_env)}")

# Configuration
DATA_DIR = "./data/0_normalized/"
OUTPUT_DIR = "./data/0_normalized/ulti_comp_results/"
GT_FILE = "gt0.json"

# Model selection (keep the list short to begin with)
SELECTED_LLMS = [
    "gemini_flash",      # for quick tests
    # "gemini_pro",      # for quality (uncomment to enable)
    # "gemma",           # open source (uncomment to enable)
]

SELECTED_EMBEDDINGS = [
    "general_baseline",  # baseline
    "pubmedbert",        # PubMed
    "s_pubmedbert",      # clinical
    "neuml_pubmedbert",  # NeuML
]


# Select the run modes
RUN_MODES = [
    'structural',  # baseline
    'embedding',   # embedding only
    'llm',         # LLM only
    'combined'     # full pipeline (takes the longest)
]



# Initialize
evaluator = ComprehensiveMultiModelEvaluator(
    api_keys=API_KEYS,
    data_dir=DATA_DIR,
    output_base_dir=OUTPUT_DIR,
    selected_llms=SELECTED_LLMS,
    selected_embeddings=SELECTED_EMBEDDINGS
)




# RUN
results = evaluator.run_full_evaluation(
    gt_file=GT_FILE,
    run_modes=RUN_MODES
)

print("\n‚úÖ EVALUATION COMPLETE!")
# OUTPUT_DIR already ends with "/"; strip it so the path is not shown as "//".
print(f"Results saved to: {OUTPUT_DIR.rstrip('/')}/")

  from .autonotebook import tqdm as notebook_tqdm


COMPREHENSIVE MULTI-MODEL EVALUATOR

Data directory: data/0_normalized
Output directory: data/0_normalized/ulti_comp_results

LLM models to test: 1
  - gemini_flash: models/gemini-2.5-flash

Embedding models to test: 4
  - general_baseline: sentence-transformers/all-MiniLM-L6-v2
  - pubmedbert: microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext
  - s_pubmedbert: pritamdeka/S-PubMedBert-MS-MARCO
  - neuml_pubmedbert: NeuML/pubmedbert-base-embeddings

‚ö†Ô∏è  Total combinations: 10

LOADING DATA
Ground truth: gt0.json
Samples: 5

MODE 1: STRUCTURAL EVALUATION ONLY
  sample0.0.json: 0.540
  sample0.1.json: 0.270
  sample0.2.json: 0.500
  sample0.3.json: 0.530
  sample0.4.json: 0.902
    üíæ Saved to: data/0_normalized/ulti_comp_results/structural_only/

MODE 2: EMBEDDING MODELS EVALUATION

--- Testing: general_baseline ---
    Loading: sentence-transformers/all-MiniLM-L6-v2...


Loading weights: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 103/103 [00:00<00:00, 680.08it/s, Materializing param=pooler.dense.weight]                             


    ‚úÖ Loaded successfully
  sample0.0.json: struct=0.540, sem=0.786
  sample0.1.json: struct=0.270, sem=0.226
  sample0.2.json: struct=0.500, sem=0.919
  sample0.3.json: struct=0.530, sem=0.681
  sample0.4.json: struct=0.902, sem=0.926
    üíæ Saved to: data/0_normalized/ulti_comp_results/embedding_general_baseline/

--- Testing: pubmedbert ---
    Loading: microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext...


No sentence-transformers model found with name microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext. Creating a new one with mean pooling.
Loading weights: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 199/199 [00:00<00:00, 627.30it/s, Materializing param=pooler.dense.weight]                               


    ‚úÖ Loaded successfully
  sample0.0.json: struct=0.540, sem=0.984
  sample0.1.json: struct=0.270, sem=0.966
  sample0.2.json: struct=0.500, sem=0.992
  sample0.3.json: struct=0.530, sem=0.995
  sample0.4.json: struct=0.902, sem=0.995
    üíæ Saved to: data/0_normalized/ulti_comp_results/embedding_pubmedbert/

--- Testing: s_pubmedbert ---
    Loading: pritamdeka/S-PubMedBert-MS-MARCO...


Loading weights: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 199/199 [00:00<00:00, 746.21it/s, Materializing param=pooler.dense.weight]                               


    ‚úÖ Loaded successfully
  sample0.0.json: struct=0.540, sem=0.970
  sample0.1.json: struct=0.270, sem=0.896
  sample0.2.json: struct=0.500, sem=0.969
  sample0.3.json: struct=0.530, sem=0.975
  sample0.4.json: struct=0.902, sem=0.988
    üíæ Saved to: data/0_normalized/ulti_comp_results/embedding_s_pubmedbert/

--- Testing: neuml_pubmedbert ---
    Loading: NeuML/pubmedbert-base-embeddings...


Loading weights: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 199/199 [00:00<00:00, 785.82it/s, Materializing param=pooler.dense.weight]                               


    ‚úÖ Loaded successfully
  sample0.0.json: struct=0.540, sem=0.791
  sample0.1.json: struct=0.270, sem=0.056
  sample0.2.json: struct=0.500, sem=0.888
  sample0.3.json: struct=0.530, sem=0.818
  sample0.4.json: struct=0.902, sem=0.964
    üíæ Saved to: data/0_normalized/ulti_comp_results/embedding_neuml_pubmedbert/

MODE 3: LLM MODELS EVALUATION

--- Testing: gemini_flash ---
‚úì gemini modeli ba≈ülatƒ±ldƒ±: models/gemini-2.5-flash
  Rate limit: Her istek arasƒ± 1.0 saniye bekleme
  sample0.0.json: struct=0.540, llm=0.400
  sample0.1.json: struct=0.270, llm=0.000
  sample0.2.json: struct=0.500, llm=0.000
  sample0.3.json: struct=0.530, llm=0.100
  sample0.4.json: struct=0.902, llm=0.700
    üíæ Saved to: data/0_normalized/ulti_comp_results/llm_gemini_flash/

MODE 4: COMBINED EVALUATION (Embedding + LLM)
Total combinations: 4

[1/4] general_baseline + gemini_flash
    Loading: sentence-transformers/all-MiniLM-L6-v2...


Loading weights: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 103/103 [00:00<00:00, 946.12it/s, Materializing param=pooler.dense.weight]                             


    ‚úÖ Loaded successfully
‚úì gemini modeli ba≈ülatƒ±ldƒ±: models/gemini-2.5-flash
  Rate limit: Her istek arasƒ± 1.0 saniye bekleme
  sample0.0.json: struct=0.540, sem=0.786, llm=0.350
  sample0.1.json: struct=0.270, sem=0.226, llm=0.000
  sample0.2.json: struct=0.500, sem=0.919, llm=0.000
  sample0.3.json: struct=0.530, sem=0.681, llm=0.333
  sample0.4.json: struct=0.902, sem=0.926, llm=0.900
    üíæ Saved to: data/0_normalized/ulti_comp_results/combined_general_baseline_gemini_flash/

[2/4] pubmedbert + gemini_flash
    Loading: microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext...


No sentence-transformers model found with name microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext. Creating a new one with mean pooling.
Loading weights: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 199/199 [00:00<00:00, 828.41it/s, Materializing param=pooler.dense.weight]                               


    ‚úÖ Loaded successfully
‚úì gemini modeli ba≈ülatƒ±ldƒ±: models/gemini-2.5-flash
  Rate limit: Her istek arasƒ± 1.0 saniye bekleme
  sample0.0.json: struct=0.540, sem=0.984, llm=0.400
  sample0.1.json: struct=0.270, sem=0.966, llm=0.000
  sample0.2.json: struct=0.500, sem=0.992, llm=0.000
  sample0.3.json: struct=0.530, sem=0.995, llm=0.100
  sample0.4.json: struct=0.902, sem=0.995, llm=0.920
    üíæ Saved to: data/0_normalized/ulti_comp_results/combined_pubmedbert_gemini_flash/

[3/4] s_pubmedbert + gemini_flash
    Loading: pritamdeka/S-PubMedBert-MS-MARCO...


Loading weights: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 199/199 [00:00<00:00, 956.37it/s, Materializing param=pooler.dense.weight]                                


    ‚úÖ Loaded successfully
‚úì gemini modeli ba≈ülatƒ±ldƒ±: models/gemini-2.5-flash
  Rate limit: Her istek arasƒ± 1.0 saniye bekleme
  sample0.0.json: struct=0.540, sem=0.970, llm=0.600
  sample0.1.json: struct=0.270, sem=0.896, llm=0.000
  sample0.2.json: struct=0.500, sem=0.969, llm=0.100
  sample0.3.json: struct=0.530, sem=0.975, llm=0.100
  sample0.4.json: struct=0.902, sem=0.988, llm=1.000
    üíæ Saved to: data/0_normalized/ulti_comp_results/combined_s_pubmedbert_gemini_flash/

[4/4] neuml_pubmedbert + gemini_flash
    Loading: NeuML/pubmedbert-base-embeddings...


Loading weights: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 199/199 [00:00<00:00, 833.49it/s, Materializing param=pooler.dense.weight]                               


    ‚úÖ Loaded successfully
‚úì gemini modeli ba≈ülatƒ±ldƒ±: models/gemini-2.5-flash
  Rate limit: Her istek arasƒ± 1.0 saniye bekleme
  sample0.0.json: struct=0.540, sem=0.791, llm=0.400
  sample0.1.json: struct=0.270, sem=0.056, llm=0.000
  sample0.2.json: struct=0.500, sem=0.888, llm=0.000
  sample0.3.json: struct=0.530, sem=0.818, llm=0.390
  sample0.4.json: struct=0.902, sem=0.964, llm=0.950
    üíæ Saved to: data/0_normalized/ulti_comp_results/combined_neuml_pubmedbert_gemini_flash/

‚úÖ FINAL REPORT: data/0_normalized/ulti_comp_results/FINAL_COMPARISON_REPORT.txt

‚úÖ EVALUATION COMPLETE!
Results saved to: ./data/0_normalized/ulti_comp_results//
