In [1]:
import os
import glob
import torch
import pandas as pd
import numpy as np
import random
from PIL import Image
from tqdm.auto import tqdm
from torch.utils.data import Dataset, DataLoader
import evaluate
from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation
from collections import defaultdict
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

# GPU check: use CUDA when torch reports it as available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"üî• Cihaz: {device}")
if device == "cuda":
    # Report the name of CUDA device 0.
    print(f"üß† Ekran Kartƒ±: {torch.cuda.get_device_name(0)}")

# ADVANCED SETTINGS (for training-set filtering and a balanced test set).
# NOTE(review): several entries are absolute, machine-specific Windows paths;
# they must be adapted before running this notebook on another machine.
CONFIG = {
    # Basic settings
    'root_dir': r"C:\AI_DATA\SEMI_TRUTHS_extracted",
    'model_path': "./segformer_b4_stable/final_best_model",
    'batch_size': 16,
    
    # Training CSV; its image file names are excluded from the test pool
    'train_csv_path': r"segformer_b4_stable\segmentation_dataset_balanced.csv",
    
    # Test CSV files (all that exist are loaded and concatenated)
    'test_csv_paths': [
        r"dataset_splits\fake_only_split\fake_test.csv",
        r"dataset_splits\fake_only_split\fake_train.csv",
        r"dataset_splits\fake_only_split\fake_val.csv"
    ],
    
    # Balancing mode
    'balancing_mode': 'fixed_number',  # Options: 'fixed_number' or 'percentage'
    'samples_per_magnitude_class': 50,  # used when balancing_mode='fixed_number'
    'percentage_to_keep': 70,  # used when balancing_mode='percentage' (0-100)
    
    # Path conversion settings: the training prefix is replaced by the test prefix
    'training_path_prefix': r"C:\Users\DeepLab\Desktop\Grup-17\SEMI_TRUTHS-extracted",
    'test_path_prefix': r"C:\AI_DATA\SEMI_TRUTHS_extracted",
    
    # CSV Export & Visualization Settings
    'save_results': True,  # Enable/disable CSV export
    'output_dir': 'segformer_test_results',  # Base output directory
    'generate_visualizations': True,  # Enable/disable plots
}

# Configuration validation: fail fast on an unsupported balancing mode or an
# out-of-range value for the option that the selected mode actually uses.
print("\n" + "="*70)
print("‚öôÔ∏è YAPILANDIRMA KONTROL√ú")
print("="*70)

# Only the two modes handled by the balancing phase are accepted.
if CONFIG['balancing_mode'] not in ['fixed_number', 'percentage']:
    raise ValueError("‚ùå balancing_mode 'fixed_number' veya 'percentage' olmalƒ±!")

# 'fixed_number' mode needs a positive per-class sample count.
if CONFIG['balancing_mode'] == 'fixed_number' and CONFIG['samples_per_magnitude_class'] <= 0:
    raise ValueError("‚ùå samples_per_magnitude_class pozitif bir sayƒ± olmalƒ±!")

# 'percentage' mode needs a value in the half-open interval (0, 100].
if CONFIG['balancing_mode'] == 'percentage' and not (0 < CONFIG['percentage_to_keep'] <= 100):
    raise ValueError("‚ùå percentage_to_keep 0-100 arasƒ±nda olmalƒ±!")

# Echo the effective configuration so it is recorded in the cell output.
print(f"‚úÖ Dengeleme Modu: {CONFIG['balancing_mode']}")
if CONFIG['balancing_mode'] == 'fixed_number':
    print(f"   üìä Hedef: Her magnitude sƒ±nƒ±fƒ±ndan {CONFIG['samples_per_magnitude_class']:,} g√∂rsel")
else:
    print(f"   üìä Hedef: Kalan verilerin %{CONFIG['percentage_to_keep']}'i")
print(f"   üìÇ Veri dizini: {CONFIG['root_dir']}")
print(f"   üíæ Sonu√ßlarƒ± kaydet: {CONFIG['save_results']}")
print(f"   üìä G√∂rselle≈ütirme: {CONFIG['generate_visualizations']}")
print("="*70)

  torch.utils._pytree._register_pytree_node(


üî• Cihaz: cuda
üß† Ekran Kartƒ±: NVIDIA GeForce RTX 4060 Laptop GPU

‚öôÔ∏è YAPILANDIRMA KONTROL√ú
‚úÖ Dengeleme Modu: fixed_number
   üìä Hedef: Her magnitude sƒ±nƒ±fƒ±ndan 50 g√∂rsel
   üìÇ Veri dizini: C:\AI_DATA\SEMI_TRUTHS_extracted
   üíæ Sonu√ßlarƒ± kaydet: True
   üìä G√∂rselle≈ütirme: True


In [2]:
print("=" * 70)
print("üöÄ GELƒ∞≈ûMƒ∞≈û TEST VERƒ∞Sƒ∞ HAZIRLAMA (Fƒ∞LTRELEME & DENGELEME)")
print("=" * 70)

# ==============================================================================
# PHASE 1: LOAD AND CONCATENATE THE TEST CSV FILES
# ==============================================================================
print("\nüìÇ FAZ 1: Test CSV dosyalarƒ± y√ºkleniyor ve birle≈ütiriliyor...")

combined_test_dfs = []
total_original_samples = 0

for csv_path in CONFIG['test_csv_paths']:
    # A missing CSV is skipped with a warning rather than aborting the run.
    if not os.path.exists(csv_path):
        print(f"‚ö†Ô∏è  Uyarƒ±: {csv_path} bulunamadƒ±, atlanƒ±yor...")
        continue
    
    df = pd.read_csv(csv_path)
    combined_test_dfs.append(df)
    total_original_samples += len(df)
    print(f"   ‚úì Y√ºklendi: {os.path.basename(csv_path)} ({len(df):,} √∂rnek)")

# At least one CSV must have been loaded.
if not combined_test_dfs:
    raise FileNotFoundError("‚ùå Test CSV dosyasƒ± bulunamadƒ±!")

df_combined = pd.concat(combined_test_dfs, ignore_index=True)
print(f"\n‚úÖ Birle≈ütirilmi≈ü test verisi: {len(df_combined):,} √∂rnek")

# Check that the columns used by the later phases are present.
required_cols = ['perturbed_img_id', 'fake_img_path', 'sem_magnitude', 'mask_id']
missing_cols = [col for col in required_cols if col not in df_combined.columns]
if missing_cols:
    raise ValueError(f"‚ùå Eksik s√ºtunlar: {missing_cols}")

# ==============================================================================
# PHASE 2: LOAD THE TRAINING DATA AND BUILD THE EXCLUSION SET
# ==============================================================================
print("\nüìÇ FAZ 2: Eƒüitim verileri y√ºkleniyor (filtreleme i√ßin)...")

if not os.path.exists(CONFIG['train_csv_path']):
    raise FileNotFoundError(f"‚ùå Eƒüitim CSV bulunamadƒ±: {CONFIG['train_csv_path']}")

df_train = pd.read_csv(CONFIG['train_csv_path'])
print(f"   ‚úì Eƒüitim CSV y√ºklendi: {len(df_train):,} √∂rnek")

# Convert the training paths and collect their file names.
training_filenames = set()
path_conversions_checked = 0

for train_path in df_train['image_path']:
    # Path conversion: replace the training prefix with the test prefix.
    converted_path = str(train_path).replace(
        CONFIG['training_path_prefix'], 
        CONFIG['test_path_prefix']
    )
    
    # Keep only the file name (e.g. "image.png"); this is what the filter matches on.
    # NOTE(review): os.path.basename splits on the running platform's separators,
    # so the backslash-style paths presumably require running on Windows - confirm.
    filename = os.path.basename(converted_path)
    training_filenames.add(filename)
    
    # Show the first three conversions (for debugging).
    if path_conversions_checked < 3:
        print(f"   üîÑ Yol d√∂n√º≈ü√ºm √∂rneƒüi:")
        print(f"      Eƒüitim: {train_path}")
        print(f"      D√∂n√º≈üt√ºr√ºlm√º≈ü: {converted_path}")
        print(f"      Dosya adƒ±: {filename}")
        path_conversions_checked += 1

print(f"\n‚úÖ Eƒüitim dƒ±≈ülama seti: {len(training_filenames):,} benzersiz dosya adƒ±")

# ==============================================================================
# PHASE 3: FILTER OUT THE TRAINING IMAGES
# ==============================================================================
print("\nüîç FAZ 3: Eƒüitim g√∂rselleri filtreleniyor...")

# Extract the file name of every test image (adds a helper column to df_combined).
df_combined['filename'] = df_combined['fake_img_path'].apply(lambda x: os.path.basename(str(x)))

# Filter: keep the images whose file name is NOT in the training set.
df_filtered = df_combined[~df_combined['filename'].isin(training_filenames)].copy()

removed_count = len(df_combined) - len(df_filtered)
print(f"   ‚úó √áƒ±karƒ±ldƒ±: {removed_count:,} eƒüitim g√∂rseli")
print(f"   ‚úì Kalan: {len(df_filtered):,} test i√ßin √∂rnek")

# Drop the temporary column from the filtered copy (df_combined keeps it).
df_filtered = df_filtered.drop(columns=['filename'])

# ==============================================================================
# PHASE 4: ANALYZE THE SEM_MAGNITUDE DISTRIBUTION
# ==============================================================================
print("\nüìä FAZ 4: sem_magnitude daƒüƒ±lƒ±mƒ± analiz ediliyor...")

# The groupby object is reused by the balancing phase to fetch each class.
magnitude_groups = df_filtered.groupby('sem_magnitude')
magnitude_counts = df_filtered['sem_magnitude'].value_counts().to_dict()

# Distribution BEFORE balancing.
print("\n   Dengeleme √ñNCESƒ∞ daƒüƒ±lƒ±m:")
for mag in ['small', 'medium', 'large']:
    count = magnitude_counts.get(mag, 0)
    percentage = (count / len(df_filtered) * 100) if len(df_filtered) > 0 else 0
    print(f"   ‚Ä¢ {mag:8s}: {count:6,} √∂rnek ({percentage:5.2f}%)")

# Verify that all three classes are present; balancing fetches each one by name.
missing_classes = [mag for mag in ['small', 'medium', 'large'] if mag not in magnitude_counts]
if missing_classes:
    raise ValueError(f"‚ùå Eksik sem_magnitude sƒ±nƒ±flarƒ±: {missing_classes}")

# ==============================================================================
# PHASE 5: BALANCE THE CLASSES
# ==============================================================================
print(f"\n‚öñÔ∏è  FAZ 5: Sƒ±nƒ±flar dengeleniyor (mod: {CONFIG['balancing_mode']})...")

# For reproducibility. NOTE(review): the sampling below passes random_state=42
# explicitly and nothing in this phase calls the `random` module directly.
random.seed(42)
balanced_samples = []

if CONFIG['balancing_mode'] == 'fixed_number':
    # OPTION 1: a fixed number of samples from every class.
    target_per_class = CONFIG['samples_per_magnitude_class']
    print(f"   Hedef: Her sƒ±nƒ±ftan {target_per_class:,} √∂rnek\n")
    
    for mag in ['small', 'medium', 'large']:
        class_data = magnitude_groups.get_group(mag)
        available = len(class_data)
        
        if available >= target_per_class:
            # Enough samples: randomly pick exactly the target amount.
            sampled = class_data.sample(n=target_per_class, random_state=42)
            balanced_samples.append(sampled)
            print(f"   ‚úì {mag:8s}: {available:,} mevcut √∂rnekten {target_per_class:,} se√ßildi")
        else:
            # Not enough samples: take all of them and warn.
            balanced_samples.append(class_data)
            print(f"   ‚ö†Ô∏è  {mag:8s}: sadece {available:,} mevcut (hedef {target_per_class:,}), HEPSƒ∞ alƒ±nƒ±yor")

elif CONFIG['balancing_mode'] == 'percentage':
    # OPTION 2: a percentage of every class.
    percentage = CONFIG['percentage_to_keep']
    print(f"   Hedef: Her sƒ±nƒ±fƒ±n %{percentage}'i\n")
    
    for mag in ['small', 'medium', 'large']:
        class_data = magnitude_groups.get_group(mag)
        available = len(class_data)
        target = int(available * percentage / 100)
        target = max(1, target)  # At least 1 sample
        
        sampled = class_data.sample(n=target, random_state=42)
        balanced_samples.append(sampled)
        print(f"   ‚úì {mag:8s}: {available:,} √∂rnekten {target:,} se√ßildi (%{percentage})")

# ==============================================================================
# PHASE 6: BUILD THE FINAL DATAFRAME
# ==============================================================================
print("\nüéØ FAZ 6: Son test veri seti olu≈üturuluyor...")

df_test = pd.concat(balanced_samples, ignore_index=True)
df_test = df_test.sample(frac=1, random_state=42).reset_index(drop=True)  # Shuffle the rows

final_magnitude_counts = df_test['sem_magnitude'].value_counts().to_dict()

# Distribution AFTER balancing.
print(f"\n‚úÖ SON TEST VERƒ∞ SETƒ∞: {len(df_test):,} √∂rnek")
print("\n   Dengeleme SONRASI daƒüƒ±lƒ±m:")
for mag in ['small', 'medium', 'large']:
    count = final_magnitude_counts.get(mag, 0)
    percentage = (count / len(df_test) * 100) if len(df_test) > 0 else 0
    print(f"   ‚Ä¢ {mag:8s}: {count:6,} √∂rnek ({percentage:5.2f}%)")

# ==============================================================================
# PHASE 7: BUILD THE MASK DATABASE AND ATTACH MASK PATHS
# ==============================================================================
print("\nüóÇÔ∏è  FAZ 7: Maske veritabanƒ± olu≈üturuluyor...")

# Index files under the root directory, keyed by file name without extension.
# NOTE(review): a later match overwrites an earlier one with the same stem, and
# the last pattern indexes every PNG under "original" - confirm that a non-mask
# image cannot shadow a mask that shares its stem.
mask_db = {}
search_patterns = [
    os.path.join(CONFIG['root_dir'], "**", "masks", "*.png"),
    os.path.join(CONFIG['root_dir'], "**", "masks", "*.jpg"),
    os.path.join(CONFIG['root_dir'], "masks", "**", "*.png"),
    os.path.join(CONFIG['root_dir'], "original", "**", "*.png"),
]

for pattern in search_patterns:
    for m_path in glob.glob(pattern, recursive=True):
        filename = os.path.basename(m_path)
        name_no_ext = os.path.splitext(filename)[0]
        mask_db[name_no_ext] = m_path

print(f"   ‚úì Maske veritabanƒ±: {len(mask_db):,} maske indekslendi")

# Resolve a mask path for every test row.
mask_paths = []
missing_masks = 0

for idx, row in tqdm(df_test.iterrows(), total=len(df_test), desc="Maskeler e≈üle≈ütiriliyor"):
    mask_id = str(row['mask_id']).strip()
    
    if mask_id in mask_db:
        mask_paths.append(mask_db[mask_id])
    else:
        # Fallback: try the perturbed_img_id (without its extension) as the key.
        img_id = os.path.splitext(str(row['perturbed_img_id']))[0]
        if img_id in mask_db:
            mask_paths.append(mask_db[img_id])
        else:
            mask_paths.append(None)
            missing_masks += 1

df_test['mask_path'] = mask_paths

# Drop the rows without a mask. This can leave the magnitude classes with
# unequal counts, since the rows are removed after balancing.
if missing_masks > 0:
    print(f"   ‚ö†Ô∏è  Eksik maskeler: {missing_masks}, bu √∂rnekler √ßƒ±karƒ±lƒ±yor...")
    df_test = df_test.dropna(subset=['mask_path']).reset_index(drop=True)
    print(f"   ‚úì Maske doƒürulamasƒ±ndan sonra son sayƒ±: {len(df_test):,} √∂rnek")

# ==============================================================================
# PHASE 8: FINAL VALIDATION
# ==============================================================================
print("\nüîç FAZ 8: Yollar doƒürulanƒ±yor...")

# Rename the column to 'image_path', the name read by TestDataset and the test loop.
df_test = df_test.rename(columns={'fake_img_path': 'image_path'})

# Spot-check only the first few rows (at most 5); this is not a full validation.
sample_size = min(5, len(df_test))
validation_passed = 0
validation_failed = 0

for idx in range(sample_size):
    row = df_test.iloc[idx]
    img_exists = os.path.exists(row['image_path'])
    mask_exists = os.path.exists(row['mask_path'])
    
    if img_exists and mask_exists:
        validation_passed += 1
    else:
        validation_failed += 1
        print(f"   ‚ö†Ô∏è  √ñrnek {idx}: G√∂rsel var: {img_exists}, Maske var: {mask_exists}")

print(f"   ‚úì √ñrnek doƒürulama: {validation_passed}/{sample_size} ba≈üarƒ±lƒ±")

# Missing files are only reported; the rows are kept in df_test.
if validation_failed > 0:
    print(f"   ‚ö†Ô∏è  Uyarƒ±: {validation_failed} √∂rnekte eksik dosya var")

print("\n" + "=" * 70)
print("‚úÖ TEST VERƒ∞Sƒ∞ HAZIRLAMA TAMAMLANDI!")
print("=" * 70)
print(f"üìä {len(df_test):,} dengeli √∂rnek ile test i√ßin hazƒ±r")
print("   Test fonksiyonunu √ßalƒ±≈ütƒ±rmak i√ßin H√ºcre 4'√º √ßalƒ±≈ütƒ±rabilirsiniz.")
print("=" * 70)

üöÄ GELƒ∞≈ûMƒ∞≈û TEST VERƒ∞Sƒ∞ HAZIRLAMA (Fƒ∞LTRELEME & DENGELEME)

üìÇ FAZ 1: Test CSV dosyalarƒ± y√ºkleniyor ve birle≈ütiriliyor...
   ‚úì Y√ºklendi: fake_test.csv (16,216 √∂rnek)
   ‚úì Y√ºklendi: fake_train.csv (75,154 √∂rnek)
   ‚úì Y√ºklendi: fake_val.csv (16,291 √∂rnek)

‚úÖ Birle≈ütirilmi≈ü test verisi: 107,661 √∂rnek

üìÇ FAZ 2: Eƒüitim verileri y√ºkleniyor (filtreleme i√ßin)...
   ‚úì Eƒüitim CSV y√ºklendi: 60,000 √∂rnek
   üîÑ Yol d√∂n√º≈ü√ºm √∂rneƒüi:
      Eƒüitim: C:\Users\DeepLab\Desktop\Grup-17\SEMI_TRUTHS-extracted\inpainting\CelebAHQ\StableDiffusion_v5\11709_nose_CelebAHQ_StableDiffusion_v5.png
      D√∂n√º≈üt√ºr√ºlm√º≈ü: C:\AI_DATA\SEMI_TRUTHS_extracted\inpainting\CelebAHQ\StableDiffusion_v5\11709_nose_CelebAHQ_StableDiffusion_v5.png
      Dosya adƒ±: 11709_nose_CelebAHQ_StableDiffusion_v5.png
   üîÑ Yol d√∂n√º≈ü√ºm √∂rneƒüi:
      Eƒüitim: C:\Users\DeepLab\Desktop\Grup-17\SEMI_TRUTHS-extracted\inpainting\CityScapes\Kandinsky_2_2\hamburg_000000_066988_instance03

Maskeler e≈üle≈ütiriliyor:   0%|          | 0/150 [00:00<?, ?it/s]

   ‚ö†Ô∏è  Eksik maskeler: 21, bu √∂rnekler √ßƒ±karƒ±lƒ±yor...
   ‚úì Maske doƒürulamasƒ±ndan sonra son sayƒ±: 129 √∂rnek

üîç FAZ 8: Yollar doƒürulanƒ±yor...
   ‚úì √ñrnek doƒürulama: 5/5 ba≈üarƒ±lƒ±

‚úÖ TEST VERƒ∞Sƒ∞ HAZIRLAMA TAMAMLANDI!
üìä 129 dengeli √∂rnek ile test i√ßin hazƒ±r
   Test fonksiyonunu √ßalƒ±≈ütƒ±rmak i√ßin H√ºcre 4'√º √ßalƒ±≈ütƒ±rabilirsiniz.


In [3]:
class TestDataset(Dataset):
    """Dataset yielding processor-encoded image / binary-mask pairs from a dataframe.

    Each row of ``df`` must provide ``image_path`` and ``mask_path``;
    ``sem_magnitude`` is optional and defaults to ``'Unknown'``.

    Args:
        df: dataframe describing the samples; its index is reset so that
            positional and label indexing agree.
        processor: callable invoked as
            ``processor(images=..., segmentation_maps=..., return_tensors="pt")``
            that returns a mapping with ``pixel_values`` and ``labels``.
    """

    def __init__(self, df, processor):
        self.df = df.reset_index(drop=True)
        self.processor = processor

    def __len__(self):
        return len(self.df)

    def _load_sample(self, idx):
        """Load and encode the sample stored at positional index ``idx``."""
        row = self.df.iloc[idx]

        # Context managers release the file handles as soon as the pixel data
        # has been converted, instead of waiting for garbage collection.
        with Image.open(row['image_path']) as raw_image:
            image = raw_image.convert("RGB")
        with Image.open(row['mask_path']) as raw_mask:
            mask = raw_mask.convert("L")

        # Binary mask (0/1): every non-zero mask pixel is labelled 1.
        mask_np = np.array(mask)
        mask_np = np.where(mask_np > 0, 1, 0).astype(np.uint8)

        encoded = self.processor(images=image, segmentation_maps=mask_np, return_tensors="pt")

        return {
            # Drop only the leading batch axis added by return_tensors="pt".
            "pixel_values": encoded["pixel_values"].squeeze(0),
            "labels": encoded["labels"].squeeze(0),
            "sem_magnitude": str(row.get('sem_magnitude', 'Unknown'))
        }

    def __getitem__(self, idx):
        """Return the encoded sample at ``idx``, falling back to later rows.

        If the requested row cannot be loaded, the following rows are tried in
        order (wrapping around), each at most once. Note that a fallback
        returns a different row's data, so callers that align results
        positionally with the dataframe will see the substitute sample.

        Raises:
            RuntimeError: if no row of the dataset can be loaded.
        """
        import warnings

        total = len(self.df)
        last_error = None
        for offset in range(total):
            candidate = (idx + offset) % total
            try:
                return self._load_sample(candidate)
            except Exception as exc:  # keep best-effort loading, but never swallow KeyboardInterrupt
                last_error = exc
                warnings.warn(
                    f"TestDataset: sample {candidate} could not be loaded ({exc!r}); trying the next one"
                )
        raise RuntimeError(
            f"TestDataset: none of the {total} samples could be loaded"
        ) from last_error

In [4]:
# ==============================================================================
# PER-IMAGE IoU CALCULATION HELPER
# ==============================================================================

def calculate_per_image_iou(labels, preds):
    """
    Calculate binary-segmentation IoU metrics for a single image.

    A class that appears in neither ``labels`` nor ``preds`` has an undefined
    IoU; it is reported as NaN and excluded from ``mean_iou`` (so a perfectly
    predicted single-class image scores 1.0 rather than 0.5).

    Args:
        labels: numpy array of ground truth labels with values in {0, 1}
        preds: numpy array of predictions with values in {0, 1}, same shape
            as ``labels`` (arrays of any shape are flattened)

    Returns:
        fake_iou, bg_iou, mean_iou, pixel_accuracy
        (class 1 IoU, class 0 IoU, mean over the defined IoUs, fraction of
        equal pixels; NaN where undefined, e.g. for empty input)
    """
    n_classes = 2

    labels = np.asarray(labels).ravel()
    preds = np.asarray(preds).ravel()

    # Build the confusion matrix for this image (rows: truth, columns: prediction)
    hist = np.bincount(
        n_classes * labels.astype(int) + preds.astype(int),
        minlength=n_classes ** 2
    ).reshape(n_classes, n_classes)

    # IoU per class; classes with an empty union stay NaN instead of becoming 0
    intersection = np.diag(hist).astype(float)
    union = hist.sum(axis=1) + hist.sum(axis=0) - intersection
    ious = np.full(n_classes, np.nan)
    np.divide(intersection, union, out=ious, where=union > 0)

    bg_iou = ious[0]
    fake_iou = ious[1]
    # nanmean over an all-NaN vector would only emit a warning and return NaN
    mean_iou = np.nanmean(ious) if (union > 0).any() else np.nan

    # Pixel accuracy (undefined for empty input)
    pixel_accuracy = (labels == preds).mean() if labels.size else np.nan

    return fake_iou, bg_iou, mean_iou, pixel_accuracy

print("‚úÖ Helper function tanƒ±mlandƒ±: calculate_per_image_iou()")

‚úÖ Helper function tanƒ±mlandƒ±: calculate_per_image_iou()


In [5]:
# ==============================================================================
# ADVANCED TEST FUNCTION (WITH CSV & VISUALIZATION SUPPORT)
# ==============================================================================

class StreamSegMetrics:
    """Streaming confusion-matrix accumulator for semantic segmentation.

    Attributes are updated by ``update`` and summarised by ``get_results``:
    ``confusion_matrix`` has rows = ground truth and columns = prediction, and
    ``total_samples`` counts the images seen so far.
    """

    def __init__(self, n_classes):
        self.n_classes = n_classes
        self.confusion_matrix = np.zeros((n_classes, n_classes))
        self.total_samples = 0

    def update(self, label_trues, label_preds):
        """Accumulate one image or one batch of images.

        Args:
            label_trues: ground-truth labels (tensor or array). Pixels whose
                label is outside ``[0, n_classes)`` are ignored.
            label_preds: predicted labels with the same shape as ``label_trues``.

        Arrays with three or more dimensions are treated as a batch whose
        first axis indexes images, so ``total_samples`` grows by the batch
        size; anything else counts as a single image.
        """
        if torch.is_tensor(label_trues):
            label_trues = label_trues.cpu().numpy()
        if torch.is_tensor(label_preds):
            label_preds = label_preds.cpu().numpy()
        label_trues = np.asarray(label_trues)
        label_preds = np.asarray(label_preds)

        # Count images, not update() calls, so batched and per-image updates agree.
        n_images = label_trues.shape[0] if label_trues.ndim >= 3 else 1

        mask = (label_trues >= 0) & (label_trues < self.n_classes)
        label_trues = label_trues[mask].astype(np.int32)
        label_preds = label_preds[mask].astype(np.int32)

        # Encode each (truth, prediction) pair as one integer and histogram it.
        self.confusion_matrix += np.bincount(
            self.n_classes * label_trues + label_preds,
            minlength=self.n_classes ** 2
        ).reshape(self.n_classes, self.n_classes)

        self.total_samples += n_images

    def get_results(self):
        """Return mean IoU, class-1 ("fake") IoU, class-0 IoU and pixel accuracy.

        The small epsilon in the denominators avoids division by zero; as a
        consequence a class that never occurs scores 0 rather than NaN.
        """
        hist = self.confusion_matrix
        iu = np.diag(hist) / (hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist) + 1e-10)
        mean_iou = np.nanmean(iu)
        acc = np.diag(hist).sum() / (hist.sum() + 1e-10)

        return {
            "mean_iou": mean_iou,
            "fake_iou": iu[1] if len(iu) > 1 else 0.0,
            "bg_iou": iu[0] if len(iu) > 0 else 0.0,
            "accuracy": acc
        }

def run_safe_test():
    """Evaluate the configured SegFormer model on the global ``df_test``.

    Loads the processor and model from ``CONFIG['model_path']``, runs inference
    over ``df_test`` in batches, accumulates dataset-level and per-magnitude
    confusion matrices, and collects per-image IoU / accuracy / confidence rows.
    When ``CONFIG['save_results']`` is set, the results are written to a
    timestamped folder under ``CONFIG['output_dir']`` and, if enabled, plots
    are generated.

    Returns:
        ``(per_image_results, global_metric, mag_metrics)`` on success, or
        ``None`` when ``df_test`` is missing/empty or the model cannot be loaded.
    """
    print("\n" + "="*70)
    print("üöÄ GELI≈ûMI≈û TEST (CSV & Vƒ∞Z√úALƒ∞ZASYON ƒ∞LE)")
    print("="*70)

    # df_test is a notebook global created by the data-preparation cell.
    if 'df_test' not in globals() or len(df_test) == 0:
        print("‚ùå HATA: df_test bulunamadƒ±! L√ºtfen veri hazƒ±rlama h√ºcresini √ßalƒ±≈ütƒ±r.")
        return

    print(f"üîÑ Model: {CONFIG['model_path']}")
    try:
        processor = SegformerImageProcessor.from_pretrained(CONFIG['model_path'])
        model = SegformerForSemanticSegmentation.from_pretrained(CONFIG['model_path'])
        model.to(device)
        model.eval()
    except Exception as e:
        # Any loading failure is reported and aborts the test without raising.
        print(f"‚ùå Model Hatasƒ±: {e}")
        return

    test_ds = TestDataset(df_test, processor)
    # shuffle=False keeps the loader order equal to the dataframe order, which
    # the positional metadata lookup below relies on.
    loader = DataLoader(test_ds, batch_size=CONFIG['batch_size'], shuffle=False, num_workers=0)
    
    global_metric = StreamSegMetrics(n_classes=2)
    # One accumulator per sem_magnitude value, created on first use.
    mag_metrics = defaultdict(lambda: StreamSegMetrics(n_classes=2))
    
    # Per-image result rows; current_idx is the dataframe position of the
    # first sample of the current batch.
    per_image_results = []
    current_idx = 0
    
    print(f"üèéÔ∏è  {len(test_ds)} g√∂rsel test ediliyor...")
    
    with torch.no_grad():
        for batch in tqdm(loader, desc="Analiz"):
            pixel_values = batch["pixel_values"].to(device)
            labels = batch["labels"].to(device)
            mags = batch["sem_magnitude"]
            
            outputs = model(pixel_values=pixel_values)
            
            # Upsample the logits to the spatial size of the labels before argmax.
            logits = torch.nn.functional.interpolate(
                outputs.logits, size=labels.shape[-2:], mode="bilinear", align_corners=False
            )
            preds = logits.argmax(dim=1)
            
            # Class probabilities, used for the per-image confidence statistics.
            probs = torch.nn.functional.softmax(logits, dim=1)  # Shape: [B, 2, H, W]
            
            # Dataset-level metrics: updated once per batch.
            global_metric.update(labels, preds)
            
            # Per-image metrics
            for i, mag in enumerate(mags):
                single_label = labels[i]
                single_pred = preds[i]
                mag_metrics[mag].update(single_label, single_pred)
                
                # Move this image's tensors to numpy for the per-image helper.
                label_np = single_label.cpu().numpy()
                pred_np = single_pred.cpu().numpy()
                prob_np = probs[i].cpu().numpy()  # Shape: [2, H, W]
                
                # Keep only pixels labelled 0 or 1; other label values are skipped.
                valid_mask = (label_np >= 0) & (label_np < 2)
                label_valid = label_np[valid_mask]
                pred_valid = pred_np[valid_mask]
                
                if len(label_valid) > 0:
                    # Calculate IoU
                    fake_iou, bg_iou, mean_iou, pixel_acc = calculate_per_image_iou(
                        label_valid, pred_valid
                    )
                    
                    # Confidence = probability of the predicted (arg-max) class.
                    max_probs = prob_np.max(axis=0)  # Max prob per pixel
                    conf_mean = float(max_probs[valid_mask].mean())
                    conf_max = float(max_probs[valid_mask].max())
                    conf_min = float(max_probs[valid_mask].min())
                    
                    # Metadata is looked up by position in the global df_test.
                    # NOTE(review): TestDataset returns a different row when a
                    # sample fails to load; in that case this metadata would not
                    # describe the image that was actually scored - confirm.
                    row_data = df_test.iloc[current_idx + i]
                    
                    # Store results
                    per_image_results.append({
                        'image_path': row_data['image_path'],
                        'perturbed_img_id': row_data.get('perturbed_img_id', ''),
                        'dataset': row_data.get('dataset', ''),
                        'sem_magnitude': mag,
                        'area_ratio': float(row_data.get('area_ratio', np.nan)),
                        'ssim': float(row_data.get('ssim', np.nan)),
                        'lpips_score': float(row_data.get('lpips_score', np.nan)),
                        'mse': float(row_data.get('mse', np.nan)),
                        'fake_iou': float(fake_iou),
                        'bg_iou': float(bg_iou),
                        'mean_iou': float(mean_iou),
                        'pixel_accuracy': float(pixel_acc),
                        'confidence_mean': conf_mean,
                        'confidence_max': conf_max,
                        'confidence_min': conf_min
                    })
            
            # Advance to the dataframe position of the next batch.
            current_idx += len(mags)

    # Print the dataset-level results.
    print("\n" + "="*70)
    print("üèÜ GENEL SONU√áLAR")
    print("="*70)
    res = global_metric.get_results()
    print(f"üî• Mean IoU:       {res['mean_iou']:.4f}")
    print(f"ü¶† Fake IoU (1):   {res['fake_iou']:.4f}")
    print(f"üèûÔ∏è Background IoU: {res['bg_iou']:.4f}")
    print(f"üéØ Accuracy:       {res['accuracy']:.4f}")

    # Print one row per sem_magnitude value, sorted by name.
    print("\n" + "="*70)
    print("üìä ZORLUK SEVƒ∞YESƒ∞NE (MAGNITUDE) G√ñRE ANALƒ∞Z")
    print("="*70)
    print(f"{'MAGNITUDE':<20} | {'mIoU':<10} | {'Fake IoU':<10} | {'Adet':<5}")
    print("-" * 70)
    
    for mag in sorted(mag_metrics.keys()):
        res = mag_metrics[mag].get_results()
        count = mag_metrics[mag].total_samples
        print(f"{mag:<20} | {res['mean_iou']:.4f}     | {res['fake_iou']:.4f}     | {count}")
    print("-" * 70)
    
    # Save results if configured
    if CONFIG.get('save_results', True):
        df_results = pd.DataFrame(per_image_results)
        
        # Each run writes into its own timestamped sub-folder.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_dir = os.path.join(CONFIG.get('output_dir', 'segformer_test_results'), timestamp)
        os.makedirs(output_dir, exist_ok=True)
        
        # save_results_to_csv is defined in a later cell; that cell must have
        # been executed before this function is called.
        save_results_to_csv(df_results, output_dir, global_metric, mag_metrics)
        
        # Generate visualizations (also defined in a later cell)
        if CONFIG.get('generate_visualizations', True):
            generate_all_visualizations(df_results, output_dir)
        
        print(f"\n‚úÖ Sonu√ßlar kaydedildi: {output_dir}")
    
    return per_image_results, global_metric, mag_metrics

print("‚úÖ Test fonksiyonu tanƒ±mlandƒ±. √áalƒ±≈ütƒ±rmak i√ßin: results = run_safe_test()")

‚úÖ Test fonksiyonu tanƒ±mlandƒ±. √áalƒ±≈ütƒ±rmak i√ßin: results = run_safe_test()


In [6]:
# ==============================================================================
# CSV EXPORT FUNCTIONS
# ==============================================================================

def save_results_to_csv(df_results, output_dir, global_metric, mag_metrics):
    """
    Write three CSV reports for a test run into ``output_dir``.

    * ``detailed_results.csv``    - ``df_results`` as-is (one row per image)
    * ``summary_metrics.csv``     - the global metric, one row per magnitude
      accumulator (sorted by name) and, when a ``dataset`` column exists, one
      row per non-null dataset holding the mean of its per-image scores
    * ``statistical_summary.csv`` - descriptive statistics of the score columns

    ``global_metric`` and every value of ``mag_metrics`` must expose
    ``get_results()`` and ``total_samples``.
    """
    def _accumulator_row(category, subset, accumulator):
        # Summary row built from a confusion-matrix accumulator.
        scores = accumulator.get_results()
        return {
            'category': category,
            'subset': subset,
            'sample_count': accumulator.total_samples,
            'mean_iou': scores['mean_iou'],
            'fake_iou': scores['fake_iou'],
            'bg_iou': scores['bg_iou'],
            'pixel_accuracy': scores['accuracy']
        }

    n_rows = len(df_results)
    print("\nüìÅ CSV dosyalarƒ± kaydediliyor...")

    # --- 1. per-image details -------------------------------------------------
    detail_path = os.path.join(output_dir, 'detailed_results.csv')
    df_results.to_csv(detail_path, index=False, encoding='utf-8')
    print(f"   ‚úÖ Detaylƒ± sonu√ßlar: {detail_path}")
    print(f"      {n_rows:,} √∂rnek kaydedildi")

    # --- 2. aggregate summary -------------------------------------------------
    summary_rows = [_accumulator_row('Global', 'All', global_metric)]
    summary_rows.extend(
        _accumulator_row('Sem_Magnitude', key, mag_metrics[key])
        for key in sorted(mag_metrics.keys())
    )

    if 'dataset' in df_results.columns:
        for ds_name in df_results['dataset'].unique():
            if pd.isna(ds_name):
                continue
            rows_of_ds = df_results[df_results['dataset'] == ds_name]
            summary_rows.append({
                'category': 'Dataset',
                'subset': ds_name,
                'sample_count': len(rows_of_ds),
                'mean_iou': rows_of_ds['mean_iou'].mean(),
                'fake_iou': rows_of_ds['fake_iou'].mean(),
                'bg_iou': rows_of_ds['bg_iou'].mean(),
                'pixel_accuracy': rows_of_ds['pixel_accuracy'].mean()
            })

    summary_path = os.path.join(output_dir, 'summary_metrics.csv')
    pd.DataFrame(summary_rows).to_csv(summary_path, index=False, encoding='utf-8')
    print(f"   ‚úÖ √ñzet metrikler: {summary_path}")

    # --- 3. descriptive statistics --------------------------------------------
    score_columns = ('fake_iou', 'bg_iou', 'mean_iou', 'pixel_accuracy',
                     'confidence_mean', 'confidence_max', 'confidence_min')
    stat_rows = []
    for name in score_columns:
        if name not in df_results.columns:
            continue
        series = df_results[name]
        stat_rows.append({
            'metric': name,
            'mean': series.mean(),
            'std': series.std(),
            'min': series.min(),
            'q25': series.quantile(0.25),
            'median': series.median(),
            'q75': series.quantile(0.75),
            'max': series.max()
        })

    stats_path = os.path.join(output_dir, 'statistical_summary.csv')
    pd.DataFrame(stat_rows).to_csv(stats_path, index=False, encoding='utf-8')
    print(f"   ‚úÖ ƒ∞statistiksel √∂zet: {stats_path}")

    print(f"\n‚úÖ Toplam {n_rows:,} √∂rnek i√ßin 3 CSV dosyasƒ± olu≈üturuldu")

print("‚úÖ CSV export fonksiyonlarƒ± hazƒ±r")

‚úÖ CSV export fonksiyonlarƒ± hazƒ±r


In [7]:
# ==============================================================================
# VISUALIZATION FUNCTIONS - PART 1/2
# ==============================================================================

def generate_all_visualizations(df_results, output_dir):
    """Generate all visualization plots.

    Creates ``<output_dir>/visualizations`` and calls the five plotting
    functions in turn, each of which receives ``df_results`` and that folder.

    Warnings are suppressed only while the plots are being produced; the
    caller's warning filters are restored on return instead of being silenced
    for the rest of the session.
    """
    import warnings

    with warnings.catch_warnings():
        warnings.simplefilter('ignore')

        # The seaborn style sheets were renamed to 'seaborn-v0_8-*' in
        # matplotlib 3.6; fall back to the old name on earlier versions and
        # keep the current style if neither is available.
        for style_name in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid'):
            if style_name in plt.style.available:
                plt.style.use(style_name)
                break
        sns.set_palette("husl")

        viz_dir = os.path.join(output_dir, 'visualizations')
        os.makedirs(viz_dir, exist_ok=True)

        print("\nüé® G√∂rselle≈ütirmeler olu≈üturuluyor...")

        print("   üìä 1/5: Metrik daƒüƒ±lƒ±mlarƒ±...")
        plot_metric_distributions(df_results, viz_dir)

        print("   üìä 2/5: Magnitude bazlƒ± performans...")
        plot_magnitude_performance(df_results, viz_dir)

        print("   üìä 3/5: Dataset kar≈üƒ±la≈ütƒ±rmasƒ±...")
        plot_dataset_comparison(df_results, viz_dir)

        print("   üìä 4/5: Korelasyon grafikleri...")
        plot_correlation_scatters(df_results, viz_dir)

        print("   üìä 5/5: Kapsamlƒ± dashboard...")
        plot_comprehensive_dashboard(df_results, viz_dir)

        print(f"\n‚úÖ 5 g√∂rselle≈ütirme kaydedildi: {viz_dir}/")

def plot_metric_distributions(df, viz_dir):
    """Save a 2x3 figure: metric histograms on top, per-magnitude box plots below.

    One column per metric (fake IoU, pixel accuracy, mean confidence). The box
    plots are drawn only when ``df`` has a ``sem_magnitude`` column and cover
    the magnitudes present, in the order small / medium / large. The figure is
    written to ``<viz_dir>/01_metric_distributions.png``.
    """
    panel_specs = (
        ('fake_iou', 'Fake IoU', '#e74c3c'),
        ('pixel_accuracy', 'Pixel Accuracy', '#2ecc71'),
        ('confidence_mean', 'Mean Confidence', '#3498db'),
    )

    fig, axes = plt.subplots(2, 3, figsize=(18, 12))
    fig.suptitle('Metric Distributions & Box Plots by Sem_Magnitude',
                 fontsize=16, fontweight='bold', y=0.995)

    # The magnitudes to plot do not depend on the metric, so resolve them once.
    has_magnitude = 'sem_magnitude' in df.columns
    if has_magnitude:
        seen = df['sem_magnitude'].unique()
        magnitudes = [level for level in ('small', 'medium', 'large') if level in seen]

    for position, (metric_col, label, tint) in enumerate(panel_specs):
        series = df[metric_col]
        mean_value = series.mean()
        median_value = series.median()

        # Upper panel: histogram with mean / median markers
        upper = axes[0, position]
        upper.hist(series.dropna(), bins=40, color=tint,
                   alpha=0.7, edgecolor='black', linewidth=1.2)
        upper.axvline(mean_value, color='red', linestyle='--',
                      linewidth=2, label=f'Mean: {mean_value:.3f}')
        upper.axvline(median_value, color='blue', linestyle='--',
                      linewidth=2, label=f'Median: {median_value:.3f}')
        upper.set_xlabel(label, fontsize=12, fontweight='bold')
        upper.set_ylabel('Frequency', fontsize=11, fontweight='bold')
        upper.set_title(f'{label} Distribution', fontsize=13, fontweight='bold')
        upper.legend(fontsize=9)
        upper.grid(True, alpha=0.3)

        if not has_magnitude:
            continue

        # Lower panel: one box per magnitude level
        lower = axes[1, position]
        per_level = [
            df.loc[df['sem_magnitude'] == level, metric_col].dropna()
            for level in magnitudes
        ]
        boxes = lower.boxplot(per_level, labels=magnitudes,
                              patch_artist=True, showmeans=True,
                              meanprops=dict(marker='D', markerfacecolor='red',
                                             markersize=8))
        for box in boxes['boxes']:
            box.set_facecolor(tint)
            box.set_alpha(0.6)

        lower.set_xlabel('Sem_Magnitude', fontsize=11, fontweight='bold')
        lower.set_ylabel(label, fontsize=11, fontweight='bold')
        lower.set_title(f'{label} by Magnitude', fontsize=13, fontweight='bold')
        lower.grid(True, alpha=0.3, axis='y')

    target_file = os.path.join(viz_dir, '01_metric_distributions.png')
    plt.tight_layout()
    plt.savefig(target_file, dpi=300, bbox_inches='tight')
    plt.close()

def plot_magnitude_performance(df, viz_dir):
    """Save a 2x2 grid of bar charts with each metric's mean per sem_magnitude.

    One panel is drawn per metric (mean_iou, fake_iou, pixel_accuracy,
    confidence_mean). Each bar is the mean of the metric over the rows of one
    magnitude class ('small', 'medium', 'large'); the error bar is the sample
    standard deviation, and the mean plus the sample count are written above
    the bar. A metric column that is absent from ``df`` gets a
    "Data not available" placeholder panel instead of raising KeyError.

    Args:
        df: DataFrame of results; must contain a 'sem_magnitude' column.
        viz_dir: Directory the PNG is written to (it is not created here).

    Returns:
        None. The figure is saved as ``02_magnitude_performance.png`` inside
        ``viz_dir``. If 'sem_magnitude' is missing, a warning is printed and
        nothing is drawn.
    """
    if 'sem_magnitude' not in df.columns:
        print("      ‚ö†Ô∏è sem_magnitude s√ºtunu yok, atlanƒ±yor...")
        return

    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle('Performance Metrics by Sem_Magnitude',
                 fontsize=16, fontweight='bold')

    # (column name, panel title, bar colour)
    metrics = [
        ('mean_iou', 'Mean IoU', '#9b59b6'),
        ('fake_iou', 'Fake IoU', '#e74c3c'),
        ('pixel_accuracy', 'Pixel Accuracy', '#2ecc71'),
        ('confidence_mean', 'Mean Confidence', '#3498db')
    ]

    mag_order = ['small', 'medium', 'large']
    x = np.arange(len(mag_order))  # identical bar positions in every panel

    for idx, (col, title, color) in enumerate(metrics):
        ax = axes[idx // 2, idx % 2]
        ax.set_title(f'{title} by Magnitude', fontsize=13, fontweight='bold')

        # Same placeholder convention as plot_correlation_scatters: a missing
        # column blanks one panel instead of aborting the whole figure.
        if col not in df.columns:
            ax.text(0.5, 0.5, f'Data not available\n({col})',
                    ha='center', va='center', fontsize=12,
                    transform=ax.transAxes)
            continue

        means = []
        stds = []
        counts = []

        for mag in mag_order:
            mag_data = df.loc[df['sem_magnitude'] == mag, col].dropna()
            n_samples = len(mag_data)
            means.append(mag_data.mean() if n_samples > 0 else 0)
            # Series.std() is NaN for a single sample; use a zero-length
            # error bar instead of handing NaN to matplotlib.
            stds.append(mag_data.std() if n_samples > 1 else 0)
            counts.append(n_samples)

        bars = ax.bar(x, means, yerr=stds, capsize=10,
                      color=color, alpha=0.7, edgecolor='black', linewidth=1.5)

        # Annotate every bar with its mean and the number of samples behind it.
        for bar, mean, count in zip(bars, means, counts):
            ax.text(bar.get_x() + bar.get_width()/2., bar.get_height(),
                    f'{mean:.3f}\n(n={count:,})',
                    ha='center', va='bottom', fontsize=10, fontweight='bold')

        ax.set_xticks(x)
        ax.set_xticklabels(mag_order, fontsize=11)
        ax.set_xlabel('Sem_Magnitude', fontsize=12, fontweight='bold')
        ax.set_ylabel(title, fontsize=12, fontweight='bold')
        ax.grid(True, alpha=0.3, axis='y')

        # 20% headroom above the tallest bar, capped at 1.1; fall back to the
        # full range when every mean is zero.
        peak = max(means)
        ax.set_ylim([0, min(1.1, peak * 1.2) if peak > 0 else 1.1])

    fig.tight_layout()
    fig.savefig(os.path.join(viz_dir, '02_magnitude_performance.png'), dpi=300, bbox_inches='tight')
    plt.close(fig)

# Status message emitted at the end of this cell, after the plotting helpers
# above have been defined (Turkish: "Visualization functions (1/2) ready").
print("‚úÖ Visualization fonksiyonlarƒ± (1/2) hazƒ±r")

‚úÖ Visualization fonksiyonlarƒ± (1/2) hazƒ±r


In [8]:
# ==============================================================================
# VISUALIZATION FUNCTIONS - PART 2/2
# ==============================================================================

def plot_dataset_comparison(df, viz_dir):
    """Save a grouped bar chart comparing mean metrics across datasets.

    For each of (at most) the first ten dataset names in sorted order, three
    bars are drawn side by side: the mean of fake_iou, mean_iou and
    pixel_accuracy over that dataset's rows. Bars taller than 0.05 are
    annotated with their value.

    Args:
        df: DataFrame of results. Needs a 'dataset' column plus the
            'fake_iou', 'mean_iou' and 'pixel_accuracy' columns.
        viz_dir: Directory the PNG is written to (it is not created here).

    Returns:
        None. The figure is saved as ``03_dataset_comparison.png`` inside
        ``viz_dir``. If the 'dataset' column is missing, entirely NaN, or
        holds only empty values, a warning is printed and nothing is drawn.
    """
    if 'dataset' not in df.columns or df['dataset'].isna().all():
        print("      ‚ö†Ô∏è dataset s√ºtunu yok veya bo≈ü, atlanƒ±yor...")
        return

    # Resolve the dataset list BEFORE creating the figure, so the early return
    # below cannot leave an open, never-closed figure behind.
    # Falsy names (e.g. '') are dropped; only the first 10 sorted names are kept
    # to keep the x axis readable.
    datasets = sorted(d for d in df['dataset'].dropna().unique() if d)[:10]

    if len(datasets) == 0:
        print("      ‚ö†Ô∏è Ge√ßerli dataset yok, atlanƒ±yor...")
        return

    metrics = ['fake_iou', 'mean_iou', 'pixel_accuracy']
    metric_labels = ['Fake IoU', 'Mean IoU', 'Pixel Accuracy']
    colors = ['#e74c3c', '#9b59b6', '#2ecc71']

    # One row per dataset, one column per metric.
    data_matrix = []
    for dataset in datasets:
        dataset_data = df[df['dataset'] == dataset]
        data_matrix.append([dataset_data[col].mean() for col in metrics])

    fig, ax = plt.subplots(figsize=(16, 8))

    x = np.arange(len(datasets))
    width = 0.25

    for i, (label, color) in enumerate(zip(metric_labels, colors)):
        values = [row[i] for row in data_matrix]
        # Offsets of -width, 0, +width centre the three bars on each tick.
        offset = width * (i - 1)
        bars = ax.bar(x + offset, values, width, label=label,
                      color=color, alpha=0.8, edgecolor='black', linewidth=1)

        for bar, val in zip(bars, values):
            height = bar.get_height()
            # Skip labels on near-zero bars; a NaN height also fails this test.
            if height > 0.05:
                ax.text(bar.get_x() + bar.get_width()/2., height,
                        f'{val:.2f}',
                        ha='center', va='bottom', fontsize=8)

    ax.set_xlabel('Dataset', fontsize=13, fontweight='bold')
    ax.set_ylabel('Performance Score', fontsize=13, fontweight='bold')
    ax.set_title('Performance Comparison Across Datasets',
                 fontsize=15, fontweight='bold', pad=20)
    ax.set_xticks(x)
    ax.set_xticklabels(datasets, rotation=45, ha='right', fontsize=10)
    ax.legend(fontsize=11, loc='upper right')
    ax.grid(True, alpha=0.3, axis='y')
    ax.set_ylim([0, 1.1])

    fig.tight_layout()
    fig.savefig(os.path.join(viz_dir, '03_dataset_comparison.png'), dpi=300, bbox_inches='tight')
    plt.close(fig)

def plot_correlation_scatters(df, viz_dir):
    """Save a 2x2 grid of scatter plots with a linear trend line and Pearson r.

    The four panels are: confidence_mean vs fake_iou, confidence_mean vs
    pixel_accuracy, area_ratio vs fake_iou, and ssim vs fake_iou. Rows with a
    NaN in either plotted column are dropped per panel. When a
    'sem_magnitude' column exists, points are coloured by magnitude
    ('small', 'medium', 'large'); otherwise all points share one colour.
    A panel whose columns are missing, or that has no valid rows, shows a
    text placeholder instead.

    Args:
        df: DataFrame of results.
        viz_dir: Directory the PNG is written to (it is not created here).

    Returns:
        None. The figure is saved as ``04_correlation_plots.png`` inside
        ``viz_dir``.
    """
    fig, axes = plt.subplots(2, 2, figsize=(16, 14))
    fig.suptitle('Correlation Analysis: Scatter Plots',
                 fontsize=16, fontweight='bold')

    # (x column, y column, panel title)
    plots = [
        ('confidence_mean', 'fake_iou', 'Mean Confidence vs Fake IoU'),
        ('confidence_mean', 'pixel_accuracy', 'Mean Confidence vs Pixel Accuracy'),
        ('area_ratio', 'fake_iou', 'Area Ratio vs Fake IoU'),
        ('ssim', 'fake_iou', 'SSIM vs Fake IoU')
    ]

    mag_colors = {'small': '#3498db', 'medium': '#f39c12', 'large': '#e74c3c'}
    has_magnitude = 'sem_magnitude' in df.columns

    for idx, (x_col, y_col, title) in enumerate(plots):
        ax = axes[idx // 2, idx % 2]

        if x_col not in df.columns or y_col not in df.columns:
            ax.text(0.5, 0.5, f'Data not available\n({x_col}, {y_col})',
                    ha='center', va='center', fontsize=12)
            ax.set_title(title, fontsize=13, fontweight='bold')
            continue

        # Keep whole rows so the magnitude label travels with each point; this
        # avoids a label-based re-lookup into df, which returns extra rows when
        # index labels are duplicated.
        valid_data = df.dropna(subset=[x_col, y_col])

        if len(valid_data) == 0:
            ax.text(0.5, 0.5, 'No valid data',
                    ha='center', va='center', fontsize=12)
            ax.set_title(title, fontsize=13, fontweight='bold')
            continue

        x = valid_data[x_col]
        y = valid_data[y_col]
        x_vals = x.to_numpy()
        y_vals = y.to_numpy()

        if has_magnitude:
            drew_labelled = False
            for mag, color in mag_colors.items():
                # Positional boolean mask: independent of the index labels.
                mag_mask = (valid_data['sem_magnitude'] == mag).to_numpy()
                if mag_mask.any():
                    ax.scatter(x_vals[mag_mask], y_vals[mag_mask],
                               c=color, label=mag, alpha=0.6, s=30,
                               edgecolors='black', linewidth=0.5)
                    drew_labelled = True
            # Only build a legend when there is something to list.
            if drew_labelled:
                ax.legend(fontsize=9, title='Sem_Magnitude')
        else:
            ax.scatter(x_vals, y_vals, alpha=0.6, s=30, c='#3498db',
                       edgecolors='black', linewidth=0.5)

        correlation = x.corr(y)

        # A straight-line fit needs at least two distinct x values; with a
        # constant x the least-squares problem is rank deficient.
        if len(x) > 1 and x.min() != x.max():
            trend = np.poly1d(np.polyfit(x_vals, y_vals, 1))
            x_line = np.linspace(x.min(), x.max(), 100)
            ax.plot(x_line, trend(x_line), "r--", linewidth=2, alpha=0.8)

        ax.set_xlabel(x_col.replace('_', ' ').title(), fontsize=11, fontweight='bold')
        ax.set_ylabel(y_col.replace('_', ' ').title(), fontsize=11, fontweight='bold')
        ax.set_title(title, fontsize=13, fontweight='bold')
        ax.grid(True, alpha=0.3)

        # Summary box in the top-left corner (axes-relative coordinates).
        textstr = f'Pearson r = {correlation:.3f}\nN = {len(x):,}'
        props = dict(boxstyle='round', facecolor='wheat', alpha=0.8)
        ax.text(0.05, 0.95, textstr, transform=ax.transAxes, fontsize=10,
                verticalalignment='top', bbox=props)

    fig.tight_layout()
    fig.savefig(os.path.join(viz_dir, '04_correlation_plots.png'), dpi=300, bbox_inches='tight')
    plt.close(fig)

def plot_comprehensive_dashboard(df, viz_dir):
    """Save a one-page dashboard summarising the test results.

    Layout (3x3 grid):
      * row 0: text box with overall mean/std of the main metrics and
        confidence scores;
      * row 1, left: heatmap of mean fake_iou / mean_iou / pixel_accuracy per
        magnitude class; right: horizontal bars for the rows with the highest
        mean_iou (up to ten);
      * row 2: violin plot of fake_iou per magnitude, histogram of
        confidence_mean, and a pie chart of sample counts per magnitude.

    Panels that depend on 'sem_magnitude' are left empty when that column is
    absent. Magnitude classes with no rows are left out of the heatmap and the
    violin plot rather than shifting the labels of the remaining rows.

    Args:
        df: DataFrame of results. Reads 'mean_iou', 'fake_iou',
            'pixel_accuracy', 'confidence_mean', 'confidence_max',
            'confidence_min' and, optionally, 'sem_magnitude'.
        viz_dir: Directory the PNG is written to (it is not created here).

    Returns:
        None. The figure is saved as ``05_comprehensive_dashboard.png`` inside
        ``viz_dir``.
    """
    fig = plt.figure(figsize=(20, 14))
    gs = fig.add_gridspec(3, 3, hspace=0.35, wspace=0.3)
    fig.suptitle('SegFormer Test Results - Comprehensive Dashboard',
                 fontsize=18, fontweight='bold', y=0.98)

    has_magnitude = 'sem_magnitude' in df.columns
    mag_order = ['small', 'medium', 'large']
    # Same magnitude -> colour mapping that plot_correlation_scatters uses.
    mag_palette = {'small': '#3498db', 'medium': '#f39c12', 'large': '#e74c3c'}

    # Statistics text
    ax1 = fig.add_subplot(gs[0, :])
    ax1.axis('off')

    stats_text = f"""
    ‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
                            OVERALL STATISTICS                                  
    ‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
    
    Total Images: {len(df):,}
    
    ‚îå‚îÄ Performance Metrics ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îê
    ‚îÇ  Mean IoU:       {df['mean_iou'].mean():.4f} ¬± {df['mean_iou'].std():.4f}
    ‚îÇ  Fake IoU:       {df['fake_iou'].mean():.4f} ¬± {df['fake_iou'].std():.4f}
    ‚îÇ  Pixel Accuracy: {df['pixel_accuracy'].mean():.4f} ¬± {df['pixel_accuracy'].std():.4f}
    ‚îî‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îò
    
    ‚îå‚îÄ Confidence Scores ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îê
    ‚îÇ  Mean Confidence:  {df['confidence_mean'].mean():.4f} ¬± {df['confidence_mean'].std():.4f}
    ‚îÇ  Max Confidence:   {df['confidence_max'].mean():.4f}
    ‚îÇ  Min Confidence:   {df['confidence_min'].mean():.4f}
    ‚îî‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îò
    """

    ax1.text(0.05, 0.5, stats_text, fontsize=10, family='monospace',
            verticalalignment='center', bbox=dict(boxstyle='round',
            facecolor='lightblue', alpha=0.3))

    # Heatmap
    ax2 = fig.add_subplot(gs[1, :2])
    if has_magnitude:
        metrics = ['fake_iou', 'mean_iou', 'pixel_accuracy']

        # Rows and their labels are collected together: a magnitude with no
        # samples is skipped in BOTH lists, so each remaining row keeps its
        # own label.
        heatmap_rows = []
        heatmap_labels = []
        for mag in mag_order:
            mag_data = df[df['sem_magnitude'] == mag]
            if len(mag_data) > 0:
                heatmap_rows.append([mag_data[col].mean() for col in metrics])
                heatmap_labels.append(mag)

        if heatmap_rows:
            sns.heatmap(np.array(heatmap_rows), annot=True, fmt='.3f', cmap='RdYlGn',
                        xticklabels=['Fake IoU', 'Mean IoU', 'Pixel Acc'],
                        yticklabels=heatmap_labels, ax=ax2, cbar_kws={'label': 'Score'},
                        linewidths=1, linecolor='white', vmin=0, vmax=1)
            ax2.set_title('Performance Heatmap by Sem_Magnitude',
                          fontsize=13, fontweight='bold', pad=15)

    # Top performers
    ax3 = fig.add_subplot(gs[1, 2])
    top_10 = df.nlargest(10, 'mean_iou')
    # Fewer than ten rows may be available, so positions follow the actual
    # row count instead of a fixed range(10).
    top_positions = list(range(len(top_10)))
    ax3.barh(top_positions, top_10['mean_iou'], color='#2ecc71', edgecolor='black')
    ax3.set_yticks(top_positions)
    if has_magnitude:
        # str() keeps the 3-character abbreviation safe for NaN / non-string labels.
        top_labels = [str(mag)[:3] for mag in top_10['sem_magnitude']]
    else:
        top_labels = [str(rank + 1) for rank in top_positions]
    ax3.set_yticklabels(top_labels, fontsize=8)
    ax3.set_xlabel('Mean IoU', fontsize=10, fontweight='bold')
    ax3.set_title('Top 10 Best Performers', fontsize=11, fontweight='bold')
    ax3.grid(True, alpha=0.3, axis='x')
    ax3.invert_yaxis()  # best performer at the top

    # Violin plot
    ax4 = fig.add_subplot(gs[2, 0])
    if has_magnitude:
        # Empty groups are dropped: there is no data to draw a violin from.
        violin_labels = []
        violin_data = []
        for mag in mag_order:
            values = df.loc[df['sem_magnitude'] == mag, 'fake_iou'].dropna()
            if len(values) > 0:
                violin_labels.append(mag)
                violin_data.append(values)

        if violin_data:
            violin_positions = list(range(len(violin_labels)))
            ax4.violinplot(violin_data, positions=violin_positions,
                           showmeans=True, showmedians=True)
            ax4.set_xticks(violin_positions)
            ax4.set_xticklabels(violin_labels)
        ax4.set_ylabel('Fake IoU', fontsize=10, fontweight='bold')
        ax4.set_title('Fake IoU Distribution', fontsize=11, fontweight='bold')
        ax4.grid(True, alpha=0.3, axis='y')

    # Confidence histogram
    ax5 = fig.add_subplot(gs[2, 1])
    # NaNs are removed first: the histogram bin range cannot be derived from
    # data that contains NaN.
    confidence_values = df['confidence_mean'].dropna()
    if len(confidence_values) > 0:
        ax5.hist(confidence_values, bins=30, color='#3498db',
                 alpha=0.7, edgecolor='black')
        ax5.axvline(confidence_values.mean(), color='red',
                    linestyle='--', linewidth=2, label='Mean')
        ax5.legend()
    ax5.set_xlabel('Mean Confidence', fontsize=10, fontweight='bold')
    ax5.set_ylabel('Frequency', fontsize=10, fontweight='bold')
    ax5.set_title('Confidence Distribution', fontsize=11, fontweight='bold')
    ax5.grid(True, alpha=0.3)

    # Pie chart
    ax6 = fig.add_subplot(gs[2, 2])
    if has_magnitude:
        mag_counts = df['sem_magnitude'].value_counts()
        mag_counts = mag_counts[mag_counts > 0]
        if len(mag_counts) > 0:
            # value_counts() orders by frequency, so colours are looked up by
            # label (grey for anything outside the three known classes).
            colors_pie = [mag_palette.get(mag, '#95a5a6') for mag in mag_counts.index]
            ax6.pie(mag_counts.values, labels=mag_counts.index, autopct='%1.1f%%',
                    colors=colors_pie, startangle=90,
                    textprops={'fontsize': 10, 'weight': 'bold'})
        ax6.set_title('Sample Distribution', fontsize=11, fontweight='bold')

    plt.tight_layout()
    plt.savefig(os.path.join(viz_dir, '05_comprehensive_dashboard.png'), dpi=300, bbox_inches='tight')
    plt.close(fig)

# Status message emitted at the end of this cell, after the plotting helpers
# above have been defined (Turkish: "Visualization functions (2/2) ready").
print("‚úÖ Visualization fonksiyonlarƒ± (2/2) hazƒ±r")

‚úÖ Visualization fonksiyonlarƒ± (2/2) hazƒ±r


In [9]:
# ==============================================================================
# RUN THE TEST - AFTER ALL FUNCTIONS HAVE BEEN DEFINED
# ==============================================================================

# Banner announcing that every helper is defined and the test run is starting.
print("\n" + "="*70)
print("üéØ T√úM FONKSƒ∞YONLAR TANIMLANDI - TEST BA≈ûLIYOR")
print("="*70)

# Run the test function and keep its return value in `results`.
# NOTE: run_safe_test is not defined in this cell; it must already exist in the
# kernel (i.e. the cell that defines it has to be executed first).
results = run_safe_test()


üéØ T√úM FONKSƒ∞YONLAR TANIMLANDI - TEST BA≈ûLIYOR

üöÄ GELI≈ûMI≈û TEST (CSV & Vƒ∞Z√úALƒ∞ZASYON ƒ∞LE)
üîÑ Model: ./segformer_b4_stable/final_best_model
üèéÔ∏è  129 g√∂rsel test ediliyor...


Analiz:   0%|          | 0/9 [00:00<?, ?it/s]


üèÜ GENEL SONU√áLAR
üî• Mean IoU:       0.8742
ü¶† Fake IoU (1):   0.7731
üèûÔ∏è Background IoU: 0.9752
üéØ Accuracy:       0.9772

üìä ZORLUK SEVƒ∞YESƒ∞NE (MAGNITUDE) G√ñRE ANALƒ∞Z
MAGNITUDE            | mIoU       | Fake IoU   | Adet 
----------------------------------------------------------------------
large                | 0.8597     | 0.7429     | 43
medium               | 0.8731     | 0.7689     | 43
small                | 0.8844     | 0.7970     | 43
----------------------------------------------------------------------

üìÅ CSV dosyalarƒ± kaydediliyor...
   ‚úÖ Detaylƒ± sonu√ßlar: segformer_test_results\20251229_205934\detailed_results.csv
      129 √∂rnek kaydedildi
   ‚úÖ √ñzet metrikler: segformer_test_results\20251229_205934\summary_metrics.csv
   ‚úÖ ƒ∞statistiksel √∂zet: segformer_test_results\20251229_205934\statistical_summary.csv

‚úÖ Toplam 129 √∂rnek i√ßin 3 CSV dosyasƒ± olu≈üturuldu

üé® G√∂rselle≈ütirmeler olu≈üturuluyor...
   üìä 1/5: Metrik daƒüƒ±lƒ±m