In [11]:
# ============================================================================
# SETUP: Repository Clone, Drive Mount, and Path Configuration
# ============================================================================
# This cell performs minimal setup required for the notebook to run:
# 1. Clones repository from GitHub (if not already present)
# 2. Mounts Google Drive for persistent data storage
# 3. Configures Python paths and initializes StorageManager
# 4. Verifies that the project modules import (test splits are loaded later, per task)

import shutil
import os
import subprocess
import time
import requests
import zipfile
import sys
from pathlib import Path
from google.colab import drive
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModel
!rm -rf /content/semeval-context-tree-modular
!git clone https://github.com/EonTechie/semeval-context-tree-modular.git
!cd /content/semeval-context-tree-modular && git pull
# Repository configuration
repo_dir = '/content/semeval-context-tree-modular'
repo_url = 'https://github.com/EonTechie/semeval-context-tree-modular.git'
zip_url = 'https://github.com/EonTechie/semeval-context-tree-modular/archive/refs/heads/main.zip'

# Clone repository (if not already present)
if not os.path.exists(repo_dir):
    print("Cloning repository from GitHub...")
    max_retries = 2
    clone_success = False

    for attempt in range(max_retries):
        try:
            result = subprocess.run(
                ['git', 'clone', repo_url],
                cwd='/content',
                capture_output=True,
                text=True,
                timeout=60
            )
            if result.returncode == 0:
                print("Repository cloned successfully via git")
                clone_success = True
                break
            else:
                if attempt < max_retries - 1:
                    time.sleep(3)
        except Exception as e:
            if attempt < max_retries - 1:
                time.sleep(3)

    # Fallback: Download as ZIP if git clone fails
    if not clone_success:
        print("Git clone failed. Downloading repository as ZIP archive...")
        zip_path = '/tmp/repo.zip'
        try:
            response = requests.get(zip_url, stream=True, timeout=60)
            response.raise_for_status()
            with open(zip_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                zip_ref.extractall('/content')
            extracted_dir = '/content/semeval-context-tree-modular-main'
            if os.path.exists(extracted_dir):
                os.rename(extracted_dir, repo_dir)
            os.remove(zip_path)
            print("Repository downloaded and extracted successfully")
        except Exception as e:
            raise RuntimeError(f"Failed to obtain repository: {e}")

# Mount Google Drive (if not already mounted).
# An exception from drive.mount is tolerated only when the Drive folder is
# actually reachable; otherwise later cells would silently create DATA_PATH on
# the local disk and checkpoints/results would not be persisted.
try:
    drive.mount('/content/drive', force_remount=False)
except Exception as mount_error:
    if not os.path.isdir('/content/drive/MyDrive'):
        raise RuntimeError(
            f"Google Drive could not be mounted at /content/drive: {mount_error}"
        ) from mount_error
    print(f"Warning: drive.mount raised {type(mount_error).__name__}, "
          f"but /content/drive/MyDrive is available; continuing.")

# Fixed locations: the cloned repository and the Drive folder used for data.
BASE_PATH = Path('/content/semeval-context-tree-modular')
DATA_PATH = Path('/content/drive/MyDrive/semeval_data')

# Fail fast, in order, if the checkout is missing or incomplete.
_src_dir = BASE_PATH / 'src'
_manager_file = _src_dir / 'storage' / 'manager.py'
for _required_path, _problem in (
    (BASE_PATH, "Repository directory not found"),
    (_src_dir, "src directory not found in repository"),
    (_manager_file, "Required file not found"),
):
    if not _required_path.exists():
        raise RuntimeError(f"{_problem}: {_required_path}")

# Make the repository's `src` package importable.
_repo_root = str(BASE_PATH)
sys.path.insert(0, _repo_root)

# Import everything this notebook needs up front so that a broken checkout is
# reported here rather than in a later cell.
try:
    from src.storage.manager import StorageManager
    from src.features.extraction import featurize_hf_dataset_in_batches_v2
    from src.models.classifiers import get_classifier_dict
    from src.evaluation.metrics import compute_all_metrics, print_classification_report
    from src.evaluation.tables import print_results_table
    from src.evaluation.visualizer import visualize_all_evaluation
except ImportError as import_error:
    raise ImportError(
        f"Failed to import required modules. "
        f"Repository path: {BASE_PATH}, "
        f"Python path: {sys.path[:3]}, "
        f"Error: {import_error}"
    )

# Storage helper: repository root serves as both base and github path,
# the Drive folder as the data path.
storage = StorageManager(
    base_path=_repo_root,
    data_path=str(DATA_PATH),
    github_path=_repo_root
)

# Test splits are not loaded here: they are loaded per task in the evaluation
# loop, because Clarity and Evasion use different test splits (Evasion uses
# majority voting).
for _line in (
    "Setup complete",
    f"  Repository: {BASE_PATH}",
    f"  Data storage: {DATA_PATH}",
    "\nCRITICAL: Test sets will be loaded per-task (task-specific splits)",
    "         Clarity and Evasion have different test splits due to majority voting",
    "         These sets have NEVER been used for training or development!",
):
    print(_line)


Cloning into 'semeval-context-tree-modular'...
remote: Enumerating objects: 859, done.[K
remote: Counting objects: 100% (101/101), done.[K
remote: Compressing objects: 100% (80/80), done.[K
remote: Total 859 (delta 45), reused 52 (delta 21), pack-reused 758 (from 2)[K
Receiving objects: 100% (859/859), 66.41 MiB | 13.05 MiB/s, done.
Resolving deltas: 100% (525/525), done.
Already up to date.
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Setup complete
  Repository: /content/semeval-context-tree-modular
  Data storage: /content/drive/MyDrive/semeval_data

CRITICAL: Test sets will be loaded per-task (task-specific splits)
         Clarity and Evasion have different test splits due to majority voting
         These sets have NEVER been used for training or development!


In [12]:
# STEP 1
# ==============
# ============================================================================
# SETUP: Repository Clone, Drive Mount, and Path Configuration
# ============================================================================
import shutil
import os
import subprocess
import time
import requests
import zipfile
import sys
from pathlib import Path
from google.colab import drive
import numpy as np
import pandas as pd

# Repository configuration
repo_dir = '/content/semeval-context-tree-modular'
repo_url = 'https://github.com/EonTechie/semeval-context-tree-modular.git'
zip_url = 'https://github.com/EonTechie/semeval-context-tree-modular/archive/refs/heads/main.zip'

# Clone repository (if not already present): try git up to max_retries times,
# then fall back to downloading the main-branch ZIP archive.
if not os.path.exists(repo_dir):
    print("Cloning repository from GitHub...")
    max_retries = 2
    clone_success = False

    for attempt in range(max_retries):
        failure_reason = None
        try:
            result = subprocess.run(
                ['git', 'clone', repo_url],
                cwd='/content',
                capture_output=True,
                text=True,
                timeout=60
            )
            if result.returncode == 0:
                print("Repository cloned successfully via git")
                clone_success = True
                break
            failure_reason = result.stderr.strip() or f"git exited with code {result.returncode}"
        except Exception as e:
            failure_reason = f"{type(e).__name__}: {e}"

        # Report why the attempt failed instead of discarding the reason.
        print(f"  git clone attempt {attempt + 1}/{max_retries} failed: {failure_reason}")
        # repo_dir did not exist when this branch started, so anything there now is
        # a partial clone (e.g. after a timeout); remove it so that the next
        # attempt, or the ZIP rename below, is not blocked by it.
        shutil.rmtree(repo_dir, ignore_errors=True)
        if attempt < max_retries - 1:
            time.sleep(3)

    # Fallback: Download as ZIP if git clone fails
    if not clone_success:
        print("Git clone failed. Downloading repository as ZIP archive...")
        zip_path = '/tmp/repo.zip'
        try:
            response = requests.get(zip_url, stream=True, timeout=60)
            response.raise_for_status()
            with open(zip_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                zip_ref.extractall('/content')
            extracted_dir = '/content/semeval-context-tree-modular-main'
            # Do not claim success unless the archive really produced the
            # directory that is about to be renamed.
            if not os.path.exists(extracted_dir):
                raise FileNotFoundError(
                    f"Expected directory not found after extraction: {extracted_dir}"
                )
            os.rename(extracted_dir, repo_dir)
            print("Repository downloaded and extracted successfully")
        except Exception as e:
            raise RuntimeError(f"Failed to obtain repository: {e}") from e
        finally:
            # Remove the temporary archive whether or not extraction worked.
            if os.path.exists(zip_path):
                os.remove(zip_path)

# Mount Google Drive (if not already mounted).
# An exception from drive.mount is tolerated only when the Drive folder is
# actually reachable; otherwise later cells would silently create DATA_PATH on
# the local disk and checkpoints/results would not be persisted.
try:
    drive.mount('/content/drive', force_remount=False)
except Exception as mount_error:
    if not os.path.isdir('/content/drive/MyDrive'):
        raise RuntimeError(
            f"Google Drive could not be mounted at /content/drive: {mount_error}"
        ) from mount_error
    print(f"Warning: drive.mount raised {type(mount_error).__name__}, "
          f"but /content/drive/MyDrive is available; continuing.")

# Fixed locations: the cloned repository and the Drive folder used for data.
BASE_PATH = Path('/content/semeval-context-tree-modular')
DATA_PATH = Path('/content/drive/MyDrive/semeval_data')

# Fail fast, in order, if the checkout is missing or incomplete.
_src_dir = BASE_PATH / 'src'
_manager_file = _src_dir / 'storage' / 'manager.py'
for _required_path, _problem in (
    (BASE_PATH, "Repository directory not found"),
    (_src_dir, "src directory not found in repository"),
    (_manager_file, "Required file not found"),
):
    if not _required_path.exists():
        raise RuntimeError(f"{_problem}: {_required_path}")

# Make the repository's `src` package importable.
_repo_root = str(BASE_PATH)
sys.path.insert(0, _repo_root)

# Import project and scikit-learn names used by the following cells; a failure
# is re-raised with the paths needed to diagnose it.
try:
    from src.storage.manager import StorageManager
    from src.models.classifiers import get_classifier_dict
    from src.features.extraction import get_feature_names
    from sklearn.metrics import f1_score
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler, LabelEncoder
    from sklearn.base import clone
except ImportError as import_error:
    raise ImportError(
        f"Failed to import required modules. "
        f"Repository path: {BASE_PATH}, "
        f"Python path: {sys.path[:3]}, "
        f"Error: {import_error}"
    )

# Storage helper: repository root serves as both base and github path,
# the Drive folder as the data path.
storage = StorageManager(
    base_path=_repo_root,
    data_path=str(DATA_PATH),
    github_path=_repo_root
)

# Directory for ablation results on Drive (created together with its parents).
ablation_dir = DATA_PATH.joinpath('results', 'FinalResultsType2', 'ablation')
ablation_dir.mkdir(parents=True, exist_ok=True)

for _line in (
    "Setup complete",
    f"  Repository: {BASE_PATH}",
    f"  Data storage: {DATA_PATH}",
    f"  Ablation results: {ablation_dir}",
):
    print(_line)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Setup complete
  Repository: /content/semeval-context-tree-modular
  Data storage: /content/drive/MyDrive/semeval_data
  Ablation results: /content/drive/MyDrive/semeval_data/results/FinalResultsType2/ablation


In [13]:
# ============================================================================
# STEP 2
# ==============
# ============================================================================
# REPRODUCIBILITY SETUP: Set Random Seeds for All Libraries
# ============================================================================
from src.utils.reproducibility import set_all_seeds

# Seed everything through the project helper so repeated runs are comparable.
# deterministic=True additionally requests PyTorch's deterministic mode, which
# the helper itself reports as potentially slower. Which libraries get seeded
# is decided inside set_all_seeds (not shown in this notebook).
set_all_seeds(seed=42, deterministic=True)

# Echo the active settings and how to relax them.
print("\n".join([
    "✓ Reproducibility configured: All random seeds set to 42",
    "✓ PyTorch deterministic mode enabled",
    "\nNOTE: If you encounter performance issues or non-deterministic behavior,",
    "      you can set deterministic=False in set_all_seeds() call above.",
]))


✓ Reproducibility seeds set to 42
✓ PyTorch deterministic mode enabled (may be slower)
✓ Reproducibility configured: All random seeds set to 42
✓ PyTorch deterministic mode enabled

NOTE: If you encounter performance issues or non-deterministic behavior,
      you can set deterministic=False in set_all_seeds() call above.


In [14]:

# ============================================================================
# STEP 3
# ==============
# ============================================================================
# CONFIGURE MODELS, TASKS, AND CLASSIFIERS
# ============================================================================
# This cell relies on get_classifier_dict having been imported by the setup
# cell; stop with an actionable message when it was not run.
if 'get_classifier_dict' not in globals():
    raise NameError(
        "get_classifier_dict not found. Please run Cell 1 (Setup) first.\n"
        "Cell 1 imports get_classifier_dict from src.models.classifiers."
    )

# Encoder identifiers used throughout the notebook.
MODELS = ['bert', 'bert_political', 'bert_ambiguity', 'roberta', 'deberta', 'xlnet']

# Only these two tasks take part in greedy selection. The plain 'evasion' task
# is deliberately left out (it is only used for training in notebook 3), and
# best-classifier selection happens in notebook 4, not here.
TASKS = ['clarity', 'hierarchical_evasion_to_clarity']  # 2 tasks for greedy selection

# Class names for each label space.
CLARITY_LABELS = ['Ambivalent', 'Clear Non-Reply', 'Clear Reply']
EVASION_LABELS = ['Claims ignorance', 'Clarification', 'Declining to answer',
                  'Deflection', 'Dodging', 'Explicit',
                  'General', 'Implicit', 'Partial/half-answer']

# Classifier dict built with a fixed random_state for reproducibility
# (includes an MLP, per the original note).
classifiers = get_classifier_dict(random_state=42)

# Summary of the configuration.
_banner = "=" * 80
_n_models = len(MODELS)
_n_classifiers = len(classifiers)
for _line in (
    _banner,
    "CONFIGURATION",
    _banner,
    f"  Models: {_n_models} models",
    f"    {MODELS}",
    f"  Tasks: {len(TASKS)} tasks",
    f"    {TASKS}",
    f"  Classifiers: {_n_classifiers} classifiers",
    f"    {list(classifiers.keys())}",
    f"  Total combinations per task: {_n_models} × {_n_classifiers} = {_n_models * _n_classifiers}",
    "  Evaluation set: Dev set (not test)",
    _banner,
):
    print(_line)

CONFIGURATION
  Models: 6 models
    ['bert', 'bert_political', 'bert_ambiguity', 'roberta', 'deberta', 'xlnet']
  Tasks: 2 tasks
    ['clarity', 'hierarchical_evasion_to_clarity']
  Classifiers: 6 classifiers
    ['LogisticRegression', 'LinearSVC', 'RandomForest', 'MLP', 'XGBoost', 'LightGBM']
  Total combinations per task: 6 × 6 = 36
  Evaluation set: Dev set (not test)


In [15]:
# ============================================================================
# STEP 4
# ==============
# ============================================================================
# STEP 1-3: 60 Feature Names, Load Features, Global Top 20 Selection
# ============================================================================
# This cell:
# - Creates 60 feature names (18 model-independent + 42 model-dependent)
# - Loads 60 features for train/dev/test
# - Performs single-feature ablation on 60 features
# - Selects global top 20 features per task

import numpy as np
import json
import pandas as pd
import pickle
from pathlib import Path
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import f1_score
from sklearn.base import clone
from tqdm import tqdm

from src.features.extraction import (
    get_model_independent_feature_names,
    get_model_dependent_feature_names
)
from src.models.classifiers import get_classifier_dict
from src.evaluation.metrics import compute_all_metrics

# Prerequisites from earlier cells: the storage manager and both label lists.
if 'storage' not in globals():
    raise NameError("storage not found. Please run Cell 1 (Setup) first.")

if not {'CLARITY_LABELS', 'EVASION_LABELS'}.issubset(globals()):
    raise NameError("CLARITY_LABELS and EVASION_LABELS not found. Please run Cell 3 (Configuration) first.")

# ========================================================================
# CHECKPOINT DIRECTORY SETUP
# ========================================================================
# Ablation CSVs are written directly into the classifier_specific results
# folder; intermediate checkpoints go into its 'checkpoint' subfolder.
results_dir_type2 = storage.data_path / 'results/FinalResultsType2/classifier_specific'
checkpoint_dir = results_dir_type2 / 'checkpoint'
ablation_60_dir = results_dir_type2

for _directory in (checkpoint_dir, ablation_60_dir):
    _directory.mkdir(parents=True, exist_ok=True)

# ========================================================================
# HELPER FUNCTIONS FOR CHECKPOINT
# ========================================================================
def load_checkpoint(filepath):
    """Load a checkpoint file if it exists.

    The loader is chosen from the file suffix: '.pkl' (pickle), '.json',
    '.csv' (pandas DataFrame) or '.npy' (NumPy array).

    Args:
        filepath: Location of the checkpoint, as a ``pathlib.Path`` or a
            plain string / path-like object.

    Returns:
        The loaded object, or ``None`` when the file does not exist, cannot be
        read (a warning is printed), or has an unsupported suffix (a warning
        is printed).
    """
    filepath = Path(filepath)  # accept str / os.PathLike as well as Path
    if not filepath.exists():
        return None

    suffix = filepath.suffix
    try:
        if suffix == '.pkl':
            # NOTE(security): unpickling can execute arbitrary code; only load
            # checkpoint files that this project wrote itself.
            with open(filepath, 'rb') as f:
                return pickle.load(f)
        if suffix == '.json':
            with open(filepath, 'r') as f:
                return json.load(f)
        if suffix == '.csv':
            return pd.read_csv(filepath)
        if suffix == '.npy':
            return np.load(filepath)
    except Exception as e:
        # Best effort by design: an unreadable checkpoint is treated as
        # missing so that the caller can recompute it.
        print(f"     Warning: Could not load {filepath.name}: {e}")
        return None

    # Existing file with a suffix that none of the loaders handle.
    print(f"     Warning: Unsupported checkpoint type '{suffix}' for {filepath.name}")
    return None

def save_checkpoint(data, filepath):
    """Save a checkpoint file, choosing the format from the file suffix.

    Supported suffixes: '.pkl' (pickle), '.json', '.csv' (``data`` must
    provide ``to_csv``, e.g. a pandas DataFrame) and '.npy' (NumPy array).
    Missing parent directories are created.

    Args:
        data: Object to persist.
        filepath: Destination, as a ``pathlib.Path`` or a plain string /
            path-like object.

    Raises:
        ValueError: If the suffix is not one of the supported formats
            (previously the data was silently discarded).
        TypeError: If ``data`` is not JSON-serializable for a '.json' target;
            an existing file at ``filepath`` is left untouched in that case.
    """
    filepath = Path(filepath)  # accept str / os.PathLike as well as Path
    suffix = filepath.suffix
    if suffix not in ('.pkl', '.json', '.csv', '.npy'):
        raise ValueError(
            f"Unsupported checkpoint type '{suffix}' for {filepath.name}; "
            f"expected one of .pkl, .json, .csv, .npy"
        )

    filepath.parent.mkdir(parents=True, exist_ok=True)
    if suffix == '.pkl':
        with open(filepath, 'wb') as f:
            pickle.dump(data, f)
    elif suffix == '.json':
        # Serialize before opening the file so that a non-serializable object
        # cannot truncate a previously valid checkpoint.
        serialized = json.dumps(data, indent=2)
        with open(filepath, 'w') as f:
            f.write(serialized)
    elif suffix == '.csv':
        data.to_csv(filepath, index=False)
    else:  # '.npy'
        np.save(filepath, data)

# ========================================================================
# STEP 1: Create 60 Feature Names
# ========================================================================
print("\n" + "="*80)
print("STEP 1: CREATE 60 FEATURE NAMES")
print("="*80)

# Checkpoint for feature names
fused_feature_names_60_path = checkpoint_dir / 'fused_feature_names_60.json'
feature_name_to_idx_60_path = checkpoint_dir / 'feature_name_to_idx_60.json'

fused_feature_names_60 = load_checkpoint(fused_feature_names_60_path)
feature_name_to_idx_60 = load_checkpoint(feature_name_to_idx_60_path)

# Models whose dependent features are fused; defined unconditionally so the
# name exists whether or not the checkpoint was found.
MODELS_60 = ['bert', 'bert_political', 'bert_ambiguity', 'roberta', 'deberta', 'xlnet']

if fused_feature_names_60 is None:
    # Get base feature names
    indep_feature_names = get_model_independent_feature_names()  # 18 features
    dep_feature_names = get_model_dependent_feature_names()  # 7 features

    # Fused layout: model-independent names first, then one
    # "<model>_<feature>" name per model and model-dependent feature.
    fused_feature_names_60 = indep_feature_names.copy()
    for model in MODELS_60:
        for dep_name in dep_feature_names:
            fused_feature_names_60.append(f"{model}_{dep_name}")

    feature_name_to_idx_60 = {name: idx for idx, name in enumerate(fused_feature_names_60)}

    save_checkpoint(fused_feature_names_60, fused_feature_names_60_path)
    save_checkpoint(feature_name_to_idx_60, feature_name_to_idx_60_path)
    print(f"✓ Created and saved 60 feature names")
elif feature_name_to_idx_60 is None:
    # The names were checkpointed but the name->index map is missing or
    # unreadable: rebuild it from the names instead of leaving it as None.
    feature_name_to_idx_60 = {name: idx for idx, name in enumerate(fused_feature_names_60)}
    save_checkpoint(feature_name_to_idx_60, feature_name_to_idx_60_path)
    print(f"✓ Loaded 60 feature names from checkpoint")
    print(f"✓ Rebuilt and saved missing feature_name_to_idx_60")
else:
    print(f"✓ Loaded 60 feature names from checkpoint")

print(f"  - Model-independent: 18 features")
print(f"  - Model-dependent: 6 models × 7 features = 42 features")
print(f"  - Total: {len(fused_feature_names_60)} features")

# ========================================================================
# STEP 2: Load 60 Features for Train/Dev/Test
# ========================================================================
print("\n" + "="*80)
print("STEP 2: LOAD 60 FEATURES (EARLY FUSION)")
print("="*80)

TASKS_60 = ['clarity', 'hierarchical_evasion_to_clarity']

# Checkpoint for features.
# Expected layout: {task: {'train': X_train_60, 'dev': X_dev_60, 'test': X_test_60}}
features_60_path = checkpoint_dir / 'features_60.pkl'
features_60 = load_checkpoint(features_60_path)

if features_60 is None:
    # PLACEHOLDER: the full STEP 2 implementation (lines 1078-1207 of the code
    # this cell was copied from, per the original note) must be pasted here
    # verbatim. It has to assign `features_60`.

    # Until that code is restored, stop here rather than pickling None and
    # reporting a successful save.
    if features_60 is None:
        raise NotImplementedError(
            f"features_60 checkpoint not found at {features_60_path} and the STEP 2 "
            f"feature-building code is not present in this cell. Restore the "
            f"checkpoint or paste the STEP 2 code before continuing."
        )

    save_checkpoint(features_60, features_60_path)
    print(f"✓ Saved features_60 to checkpoint")
else:
    print(f"✓ Loaded features_60 from checkpoint")

# ========================================================================
# STEP 3: Select Global Top 20 Features
# ========================================================================
print("\n" + "="*80)
print("STEP 3: SELECT GLOBAL TOP 20 FEATURES (60 FEATURE'LI SİSTEM ÜZERİNDE)")
print("="*80)

# Checkpoint paths
df_ablation_60_path = ablation_60_dir / 'ablation_results_60_features.csv'
df_stats_60_path = ablation_60_dir / 'feature_ranking_60_features.csv'
global_top_20_dict_path = checkpoint_dir / 'global_top_20_dict.json'

# Load checkpoints
df_ablation_60 = load_checkpoint(df_ablation_60_path)
df_stats_60 = load_checkpoint(df_stats_60_path)
global_top_20_dict = load_checkpoint(global_top_20_dict_path)

if df_ablation_60 is None or df_stats_60 is None or global_top_20_dict is None:
    # PLACEHOLDER: the full STEP 3 implementation (lines 1225-1315 of the code
    # this cell was copied from, per the original note) must be pasted here
    # verbatim. It has to assign df_ablation_60, df_stats_60 and
    # global_top_20_dict.

    # Until that code is restored, report exactly which results are missing
    # instead of failing later inside save_checkpoint on a None value.
    _missing_step3 = [
        _name
        for _name, _value in (
            ('df_ablation_60', df_ablation_60),
            ('df_stats_60', df_stats_60),
            ('global_top_20_dict', global_top_20_dict),
        )
        if _value is None
    ]
    if _missing_step3:
        raise NotImplementedError(
            f"STEP 3 results missing ({', '.join(_missing_step3)}) and the STEP 3 "
            f"ablation code is not present in this cell. Restore the checkpoints "
            f"or paste the STEP 3 code before continuing."
        )

    save_checkpoint(df_ablation_60, df_ablation_60_path)
    save_checkpoint(df_stats_60, df_stats_60_path)
    save_checkpoint(global_top_20_dict, global_top_20_dict_path)
    print(f"\n✓ Saved STEP 3 results to checkpoint")
else:
    print(f"✓ Loaded STEP 3 results from checkpoint")

print("\n" + "="*80)
print("STEP 1-3 COMPLETE")
print("="*80)
print(f"Results saved to: {checkpoint_dir}")


STEP 1: CREATE 60 FEATURE NAMES
✓ Loaded 60 feature names from checkpoint
  - Model-independent: 18 features
  - Model-dependent: 6 models × 7 features = 42 features
  - Total: 60 features

STEP 2: LOAD 60 FEATURES (EARLY FUSION)
✓ Loaded features_60 from checkpoint

STEP 3: SELECT GLOBAL TOP 20 FEATURES (60 FEATURE'LI SİSTEM ÜZERİNDE)
✓ Loaded STEP 3 results from checkpoint

STEP 1-3 COMPLETE
Results saved to: /content/drive/MyDrive/semeval_data/results/FinalResultsType2/classifier_specific/checkpoint


In [23]:
# ============================================================================
# STEP 5
# ==============
# ============================================================================
# STEP 4: Greedy Forward Selection (Per Classifier)
# ============================================================================
# This cell:
# - Performs greedy forward selection for each classifier
# - Starts with global top 20 features
# - Adds up to 20 more features via greedy selection
# - Trains final model on train+dev, evaluates on test
# - CRITICAL FIX: Recomputes metrics if missing

import numpy as np
import json
import pandas as pd
import pickle
from pathlib import Path
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import f1_score
from sklearn.base import clone
from tqdm import tqdm

from src.models.classifiers import get_classifier_dict
from src.evaluation.metrics import compute_all_metrics

# Prerequisites from earlier cells: the storage manager and both label lists.
if 'storage' not in globals():
    raise NameError("storage not found. Please run Cell 1 (Setup) first.")

if not {'CLARITY_LABELS', 'EVASION_LABELS'}.issubset(globals()):
    raise NameError("CLARITY_LABELS and EVASION_LABELS not found. Please run Cell 3 (Configuration) first.")

# Locations of the checkpoints written by Cell 4 (STEP 1-3).
checkpoint_dir = storage.data_path / 'results/FinalResultsType2/classifier_specific/checkpoint'
fused_feature_names_60_path = checkpoint_dir.joinpath('fused_feature_names_60.json')
features_60_path = checkpoint_dir.joinpath('features_60.pkl')
global_top_20_dict_path = checkpoint_dir.joinpath('global_top_20_dict.json')

# The feature-name file is used as the marker that Cell 4 has been run.
_cell4_has_run = fused_feature_names_60_path.exists()
if not _cell4_has_run:
    raise FileNotFoundError("Cell 4 (STEP 1-3) must be run first. fused_feature_names_60.json not found.")

# Load from Cell 4
def load_checkpoint(filepath):
    """Load a checkpoint file if it exists.

    The loader is chosen from the file suffix: '.pkl' (pickle), '.json',
    '.csv' (pandas DataFrame) or '.npy' (NumPy array).

    Args:
        filepath: Location of the checkpoint, as a ``pathlib.Path`` or a
            plain string / path-like object.

    Returns:
        The loaded object, or ``None`` when the file does not exist, cannot be
        read (a warning is printed), or has an unsupported suffix (a warning
        is printed).
    """
    filepath = Path(filepath)  # accept str / os.PathLike as well as Path
    if not filepath.exists():
        return None

    suffix = filepath.suffix
    try:
        if suffix == '.pkl':
            # NOTE(security): unpickling can execute arbitrary code; only load
            # checkpoint files that this project wrote itself.
            with open(filepath, 'rb') as f:
                return pickle.load(f)
        if suffix == '.json':
            with open(filepath, 'r') as f:
                return json.load(f)
        if suffix == '.csv':
            return pd.read_csv(filepath)
        if suffix == '.npy':
            return np.load(filepath)
    except Exception as e:
        # Best effort by design: an unreadable checkpoint is treated as
        # missing so that the caller can recompute it.
        print(f"     Warning: Could not load {filepath.name}: {e}")
        return None

    # Existing file with a suffix that none of the loaders handle.
    print(f"     Warning: Unsupported checkpoint type '{suffix}' for {filepath.name}")
    return None

def save_checkpoint(data, filepath):
    """Save a checkpoint file, choosing the format from the file suffix.

    Supported suffixes: '.pkl' (pickle), '.json', '.csv' (``data`` must
    provide ``to_csv``, e.g. a pandas DataFrame) and '.npy' (NumPy array).
    Missing parent directories are created.

    Args:
        data: Object to persist.
        filepath: Destination, as a ``pathlib.Path`` or a plain string /
            path-like object.

    Raises:
        ValueError: If the suffix is not one of the supported formats
            (previously the data was silently discarded).
        TypeError: If ``data`` is not JSON-serializable for a '.json' target;
            an existing file at ``filepath`` is left untouched in that case.
    """
    filepath = Path(filepath)  # accept str / os.PathLike as well as Path
    suffix = filepath.suffix
    if suffix not in ('.pkl', '.json', '.csv', '.npy'):
        raise ValueError(
            f"Unsupported checkpoint type '{suffix}' for {filepath.name}; "
            f"expected one of .pkl, .json, .csv, .npy"
        )

    filepath.parent.mkdir(parents=True, exist_ok=True)
    if suffix == '.pkl':
        with open(filepath, 'wb') as f:
            pickle.dump(data, f)
    elif suffix == '.json':
        # Serialize before opening the file so that a non-serializable object
        # cannot truncate a previously valid checkpoint.
        serialized = json.dumps(data, indent=2)
        with open(filepath, 'w') as f:
            f.write(serialized)
    elif suffix == '.csv':
        data.to_csv(filepath, index=False)
    else:  # '.npy'
        np.save(filepath, data)

# Bring back everything Cell 4 checkpointed.
fused_feature_names_60 = load_checkpoint(fused_feature_names_60_path)
feature_name_to_idx_60 = load_checkpoint(checkpoint_dir / 'feature_name_to_idx_60.json')
features_60 = load_checkpoint(features_60_path)
global_top_20_dict = load_checkpoint(global_top_20_dict_path)

# Every one of the four artifacts is required; load_checkpoint returns None for
# a missing or unreadable file.
for _loaded in (fused_feature_names_60, feature_name_to_idx_60, features_60, global_top_20_dict):
    if _loaded is None:
        raise FileNotFoundError("Cell 4 (STEP 1-3) must be completed first. Missing checkpoint files.")

# Output folders for this step, all below the classifier_specific results dir.
results_dir_type2 = storage.data_path / 'results/FinalResultsType2/classifier_specific'
predictions_dir = results_dir_type2 / 'predictions'
probabilities_dir = results_dir_type2 / 'probabilities'
metrics_dir = results_dir_type2 / 'metrics'

for _output_dir in (predictions_dir, probabilities_dir, metrics_dir):
    _output_dir.mkdir(parents=True, exist_ok=True)

TASKS_60 = ['clarity', 'hierarchical_evasion_to_clarity']

# PLACEHOLDER: the function greedy_forward_selection_60_with_checkpoint
# (lines 1329-1400 of the code this cell was copied from, per the original
# note) must be pasted here verbatim. It is not defined in this cell as shown,
# and the loop below calls it whenever no cached predictions are found.

# ========================================================================
# STEP 4: Greedy Forward Selection for Each Classifier
# ========================================================================
_banner = "=" * 80
print("\n" + _banner)
print("STEP 4: GREEDY FORWARD SELECTION (PER CLASSIFIER)")
print(_banner)

# Results collected by the loop below:
# {task: {classifier: {features, metrics, predictions, probabilities}}}
classifier_specific_results = {}

for task in TASKS_60:
    print(f"\n{'-'*80}")
    print(f"TASK: {task.upper()}")
    print(f"{'-'*80}")

    if task not in classifier_specific_results:
        classifier_specific_results[task] = {}

    X_train_60 = features_60[task]['train']
    X_dev_60 = features_60[task]['dev']
    X_test_60 = features_60[task]['test']

    split_task = 'evasion' if task == 'hierarchical_evasion_to_clarity' else task
    label_key = 'clarity_label' if task == 'hierarchical_evasion_to_clarity' else ('clarity_label' if task == 'clarity' else 'evasion_label')
    label_list = CLARITY_LABELS if 'clarity' in task else EVASION_LABELS

    train_ds = storage.load_split('train', task=split_task)
    dev_ds = storage.load_split('dev', task=split_task)
    test_ds = storage.load_split('test', task=split_task)

    y_train = np.array([train_ds[i][label_key] for i in range(len(train_ds))])
    y_dev = np.array([dev_ds[i][label_key] for i in range(len(dev_ds))])
    y_test = np.array([test_ds[i][label_key] for i in range(len(test_ds))])

    global_top_20 = global_top_20_dict[task]
    classifiers_60 = get_classifier_dict(random_state=42)

    for clf_name, clf in classifiers_60.items():
        print(f"\n  Classifier: {clf_name}")
        max_features = 25 if clf_name == "LightGBM" else 40

        # Checkpoint paths for this classifier
        selected_features_path = checkpoint_dir / f'selected_features_{clf_name}_{task}.json'
        trajectory_path = checkpoint_dir / f'trajectory_{clf_name}_{task}.csv'
        predictions_path = predictions_dir / f'{clf_name}_{task}_predictions.npy'
        probabilities_path = probabilities_dir / f'{clf_name}_{task}_probabilities.npy'
        metrics_path = checkpoint_dir / f'metrics_{clf_name}_{task}.json'
        greedy_checkpoint_path = checkpoint_dir / f'greedy_checkpoint_{clf_name}_{task}.pkl'

        # CRITICAL FIX: Check if predictions exist first
        y_test_pred = load_checkpoint(predictions_path)

# NOTE (leftover editing instruction): in Cell 5 (lines 650-703), find the following section and replace it:

        if y_test_pred is not None:
            print(f"    ✓ Found predictions for {clf_name}, loading from checkpoint...")

            # Load other data if available
            selected_features = load_checkpoint(selected_features_path)
            trajectory_data = load_checkpoint(trajectory_path)
            y_test_proba = load_checkpoint(probabilities_path)
            metrics = load_checkpoint(metrics_path)

            if selected_features is None:
                print(f"     Note: selected_features not found, but predictions exist (from previous run)")
                selected_features = []

            if isinstance(trajectory_data, pd.DataFrame):
                trajectory = [(row['n_features'], row['macro_f1']) for _, row in trajectory_data.iterrows()]
            else:
                trajectory = trajectory_data if isinstance(trajectory_data, list) else []

            # CRITICAL FIX: Get n_features from trajectory if selected_features missing
            n_features = 0
            if selected_features and len(selected_features) > 0:
                n_features = len(selected_features)
                print(f"    ✓ {clf_name}: n_features={n_features} from selected_features")
            elif trajectory_data is not None:
                if isinstance(trajectory_data, pd.DataFrame) and len(trajectory_data) > 0:
                    n_features = int(trajectory_data.iloc[-1]['n_features'])
                    print(f"     {clf_name}: selected_features not found, using n_features={n_features} from trajectory")
                elif isinstance(trajectory_data, list) and len(trajectory_data) > 0:
                    n_features = trajectory_data[-1][0]  # (n_features, macro_f1)
                    print(f"     {clf_name}: selected_features not found, using n_features={n_features} from trajectory")
                else:
                    print(f"     {clf_name}: Could not determine n_features from trajectory, defaulting to 0")
            else:
                print(f"     {clf_name}: Could not determine n_features (no selected_features or trajectory), defaulting to 0")

            # CRITICAL FIX: If metrics not found, recompute from predictions
            if metrics is None or len(metrics) == 0:
                print(f"     Metrics not found, recomputing from predictions...")
                test_ds = storage.load_split('test', task=split_task)
                y_test_true = np.array([test_ds[i][label_key] for i in range(len(test_ds))])

                le = LabelEncoder()
                y_test_true_encoded = le.fit_transform(y_test_true)
                y_test_pred_encoded = le.transform(y_test_pred)

                metrics = compute_all_metrics(
                    y_test_true_encoded, y_test_pred_encoded, label_list,
                    task_name=f"TEST_{task}_{clf_name}_RECOMPUTED"
                )

                metrics_serializable = {}
                for k, v in metrics.items():
                    if isinstance(v, (int, float, np.integer, np.floating)):
                        metrics_serializable[k] = float(v)
                    elif isinstance(v, (list, np.ndarray)):
                        metrics_serializable[k] = v.tolist() if isinstance(v, np.ndarray) else v
                    elif isinstance(v, dict):
                        metrics_serializable[k] = v
                    else:
                        metrics_serializable[k] = str(v)
                save_checkpoint(metrics_serializable, metrics_path)
                print(f"    ✓ Recomputed and saved metrics: Macro F1={metrics.get('macro_f1', 0.0):.4f}")

            classifier_specific_results[task][clf_name] = {
                'selected_features': selected_features,
                'n_features': n_features,  # ← DÜZELTİLDİ: trajectory'den gelecek
                'metrics': metrics,
                'predictions': y_test_pred,
                'probabilities': y_test_proba,
                'trajectory': trajectory
            }

            print(f"    ✓ Loaded: predictions shape: {y_test_pred.shape}")
            if y_test_proba is not None:
                print(f"    ✓ Probabilities available")
            if metrics and len(metrics) > 0:
                print(f"    ✓ Metrics: Macro F1={metrics.get('macro_f1', 0.0):.4f}")
            if n_features > 0:
                print(f"    ✓ n_features: {n_features}")
            continue

        # If checkpoint not found, run greedy selection
        print(f"    Running greedy selection (starting with global top 20, max 40 features)...")
        selected_features, trajectory = greedy_forward_selection_60_with_checkpoint(
            X_train_60, X_dev_60, y_train, y_dev,
            fused_feature_names_60, global_top_20, clf,
            max_features=max_features,
            checkpoint_path=greedy_checkpoint_path
        )

        final_f1 = trajectory[-1][1] if trajectory else 0.0
        n_global = len(global_top_20)
        n_greedy = len(selected_features) - n_global
        print(f"    ✓ Selected {len(selected_features)} features (Global: {n_global} + Greedy: {n_greedy}), Dev F1={final_f1:.4f}")

        save_checkpoint(selected_features, selected_features_path)
        traj_df = pd.DataFrame(trajectory, columns=['n_features', 'macro_f1'])
        save_checkpoint(traj_df, trajectory_path)
        print(f"    ✓ Saved greedy selection results to checkpoint")

        selected_indices = [feature_name_to_idx_60[name] for name in selected_features if name in feature_name_to_idx_60]
        X_train_selected = X_train_60[:, selected_indices]
        X_dev_selected = X_dev_60[:, selected_indices]
        X_test_selected = X_test_60[:, selected_indices]

        print(f"    Training on Train+Dev combined ({X_train_selected.shape[0] + X_dev_selected.shape[0]} samples)...")
        X_train_combined = np.vstack([X_train_selected, X_dev_selected])
        y_train_combined = np.concatenate([y_train, y_dev])

        le = LabelEncoder()
        y_train_combined_encoded = le.fit_transform(y_train_combined)
        y_test_encoded = le.transform(y_test)

        pipe = Pipeline([("scaler", StandardScaler()), ("clf", clone(clf))])
        pipe.fit(X_train_combined, y_train_combined_encoded)

        y_test_pred_encoded = pipe.predict(X_test_selected)
        y_test_pred = le.inverse_transform(y_test_pred_encoded)

        y_test_proba = None
        if hasattr(pipe.named_steps['clf'], 'predict_proba'):
            try:
                X_test_scaled = pipe.named_steps['scaler'].transform(X_test_selected)
                y_test_proba = pipe.named_steps['clf'].predict_proba(X_test_scaled)
            except Exception as e:
                print(f"       Warning: Could not get probabilities for {clf_name}: {e}")

        metrics = compute_all_metrics(
            y_test_encoded, y_test_pred_encoded, label_list,
            task_name=f"TEST_{task}_{clf_name}"
        )

        print(f"    Test Macro F1: {metrics.get('macro_f1', 0.0):.4f}")

        save_checkpoint(y_test_pred, predictions_path)
        if y_test_proba is not None:
            save_checkpoint(y_test_proba, probabilities_path)
            print(f"    ✓ Saved probabilities: {probabilities_path}")
        else:
            print(f"     No probabilities available for {clf_name}")

        metrics_serializable = {}
        for k, v in metrics.items():
            if isinstance(v, (int, float, np.integer, np.floating)):
                metrics_serializable[k] = float(v)
            elif isinstance(v, (list, np.ndarray)):
                metrics_serializable[k] = v.tolist() if isinstance(v, np.ndarray) else v
            elif isinstance(v, dict):
                metrics_serializable[k] = v
            else:
                metrics_serializable[k] = str(v)
        save_checkpoint(metrics_serializable, metrics_path)

        classifier_specific_results[task][clf_name] = {
            'selected_features': selected_features,
            'n_features': len(selected_features),
            'metrics': metrics,
            'predictions': y_test_pred,
            'probabilities': y_test_proba,
            'trajectory': trajectory
        }

# Closing banner for the greedy forward selection step.
print("\n" + "="*80, "GREEDY FORWARD SELECTION COMPLETE", "="*80, sep="\n")


STEP 4: GREEDY FORWARD SELECTION (PER CLASSIFIER)

--------------------------------------------------------------------------------
TASK: CLARITY
--------------------------------------------------------------------------------

  Classifier: LogisticRegression
    ✓ Found predictions for LogisticRegression, loading from checkpoint...
     Note: selected_features not found, but predictions exist (from previous run)
     LogisticRegression: Could not determine n_features (no selected_features or trajectory), defaulting to 0
    ✓ Loaded: predictions shape: (308,)
    ✓ Probabilities available
    ✓ Metrics: Macro F1=0.4571

  Classifier: LinearSVC
    ✓ Found predictions for LinearSVC, loading from checkpoint...
     Note: selected_features not found, but predictions exist (from previous run)
     LinearSVC: Could not determine n_features (no selected_features or trajectory), defaulting to 0
    ✓ Loaded: predictions shape: (308,)
    ✓ Metrics: Macro F1=0.4608

  Classifier: RandomFore

In [27]:
# ============================================================================
# STEP 5-6: Weighted Average Ensemble + Summary Report Tables
# ============================================================================
# This cell:
# - Creates weighted average ensemble from classifier probabilities
# - Generates report tables
# - CRITICAL FIX: Recomputes ensemble metrics if missing
# - CRITICAL FIX: Gets n_features from trajectory if selected_features missing

import numpy as np
import pandas as pd
import json
import pickle
from pathlib import Path
from sklearn.preprocessing import LabelEncoder

from src.evaluation.metrics import compute_all_metrics

# Fail fast when the cells that define this cell's prerequisites were not run.
if 'storage' not in globals():
    raise NameError("storage not found. Please run Cell 1 (Setup) first.")

if not ('CLARITY_LABELS' in globals() and 'EVASION_LABELS' in globals()):
    raise NameError("CLARITY_LABELS and EVASION_LABELS not found. Please run Cell 3 (Configuration) first.")

# Folders holding the per-classifier artifacts, plus the folder that
# receives the report tables written by this cell.
checkpoint_dir, predictions_dir, probabilities_dir, metrics_dir, tables_dir = (
    storage.data_path / f'results/FinalResultsType2/classifier_specific/{subdir}'
    for subdir in ('checkpoint', 'predictions', 'probabilities', 'metrics', 'tables')
)

# Only the tables folder is created here; the others are only read unless
# save_checkpoint creates them on demand.
tables_dir.mkdir(parents=True, exist_ok=True)

# Helper functions
def load_checkpoint(filepath):
    """Read a checkpoint from disk, picking the reader from the file extension.

    Supported extensions are ``.pkl`` (pickle), ``.json``, ``.csv`` (returned
    as a pandas DataFrame) and ``.npy`` (returned as a NumPy array).

    Args:
        filepath: path-like object exposing ``exists()``, ``suffix`` and ``name``.

    Returns:
        The loaded object, or ``None`` when the file does not exist, has an
        unsupported extension, or cannot be read (a warning is printed in
        the last case).
    """
    if not filepath.exists():
        return None

    try:
        extension = filepath.suffix
        if extension == '.pkl':
            # NOTE: unpickling runs arbitrary code; only load trusted files.
            with open(filepath, 'rb') as handle:
                return pickle.load(handle)
        if extension == '.json':
            with open(filepath, 'r') as handle:
                return json.load(handle)
        if extension == '.csv':
            return pd.read_csv(filepath)
        if extension == '.npy':
            return np.load(filepath)
    except Exception as e:
        print(f"     Warning: Could not load {filepath.name}: {e}")

    # Reached for unsupported extensions and after a failed read.
    return None

def save_checkpoint(data, filepath):
    """Write ``data`` to ``filepath``, picking the writer from the file extension.

    Supported extensions:
        ``.pkl``  - pickled with ``pickle.dump``
        ``.json`` - written with ``json.dump`` (indent=2); NumPy scalars and
                    arrays, including ones nested in dicts/lists, are
                    converted to plain Python values
        ``.csv``  - ``data.to_csv(filepath, index=False)`` (expects a DataFrame)
        ``.npy``  - ``np.save``

    Parent directories are created when missing.

    Args:
        data: object to persist; its required type depends on the extension.
        filepath: destination ``pathlib.Path``.

    Raises:
        ValueError: if the extension is not one of the supported ones
            (previously the call silently wrote nothing).
    """
    def _json_default(obj):
        """Convert NumPy values that the json module cannot encode itself."""
        if isinstance(obj, np.generic):
            return obj.item()
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")

    suffix = filepath.suffix
    if suffix not in ('.pkl', '.json', '.csv', '.npy'):
        raise ValueError(
            f"Unsupported checkpoint extension '{suffix}' for {filepath.name}; "
            "expected one of .pkl, .json, .csv, .npy"
        )

    filepath.parent.mkdir(parents=True, exist_ok=True)
    if suffix == '.pkl':
        with open(filepath, 'wb') as f:
            pickle.dump(data, f)
    elif suffix == '.json':
        with open(filepath, 'w') as f:
            json.dump(data, f, indent=2, default=_json_default)
    elif suffix == '.csv':
        data.to_csv(filepath, index=False)
    else:  # '.npy'
        np.save(filepath, data)

# ========================================================================
# Load the per-classifier results written by Cell 5.
# n_features is taken from selected_features when that checkpoint exists,
# otherwise from the last row of the saved greedy-selection trajectory.
# ========================================================================
print("\n" + "="*80)
print("LOADING CLASSIFIER RESULTS FROM CHECKPOINTS")
print("="*80)


TASKS_60 = ['clarity', 'hierarchical_evasion_to_clarity']
classifier_specific_results = {}

for task in TASKS_60:
    task_results = classifier_specific_results[task] = {}
    print(f"\nLoading results for task: {task}")

    for clf_name in ['LogisticRegression', 'LinearSVC', 'RandomForest', 'MLP', 'XGBoost', 'LightGBM']:
        artifact_id = f'{clf_name}_{task}'
        predictions_path = predictions_dir / f'{artifact_id}_predictions.npy'
        probabilities_path = probabilities_dir / f'{artifact_id}_probabilities.npy'
        metrics_path = checkpoint_dir / f'metrics_{artifact_id}.json'
        selected_features_path = checkpoint_dir / f'selected_features_{artifact_id}.json'
        trajectory_path = checkpoint_dir / f'trajectory_{artifact_id}.csv'

        # Predictions are the one mandatory artifact; without them the
        # classifier is left out of the results entirely.
        y_test_pred = load_checkpoint(predictions_path)
        if y_test_pred is None:
            print(f"  ✗ {clf_name}: No predictions found")
            continue

        y_test_proba = load_checkpoint(probabilities_path)
        metrics = load_checkpoint(metrics_path)
        selected_features = load_checkpoint(selected_features_path)
        trajectory_data = load_checkpoint(trajectory_path)

        # Resolve n_features: selected_features first, then the final
        # trajectory entry, otherwise 0.
        if selected_features and len(selected_features) > 0:
            n_features = len(selected_features)
            print(f"  ✓ {clf_name}: n_features={n_features} from selected_features")
        elif trajectory_data is None:
            n_features = 0
            print(f"   {clf_name}: Could not determine n_features (no selected_features or trajectory), defaulting to 0")
        elif isinstance(trajectory_data, pd.DataFrame) and len(trajectory_data) > 0:
            # Last row holds the final number of features.
            n_features = int(trajectory_data.iloc[-1]['n_features'])
            print(f"   {clf_name}: selected_features not found, using n_features={n_features} from trajectory")
        elif isinstance(trajectory_data, list) and len(trajectory_data) > 0:
            # List entries are (n_features, macro_f1) pairs.
            n_features = trajectory_data[-1][0]
            print(f"   {clf_name}: selected_features not found, using n_features={n_features} from trajectory")
        else:
            n_features = 0
            print(f"   {clf_name}: Could not determine n_features from trajectory, defaulting to 0")

        task_results[clf_name] = {
            'selected_features': selected_features or [],
            'n_features': n_features,
            'metrics': metrics or {},
            'predictions': y_test_pred,
            'probabilities': y_test_proba,
        }

print("\n" + "="*80)

# ========================================================================
# STEP 5: Weighted Average Ensemble
# ========================================================================

# Lower bound for a classifier's raw ensemble weight, so a classifier whose
# Macro F1 is 0 (or missing from its metrics) still gets a tiny positive weight.
MIN_CLASSIFIER_WEIGHT = 0.0001


def _metrics_to_serializable(metrics_dict):
    """Return a copy of ``metrics_dict`` whose top-level values are JSON-friendly.

    Numbers become ``float``, NumPy arrays become lists, lists and dicts are
    kept as they are, and anything else is converted with ``str``.
    """
    serializable = {}
    for key, value in metrics_dict.items():
        if isinstance(value, (int, float, np.integer, np.floating)):
            serializable[key] = float(value)
        elif isinstance(value, np.ndarray):
            serializable[key] = value.tolist()
        elif isinstance(value, (list, dict)):
            serializable[key] = value
        else:
            serializable[key] = str(value)
    return serializable


def _evaluate_on_test_split(task, predicted_labels, label_list, task_name):
    """Score ``predicted_labels`` against the true test labels of ``task``.

    Args:
        task: task identifier from ``TASKS_60``.
        predicted_labels: array of predicted label values, one per test sample.
        label_list: label names forwarded to ``compute_all_metrics``.
        task_name: name forwarded to ``compute_all_metrics``.

    Returns:
        The metrics object returned by ``compute_all_metrics``.
    """
    # The hierarchical task reads the 'evasion' test split but is scored
    # against that split's clarity labels.
    split_task = 'evasion' if task == 'hierarchical_evasion_to_clarity' else task
    label_key = 'clarity_label' if task in ('clarity', 'hierarchical_evasion_to_clarity') else 'evasion_label'

    test_ds = storage.load_split('test', task=split_task)
    y_test_true = np.array([test_ds[i][label_key] for i in range(len(test_ds))])

    le = LabelEncoder()
    y_test_true_encoded = le.fit_transform(y_test_true)
    predicted_encoded = le.transform(predicted_labels)

    # NOTE(review): the encoded ids follow LabelEncoder's sorted class order;
    # confirm compute_all_metrics does not rely on the order of label_list.
    return compute_all_metrics(
        y_test_true_encoded, predicted_encoded, label_list,
        task_name=task_name
    )


print("\n" + "="*80)
print("STEP 5: WEIGHTED AVERAGE ENSEMBLE")
print("="*80)

ensemble_results = {}

for task in TASKS_60:
    print(f"\n{'-'*80}")
    print(f"TASK: {task.upper()}")
    print(f"{'-'*80}")

    ensemble_pred_path = predictions_dir / f'ensemble_hard_labels_from_weighted_proba_{task}.npy'
    ensemble_proba_path = probabilities_dir / f'ensemble_weighted_average_probabilities_{task}.npy'
    ensemble_weights_path = metrics_dir / f'ensemble_classifier_weights_{task}.json'
    ensemble_metrics_path = metrics_dir / f'ensemble_evaluation_metrics_{task}.json'

    label_list = CLARITY_LABELS if 'clarity' in task else EVASION_LABELS

    ensemble_pred = load_checkpoint(ensemble_pred_path)
    ensemble_proba = load_checkpoint(ensemble_proba_path)
    ensemble_weights = load_checkpoint(ensemble_weights_path)

    # A broken metrics file must not abort the cell: treat it as missing so
    # the metrics are recomputed below.
    ensemble_metrics = None
    try:
        ensemble_metrics = load_checkpoint(ensemble_metrics_path)
    except Exception as e:
        print(f"   Warning: Could not load ensemble_evaluation_metrics_{task}.json: {e}")
        ensemble_metrics = None

    # ---- Cached ensemble: reuse saved predictions and probabilities ----
    if ensemble_pred is not None and ensemble_proba is not None:
        print(f"  ✓ Found ensemble checkpoint, loading...")
        ensemble_results[task] = {
            'predictions': ensemble_pred,
            'probabilities': ensemble_proba,
            'weights': ensemble_weights if ensemble_weights else {},
            'classifiers_used': ensemble_weights.get('classifiers', []) if ensemble_weights else []
        }
        print(f"  ✓ Ensemble predictions shape: {ensemble_pred.shape}")
        if ensemble_metrics:
            ensemble_f1 = ensemble_metrics.get('metrics', {}).get('macro_f1', 0.0)
            print(f"  ✓ Ensemble Test Macro F1: {ensemble_f1:.4f}")
        else:
            # Recompute ensemble metrics if missing
            print(f"   Ensemble metrics missing, recomputing...")
            ensemble_metrics_dict = _evaluate_on_test_split(
                task, ensemble_pred, label_list,
                task_name=f"ENSEMBLE_{task}_RECOMPUTED"
            )
            save_checkpoint({
                'task': task,
                'metrics': _metrics_to_serializable(ensemble_metrics_dict),
                'n_samples': len(ensemble_pred)
            }, ensemble_metrics_path)

            print(f"  ✓ Recomputed ensemble metrics: Macro F1={ensemble_metrics_dict.get('macro_f1', 0.0):.4f}")
        continue

    # ---- No cached ensemble: build it from the classifier probabilities ----
    if task not in classifier_specific_results:
        print(f"   Skipping {task}: No results available")
        continue

    probabilities_list = []
    weights_list = []
    classifier_names_list = []

    for clf_name, result in classifier_specific_results[task].items():
        y_proba = result.get('probabilities')
        if y_proba is None:
            print(f"   Skipping {clf_name}: No probabilities available")
            continue

        macro_f1 = result.get('metrics', {}).get('macro_f1', 0.0)

        probabilities_list.append(y_proba)
        weights_list.append(max(macro_f1, MIN_CLASSIFIER_WEIGHT))
        classifier_names_list.append(clf_name)

    if len(probabilities_list) == 0:
        print(f"   No probabilities available for {task}. Skipping ensemble.")
        continue

    # Normalise the weights to sum to 1; fall back to uniform weights when
    # the total is not a positive number (e.g. a NaN Macro F1).
    total_weight = sum(weights_list)
    if total_weight > 0:
        normalized_weights = [w / total_weight for w in weights_list]
    else:
        normalized_weights = [1.0 / len(weights_list)] * len(weights_list)

    print(f"\n  Normalized weights (based on Macro F1):")
    for clf_name, norm_weight, macro_f1 in zip(classifier_names_list, normalized_weights, weights_list):
        print(f"    {clf_name}: {norm_weight:.4f} (Macro F1: {macro_f1:.4f})")

    ensemble_proba = np.zeros_like(probabilities_list[0])
    for proba, weight in zip(probabilities_list, normalized_weights):
        ensemble_proba += weight * proba

    ensemble_pred_indices = np.argmax(ensemble_proba, axis=1)

    # The classifiers in the greedy-selection cell are fit on
    # LabelEncoder-encoded targets, so their probability columns are in
    # sorted-label order. Decoding with label_list directly is only correct
    # when label_list is already sorted; use the sorted order so ensemble
    # labels match the individual classifiers' le.inverse_transform output.
    # (Assumes label_list holds exactly the labels seen in training.)
    proba_class_order = sorted(label_list)
    ensemble_pred = np.array([proba_class_order[i] for i in ensemble_pred_indices])

    print(f"    ✓ Ensemble predictions shape: {ensemble_pred.shape}")

    save_checkpoint(ensemble_pred, ensemble_pred_path)
    save_checkpoint(ensemble_proba, ensemble_proba_path)
    print(f"    ✓ Saved ensemble predictions and probabilities")

    ensemble_metrics = _evaluate_on_test_split(
        task, ensemble_pred, label_list,
        task_name=f"ENSEMBLE_{task}"
    )

    print(f"    Ensemble Test Macro F1: {ensemble_metrics.get('macro_f1', 0.0):.4f}")

    weights_metadata = {
        'task': task,
        'method': 'weighted_average',
        'weight_metric': 'macro_f1',
        'n_classifiers': len(classifier_names_list),
        'classifiers': classifier_names_list,
        'weights': {name: float(weight) for name, weight in zip(classifier_names_list, normalized_weights)},
        'n_samples': len(ensemble_pred),
        'label_list': label_list
    }
    save_checkpoint(weights_metadata, ensemble_weights_path)

    save_checkpoint({
        'task': task,
        'metrics': _metrics_to_serializable(ensemble_metrics),
        'n_samples': len(ensemble_pred)
    }, ensemble_metrics_path)

    ensemble_results[task] = {
        'predictions': ensemble_pred,
        'probabilities': ensemble_proba,
        'weights': {name: weight for name, weight in zip(classifier_names_list, normalized_weights)},
        'classifiers_used': classifier_names_list
    }

print("\n" + "="*80)
print("STEP 5 COMPLETE")
print("="*80)

# ========================================================================
# STEP 6: Generate Summary Report Tables
# ========================================================================
print("\n" + "="*80)
print("STEP 6: GENERATE SUMMARY REPORT TABLES")
print("="*80)

# Tasks are always reported in this order: clarity first, then hierarchical.
TASK_ORDER = ['clarity', 'hierarchical_evasion_to_clarity']

# Metrics copied into every summary row; a metric absent from the stored
# results is reported as 0.0.
_summary_metric_names = ('macro_f1', 'accuracy', 'macro_precision', 'macro_recall', 'weighted_f1')

# ========================================================================
# STEP 6.1: Collect All Classifier Results (Individual Classifiers)
# ========================================================================
print("\n" + "-"*80)
print("STEP 6.1: Individual Classifier Results")
print("-"*80)

# One row per (task, classifier) pair that has loaded results.
summary_rows = [
    {
        'classifier': clf_name,
        'task': task,
        'n_features': result.get('n_features', 0),
        **{name: result.get('metrics', {}).get(name, 0.0) for name in _summary_metric_names},
    }
    for task in TASK_ORDER
    if task in classifier_specific_results
    for clf_name, result in classifier_specific_results[task].items()
]

# ========================================================================
# STEP 6.2: Add Ensemble Results
# ========================================================================
print("\n" + "-"*80)
print("STEP 6.2: Ensemble Results (Weighted Average)")
print("-"*80)

for task in TASK_ORDER:
    ensemble_metrics_path = metrics_dir / f'ensemble_evaluation_metrics_{task}.json'
    ensemble_metrics = load_checkpoint(ensemble_metrics_path)

    if not ensemble_metrics:
        print(f"   Ensemble results for {task} not found")
        continue

    # The ensemble metrics file nests the scores under the 'metrics' key.
    metrics_dict = ensemble_metrics.get('metrics', {})
    summary_rows.append({
        'classifier': 'Ensemble (Weighted)',
        'task': task,
        'n_features': 'N/A',
        **{name: metrics_dict.get(name, 0.0) for name in _summary_metric_names},
    })
    print(f"  ✓ Added ensemble results for {task}")

# Create summary DataFrame
df_summary = pd.DataFrame(summary_rows)

def _show_table(table, formatter=None):
    """Display ``table`` as a styled notebook table, or as plain text on failure.

    Args:
        table: DataFrame to show.
        formatter: mapping of column name -> format string passed to
            ``Styler.format``. When ``None`` the table is styled with
            ``precision=4`` instead.
    """
    try:
        from IPython.display import display
        if formatter is None:
            styled = table.style.format(precision=4)
        else:
            styled = table.style.format(formatter)
        display(styled)
    except Exception:
        # Rich display is best-effort, so fall back to plain text. Unlike a
        # bare `except:`, this lets KeyboardInterrupt/SystemExit propagate.
        print(table.to_string())


def _four_decimals(columns):
    """Build a Styler formatter that shows each of ``columns`` with four decimals."""
    return {column: '{:.4f}' for column in columns}


if len(df_summary) == 0:
    print("   No results found for summary table")
else:
    # Keep a single row per (classifier, task) pair so pivoting cannot fail
    # on duplicate index/column combinations.
    df_summary = df_summary.drop_duplicates(
        subset=['classifier', 'task'],
        keep='first'
    )

    # ========================================================================
    # STEP 6.3: Detailed Summary Table (All Metrics)
    # ========================================================================
    print("\n" + "-"*80)
    print("STEP 6.3: Detailed Summary Table (All Metrics)")
    print("-"*80)

    # n_features is hidden in the displayed table but kept in the saved files.
    print("\nDetailed Summary Table:")
    _show_table(
        df_summary.drop(columns=['n_features']),
        _four_decimals(['macro_f1', 'accuracy', 'macro_precision', 'macro_recall', 'weighted_f1'])
    )

    # Save detailed table
    detailed_path = tables_dir / 'summary_detailed.csv'
    df_summary.to_csv(detailed_path, index=False)
    print(f"\n  ✓ Saved detailed table: {detailed_path.name}")

    # Save HTML version
    html_detailed_path = tables_dir / 'summary_detailed.html'
    df_summary.to_html(html_detailed_path, index=False, float_format='{:.4f}'.format)
    print(f"  ✓ Saved HTML: {html_detailed_path.name}")

    # ========================================================================
    # STEP 6.4: Pivot Table (Classifier × Task) - Macro F1
    # ========================================================================
    print("\n" + "-"*80)
    print("STEP 6.4: Pivot Table - Classifier × Task (Macro F1)")
    print("-"*80)

    df_pivot = df_summary.pivot(
        index='classifier',
        columns='task',
        values='macro_f1'
    )

    # Column order: tasks from TASK_ORDER first, any other task alphabetically after.
    available_tasks = [t for t in TASK_ORDER if t in df_pivot.columns]
    remaining_tasks = sorted([t for t in df_pivot.columns if t not in available_tasks])
    column_order = available_tasks + remaining_tasks
    df_pivot = df_pivot[column_order]

    print("\nPivot Table (Macro F1):")
    _show_table(df_pivot)

    # Save pivot table
    pivot_path = tables_dir / 'summary_pivot_classifier_wise.csv'
    df_pivot.to_csv(pivot_path)
    print(f"\n  ✓ Saved pivot table: {pivot_path.name}")

    # Save HTML version
    html_pivot_path = tables_dir / 'summary_pivot_classifier_wise.html'
    df_pivot.to_html(html_pivot_path, float_format='{:.4f}'.format)
    print(f"  ✓ Saved HTML: {html_pivot_path.name}")

    # ========================================================================
    # STEP 6.5: Pivot Table (Classifier × Task) - Accuracy
    # ========================================================================
    print("\n" + "-"*80)
    print("STEP 6.5: Pivot Table - Classifier × Task (Accuracy)")
    print("-"*80)

    df_pivot_acc = df_summary.pivot(
        index='classifier',
        columns='task',
        values='accuracy'
    )

    # Same column order as the Macro F1 pivot.
    df_pivot_acc = df_pivot_acc[column_order]

    print("\nPivot Table (Accuracy):")
    _show_table(df_pivot_acc)

    pivot_acc_path = tables_dir / 'summary_pivot_accuracy.csv'
    df_pivot_acc.to_csv(pivot_acc_path)
    print(f"\n  ✓ Saved pivot table: {pivot_acc_path.name}")

    # ========================================================================
    # STEP 6.6: Summary by Task (Individual Classifiers Only)
    # ========================================================================
    print("\n" + "-"*80)
    print("STEP 6.6: Summary by Task (Individual Classifiers)")
    print("-"*80)

    per_task_columns = ['classifier', 'macro_f1', 'accuracy', 'macro_precision', 'macro_recall']

    for task in TASK_ORDER:
        df_task = df_summary[
            (df_summary['task'] == task) &
            (df_summary['classifier'] != 'Ensemble (Weighted)')
        ].copy()

        if len(df_task) == 0:
            continue

        print(f"\n  {task.upper()} - Individual Classifiers:")
        print(f"  {'-'*60}")

        # Best classifier first.
        df_task = df_task.sort_values('macro_f1', ascending=False)

        _show_table(df_task[per_task_columns], _four_decimals(per_task_columns[1:]))

        # The saved per-task file keeps every column, including n_features.
        task_path = tables_dir / f'summary_{task}_individual.csv'
        df_task.to_csv(task_path, index=False)
        print(f"  ✓ Saved: {task_path.name}")

    # ========================================================================
    # STEP 6.7: Ensemble Comparison Table
    # ========================================================================
    print("\n" + "-"*80)
    print("STEP 6.7: Ensemble vs Best Individual Classifier")
    print("-"*80)

    ensemble_comparison_rows = []

    for task in TASK_ORDER:
        is_task = df_summary['task'] == task
        is_ensemble = df_summary['classifier'] == 'Ensemble (Weighted)'

        # Best individual classifier for this task by Macro F1.
        df_task_individual = df_summary[is_task & ~is_ensemble]
        if len(df_task_individual) == 0:
            continue

        best_individual = df_task_individual.loc[df_task_individual['macro_f1'].idxmax()]

        # A comparison row needs both sides; skip tasks without an ensemble row.
        df_task_ensemble = df_summary[is_task & is_ensemble]
        if len(df_task_ensemble) == 0:
            continue

        ensemble_result = df_task_ensemble.iloc[0]

        ensemble_comparison_rows.append({
            'task': task,
            'best_classifier': best_individual['classifier'],
            'best_macro_f1': best_individual['macro_f1'],
            'best_n_features': best_individual['n_features'],
            'ensemble_macro_f1': ensemble_result['macro_f1'],
            # Positive when the ensemble beats the best single classifier.
            'improvement': ensemble_result['macro_f1'] - best_individual['macro_f1'],
            'ensemble_accuracy': ensemble_result['accuracy'],
            'best_accuracy': best_individual['accuracy'],
        })

    if ensemble_comparison_rows:
        df_ensemble_comparison = pd.DataFrame(ensemble_comparison_rows)

        print("\nEnsemble vs Best Individual:")
        _show_table(
            df_ensemble_comparison.drop(columns=['best_n_features']),
            _four_decimals(['best_macro_f1', 'ensemble_macro_f1', 'improvement', 'ensemble_accuracy', 'best_accuracy'])
        )

        comparison_path = tables_dir / 'ensemble_comparison.csv'
        df_ensemble_comparison.to_csv(comparison_path, index=False)
        print(f"\n  ✓ Saved: {comparison_path.name}")

# Closing banner for Step 6 plus an index of the table files this cell writes.
print("\n" + "="*80, "SUMMARY REPORT TABLES COMPLETE", "="*80, sep="\n")
print(f"\nAll tables saved to: {tables_dir}")
for _table_line in (
    "  - Detailed summary: summary_detailed.csv",
    "  - Pivot (Macro F1): summary_pivot_classifier_wise.csv",
    "  - Pivot (Accuracy): summary_pivot_accuracy.csv",
    "  - Per-task summaries: summary_{task}_individual.csv",
    "  - Ensemble comparison: ensemble_comparison.csv",
):
    print(_table_line)

# Closing banner for the whole cell.
print("\n" + "="*80, "ALL STEPS COMPLETE", "="*80, sep="\n")


LOADING CLASSIFIER RESULTS FROM CHECKPOINTS

Loading results for task: clarity
   LogisticRegression: Could not determine n_features (no selected_features or trajectory), defaulting to 0
   LinearSVC: Could not determine n_features (no selected_features or trajectory), defaulting to 0
   RandomForest: Could not determine n_features (no selected_features or trajectory), defaulting to 0
   MLP: Could not determine n_features (no selected_features or trajectory), defaulting to 0
   XGBoost: Could not determine n_features (no selected_features or trajectory), defaulting to 0
   LightGBM: Could not determine n_features (no selected_features or trajectory), defaulting to 0

Loading results for task: hierarchical_evasion_to_clarity
   LogisticRegression: Could not determine n_features (no selected_features or trajectory), defaulting to 0
   LinearSVC: Could not determine n_features (no selected_features or trajectory), defaulting to 0
   RandomForest: Could not determine n_features (no selec

Unnamed: 0,classifier,task,macro_f1,accuracy,macro_precision,macro_recall,weighted_f1
0,LogisticRegression,clarity,0.4571,0.539,0.4482,0.4884,0.5587
1,LinearSVC,clarity,0.4608,0.6623,0.5119,0.4566,0.6178
2,RandomForest,clarity,0.4032,0.6526,0.4658,0.4028,0.5935
3,MLP,clarity,0.3554,0.6688,0.6879,0.3721,0.5733
4,XGBoost,clarity,0.4006,0.6558,0.4423,0.3996,0.6169
5,LightGBM,clarity,0.4522,0.6786,0.6142,0.4313,0.6266
6,LogisticRegression,hierarchical_evasion_to_clarity,0.4464,0.5055,0.4406,0.4857,0.5261
7,LinearSVC,hierarchical_evasion_to_clarity,0.4597,0.6436,0.5175,0.4539,0.6016
8,RandomForest,hierarchical_evasion_to_clarity,0.3966,0.6218,0.4588,0.4003,0.5599
9,MLP,hierarchical_evasion_to_clarity,0.3492,0.64,0.6881,0.3725,0.5382



  ✓ Saved detailed table: summary_detailed.csv
  ✓ Saved HTML: summary_detailed.html

--------------------------------------------------------------------------------
STEP 6.4: Pivot Table - Classifier × Task (Macro F1)
--------------------------------------------------------------------------------

Pivot Table (Macro F1):


task,clarity,hierarchical_evasion_to_clarity
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1
Ensemble (Weighted),0.4485,0.4336
LightGBM,0.4522,0.4517
LinearSVC,0.4608,0.4597
LogisticRegression,0.4571,0.4464
MLP,0.3554,0.3492
RandomForest,0.4032,0.3966
XGBoost,0.4006,0.3933



  ✓ Saved pivot table: summary_pivot_classifier_wise.csv
  ✓ Saved HTML: summary_pivot_classifier_wise.html

--------------------------------------------------------------------------------
STEP 6.5: Pivot Table - Classifier × Task (Accuracy)
--------------------------------------------------------------------------------

Pivot Table (Accuracy):


task,clarity,hierarchical_evasion_to_clarity
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1
Ensemble (Weighted),0.6851,0.6655
LightGBM,0.6786,0.6691
LinearSVC,0.6623,0.6436
LogisticRegression,0.539,0.5055
MLP,0.6688,0.64
RandomForest,0.6526,0.6218
XGBoost,0.6558,0.6291



  ✓ Saved pivot table: summary_pivot_accuracy.csv

--------------------------------------------------------------------------------
STEP 6.6: Summary by Task (Individual Classifiers)
--------------------------------------------------------------------------------

  CLARITY - Individual Classifiers:
  ------------------------------------------------------------


Unnamed: 0,classifier,macro_f1,accuracy,macro_precision,macro_recall
1,LinearSVC,0.4608,0.6623,0.5119,0.4566
0,LogisticRegression,0.4571,0.539,0.4482,0.4884
5,LightGBM,0.4522,0.6786,0.6142,0.4313
2,RandomForest,0.4032,0.6526,0.4658,0.4028
4,XGBoost,0.4006,0.6558,0.4423,0.3996
3,MLP,0.3554,0.6688,0.6879,0.3721


  ✓ Saved: summary_clarity_individual.csv

  HIERARCHICAL_EVASION_TO_CLARITY - Individual Classifiers:
  ------------------------------------------------------------


Unnamed: 0,classifier,macro_f1,accuracy,macro_precision,macro_recall
7,LinearSVC,0.4597,0.6436,0.5175,0.4539
11,LightGBM,0.4517,0.6691,0.5859,0.4396
6,LogisticRegression,0.4464,0.5055,0.4406,0.4857
8,RandomForest,0.3966,0.6218,0.4588,0.4003
10,XGBoost,0.3933,0.6291,0.4603,0.3954
9,MLP,0.3492,0.64,0.6881,0.3725


  ✓ Saved: summary_hierarchical_evasion_to_clarity_individual.csv

--------------------------------------------------------------------------------
STEP 6.7: Ensemble vs Best Individual Classifier
--------------------------------------------------------------------------------

Ensemble vs Best Individual:


Unnamed: 0,task,best_classifier,best_macro_f1,ensemble_macro_f1,improvement,ensemble_accuracy,best_accuracy
0,clarity,LinearSVC,0.4608,0.4485,-0.0123,0.6851,0.6623
1,hierarchical_evasion_to_clarity,LinearSVC,0.4597,0.4336,-0.026,0.6655,0.6436



  ✓ Saved: ensemble_comparison.csv

SUMMARY REPORT TABLES COMPLETE

All tables saved to: /content/drive/MyDrive/semeval_data/results/FinalResultsType2/classifier_specific/tables
  - Detailed summary: summary_detailed.csv
  - Pivot (Macro F1): summary_pivot_classifier_wise.csv
  - Pivot (Accuracy): summary_pivot_accuracy.csv
  - Per-task summaries: summary_{task}_individual.csv
  - Ensemble comparison: ensemble_comparison.csv

ALL STEPS COMPLETE


In [28]:
# ========================================================================
# STEP 6: Generate Summary Report Tables
# ========================================================================
print(f"\n{'=' * 80}")
print("STEP 6: GENERATE SUMMARY REPORT TABLES")
print('=' * 80)

# Reporting order for tasks: clarity first, then the hierarchical variant
TASK_ORDER = ['clarity', 'hierarchical_evasion_to_clarity']

# ========================================================================
# STEP 6.1: Collect All Classifier Results (Individual Classifiers)
# ========================================================================
print(f"\n{'-' * 80}")
print("STEP 6.1: Individual Classifier Results")
print('-' * 80)

# One row per (classifier, task) pair. A missing metric is recorded as 0.0
# and a missing feature count as 0, so every row has the same columns.
summary_rows = []

for task in TASK_ORDER:
    # Tasks without any stored classifier results contribute no rows.
    if task in classifier_specific_results:
        for clf_name, result in classifier_specific_results[task].items():
            metrics = result.get('metrics', {})

            row = {
                'classifier': clf_name,
                'task': task,
                'n_features': result.get('n_features', 0),
            }
            # Insertion order here fixes the column order of the summary table.
            for metric_key in ('macro_f1', 'accuracy', 'macro_precision',
                               'macro_recall', 'weighted_f1'):
                row[metric_key] = metrics.get(metric_key, 0.0)

            summary_rows.append(row)

# ========================================================================
# STEP 6.2: Add Ensemble Results
# ========================================================================
print(f"\n{'-' * 80}")
print("STEP 6.2: Ensemble Results (Weighted Average)")
print('-' * 80)

for task in TASK_ORDER:
    # Each task has its own ensemble metrics file under metrics_dir.
    ensemble_metrics_path = metrics_dir / f'ensemble_evaluation_metrics_{task}.json'
    ensemble_metrics = load_checkpoint(ensemble_metrics_path)

    # A falsy checkpoint (e.g. None or an empty dict) means nothing to add.
    if not ensemble_metrics:
        print(f"   Ensemble results for {task} not found")
        continue

    metrics_dict = ensemble_metrics.get('metrics', {})

    # The ensemble has no single feature count, hence the 'N/A' placeholder.
    ensemble_row = {
        'classifier': 'Ensemble (Weighted)',
        'task': task,
        'n_features': 'N/A',
    }
    for metric_key in ('macro_f1', 'accuracy', 'macro_precision',
                       'macro_recall', 'weighted_f1'):
        ensemble_row[metric_key] = metrics_dict.get(metric_key, 0.0)

    summary_rows.append(ensemble_row)
    print(f"  ✓ Added ensemble results for {task}")

def _show_table(table, *format_args, **format_kwargs):
    """Show ``table`` as a styled rich display, falling back to plain text.

    Args:
        table: DataFrame to render.
        *format_args: Positional arguments forwarded to ``table.style.format``
            (e.g. a ``{column: format_string}`` mapping).
        **format_kwargs: Keyword arguments forwarded to ``table.style.format``
            (e.g. ``precision=4``).

    If IPython cannot be imported, or building/displaying the styled table
    raises, the table is printed via ``table.to_string()`` instead.
    """
    try:
        from IPython.display import display
        display(table.style.format(*format_args, **format_kwargs))
    except Exception:
        # Deliberate best-effort fallback. Catching `Exception` rather than
        # using a bare `except:` lets KeyboardInterrupt / SystemExit propagate
        # so the cell can still be interrupted while rendering.
        print(table.to_string())


# Create summary DataFrame
df_summary = pd.DataFrame(summary_rows)

if len(df_summary) == 0:
    print("   No results found for summary table")
else:
    # Remove duplicates: keep the first row seen for each (classifier, task)
    # pair so the pivots below never hit duplicate index/column entries.
    df_summary = df_summary.drop_duplicates(
        subset=['classifier', 'task'],
        keep='first'
    )

    # ========================================================================
    # STEP 6.3: Detailed Summary Table (All Metrics)
    # ========================================================================
    print("\n" + "-"*80)
    print("STEP 6.3: Detailed Summary Table (All Metrics)")
    print("-"*80)

    # Display detailed table (n_features is hidden on screen but kept in the
    # saved CSV/HTML files below)
    print("\nDetailed Summary Table:")
    _show_table(
        df_summary.drop(columns=['n_features']),
        {
            'macro_f1': '{:.4f}',
            'accuracy': '{:.4f}',
            'macro_precision': '{:.4f}',
            'macro_recall': '{:.4f}',
            'weighted_f1': '{:.4f}'
        },
    )

    # Save detailed table
    detailed_path = tables_dir / 'summary_detailed.csv'
    df_summary.to_csv(detailed_path, index=False)
    print(f"\n  ✓ Saved detailed table: {detailed_path.name}")

    # Save HTML version
    html_detailed_path = tables_dir / 'summary_detailed.html'
    df_summary.to_html(html_detailed_path, index=False, float_format='{:.4f}'.format)
    print(f"  ✓ Saved HTML: {html_detailed_path.name}")

    # ========================================================================
    # STEP 6.4: Pivot Table (Classifier × Task) - Macro F1
    # ========================================================================
    print("\n" + "-"*80)
    print("STEP 6.4: Pivot Table - Classifier × Task (Macro F1)")
    print("-"*80)

    # Create pivot table
    df_pivot = df_summary.pivot(
        index='classifier',
        columns='task',
        values='macro_f1'
    )

    # Reorder columns: tasks from TASK_ORDER first (in that order), then any
    # other task columns alphabetically
    available_tasks = [t for t in TASK_ORDER if t in df_pivot.columns]
    remaining_tasks = sorted([t for t in df_pivot.columns if t not in available_tasks])
    column_order = available_tasks + remaining_tasks
    df_pivot = df_pivot[column_order]

    # Display pivot table
    print("\nPivot Table (Macro F1):")
    _show_table(df_pivot, precision=4)

    # Save pivot table
    pivot_path = tables_dir / 'summary_pivot_classifier_wise.csv'
    df_pivot.to_csv(pivot_path)
    print(f"\n  ✓ Saved pivot table: {pivot_path.name}")

    # Save HTML version
    html_pivot_path = tables_dir / 'summary_pivot_classifier_wise.html'
    df_pivot.to_html(html_pivot_path, float_format='{:.4f}'.format)
    print(f"  ✓ Saved HTML: {html_pivot_path.name}")

    # ========================================================================
    # STEP 6.5: Pivot Table (Classifier × Task) - Accuracy
    # ========================================================================
    print("\n" + "-"*80)
    print("STEP 6.5: Pivot Table - Classifier × Task (Accuracy)")
    print("-"*80)

    df_pivot_acc = df_summary.pivot(
        index='classifier',
        columns='task',
        values='accuracy'
    )

    # Same column order as the Macro F1 pivot so the two tables line up
    df_pivot_acc = df_pivot_acc[column_order]

    print("\nPivot Table (Accuracy):")
    _show_table(df_pivot_acc, precision=4)

    pivot_acc_path = tables_dir / 'summary_pivot_accuracy.csv'
    df_pivot_acc.to_csv(pivot_acc_path)
    print(f"\n  ✓ Saved pivot table: {pivot_acc_path.name}")

    # ========================================================================
    # STEP 6.6: Summary by Task (Individual Classifiers Only)
    # ========================================================================
    print("\n" + "-"*80)
    print("STEP 6.6: Summary by Task (Individual Classifiers)")
    print("-"*80)

    for task in TASK_ORDER:
        # Rows for this task, excluding the ensemble row
        df_task = df_summary[
            (df_summary['task'] == task) &
            (df_summary['classifier'] != 'Ensemble (Weighted)')
        ].copy()

        if len(df_task) == 0:
            continue

        print(f"\n  {task.upper()} - Individual Classifiers:")
        print(f"  {'-'*60}")

        # Sort by macro_f1 descending
        df_task = df_task.sort_values('macro_f1', ascending=False)

        # Display a reduced column set; the saved CSV keeps every column
        _show_table(
            df_task[['classifier', 'macro_f1', 'accuracy', 'macro_precision', 'macro_recall']],
            {
                'macro_f1': '{:.4f}',
                'accuracy': '{:.4f}',
                'macro_precision': '{:.4f}',
                'macro_recall': '{:.4f}'
            },
        )

        # Save per-task summary
        task_path = tables_dir / f'summary_{task}_individual.csv'
        df_task.to_csv(task_path, index=False)
        print(f"  ✓ Saved: {task_path.name}")

    # ========================================================================
    # STEP 6.7: Ensemble Comparison Table
    # ========================================================================
    print("\n" + "-"*80)
    print("STEP 6.7: Ensemble vs Best Individual Classifier")
    print("-"*80)

    ensemble_comparison_rows = []

    for task in TASK_ORDER:
        # Get best individual classifier (highest macro_f1 for this task)
        df_task_individual = df_summary[
            (df_summary['task'] == task) &
            (df_summary['classifier'] != 'Ensemble (Weighted)')
        ]

        if len(df_task_individual) == 0:
            continue

        best_individual = df_task_individual.loc[df_task_individual['macro_f1'].idxmax()]

        # Get ensemble result
        df_task_ensemble = df_summary[
            (df_summary['task'] == task) &
            (df_summary['classifier'] == 'Ensemble (Weighted)')
        ]

        # A comparison row needs both sides; skip tasks without an ensemble row
        if len(df_task_ensemble) == 0:
            continue

        ensemble_result = df_task_ensemble.iloc[0]

        ensemble_comparison_rows.append({
            'task': task,
            'best_classifier': best_individual['classifier'],
            'best_macro_f1': best_individual['macro_f1'],
            'best_n_features': best_individual['n_features'],
            'ensemble_macro_f1': ensemble_result['macro_f1'],
            # Positive means the ensemble beats the best single classifier
            'improvement': ensemble_result['macro_f1'] - best_individual['macro_f1'],
            'ensemble_accuracy': ensemble_result['accuracy'],
            'best_accuracy': best_individual['accuracy'],
        })

    if ensemble_comparison_rows:
        df_ensemble_comparison = pd.DataFrame(ensemble_comparison_rows)

        # best_n_features and improvement are hidden on screen but kept in the
        # saved CSV
        print("\nEnsemble vs Best Individual:")
        _show_table(
            df_ensemble_comparison.drop(columns=['best_n_features', 'improvement']),
            {
                'best_macro_f1': '{:.4f}',
                'ensemble_macro_f1': '{:.4f}',
                'ensemble_accuracy': '{:.4f}',
                'best_accuracy': '{:.4f}'
            },
        )

        comparison_path = tables_dir / 'ensemble_comparison.csv'
        df_ensemble_comparison.to_csv(comparison_path, index=False)
        print(f"\n  ✓ Saved: {comparison_path.name}")

# Closing banner for Step 6. The file listing is only printed when tables were
# actually written: with an empty summary nothing is saved, and the ensemble
# comparison CSV exists only when at least one comparison row was built.
print("\n" + "="*80)
print("SUMMARY REPORT TABLES COMPLETE")
print("="*80)
if len(df_summary) == 0:
    print(f"\nNo tables were saved to: {tables_dir} (no results found)")
else:
    print(f"\nAll tables saved to: {tables_dir}")
    print("  - Detailed summary: summary_detailed.csv")
    print("  - Pivot (Macro F1): summary_pivot_classifier_wise.csv")
    print("  - Pivot (Accuracy): summary_pivot_accuracy.csv")
    # Literal "{task}" is a filename pattern shown to the reader, not a placeholder
    print("  - Per-task summaries: summary_{task}_individual.csv")
    if ensemble_comparison_rows:
        print("  - Ensemble comparison: ensemble_comparison.csv")


STEP 6: GENERATE SUMMARY REPORT TABLES

--------------------------------------------------------------------------------
STEP 6.1: Individual Classifier Results
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
STEP 6.2: Ensemble Results (Weighted Average)
--------------------------------------------------------------------------------
  ✓ Added ensemble results for clarity
  ✓ Added ensemble results for hierarchical_evasion_to_clarity

--------------------------------------------------------------------------------
STEP 6.3: Detailed Summary Table (All Metrics)
--------------------------------------------------------------------------------

Detailed Summary Table:


Unnamed: 0,classifier,task,macro_f1,accuracy,macro_precision,macro_recall,weighted_f1
0,LogisticRegression,clarity,0.4571,0.539,0.4482,0.4884,0.5587
1,LinearSVC,clarity,0.4608,0.6623,0.5119,0.4566,0.6178
2,RandomForest,clarity,0.4032,0.6526,0.4658,0.4028,0.5935
3,MLP,clarity,0.3554,0.6688,0.6879,0.3721,0.5733
4,XGBoost,clarity,0.4006,0.6558,0.4423,0.3996,0.6169
5,LightGBM,clarity,0.4522,0.6786,0.6142,0.4313,0.6266
6,LogisticRegression,hierarchical_evasion_to_clarity,0.4464,0.5055,0.4406,0.4857,0.5261
7,LinearSVC,hierarchical_evasion_to_clarity,0.4597,0.6436,0.5175,0.4539,0.6016
8,RandomForest,hierarchical_evasion_to_clarity,0.3966,0.6218,0.4588,0.4003,0.5599
9,MLP,hierarchical_evasion_to_clarity,0.3492,0.64,0.6881,0.3725,0.5382



  ✓ Saved detailed table: summary_detailed.csv
  ✓ Saved HTML: summary_detailed.html

--------------------------------------------------------------------------------
STEP 6.4: Pivot Table - Classifier × Task (Macro F1)
--------------------------------------------------------------------------------

Pivot Table (Macro F1):


task,clarity,hierarchical_evasion_to_clarity
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1
Ensemble (Weighted),0.4485,0.4336
LightGBM,0.4522,0.4517
LinearSVC,0.4608,0.4597
LogisticRegression,0.4571,0.4464
MLP,0.3554,0.3492
RandomForest,0.4032,0.3966
XGBoost,0.4006,0.3933



  ✓ Saved pivot table: summary_pivot_classifier_wise.csv
  ✓ Saved HTML: summary_pivot_classifier_wise.html

--------------------------------------------------------------------------------
STEP 6.5: Pivot Table - Classifier × Task (Accuracy)
--------------------------------------------------------------------------------

Pivot Table (Accuracy):


task,clarity,hierarchical_evasion_to_clarity
classifier,Unnamed: 1_level_1,Unnamed: 2_level_1
Ensemble (Weighted),0.6851,0.6655
LightGBM,0.6786,0.6691
LinearSVC,0.6623,0.6436
LogisticRegression,0.539,0.5055
MLP,0.6688,0.64
RandomForest,0.6526,0.6218
XGBoost,0.6558,0.6291



  ✓ Saved pivot table: summary_pivot_accuracy.csv

--------------------------------------------------------------------------------
STEP 6.6: Summary by Task (Individual Classifiers)
--------------------------------------------------------------------------------

  CLARITY - Individual Classifiers:
  ------------------------------------------------------------


Unnamed: 0,classifier,macro_f1,accuracy,macro_precision,macro_recall
1,LinearSVC,0.4608,0.6623,0.5119,0.4566
0,LogisticRegression,0.4571,0.539,0.4482,0.4884
5,LightGBM,0.4522,0.6786,0.6142,0.4313
2,RandomForest,0.4032,0.6526,0.4658,0.4028
4,XGBoost,0.4006,0.6558,0.4423,0.3996
3,MLP,0.3554,0.6688,0.6879,0.3721


  ✓ Saved: summary_clarity_individual.csv

  HIERARCHICAL_EVASION_TO_CLARITY - Individual Classifiers:
  ------------------------------------------------------------


Unnamed: 0,classifier,macro_f1,accuracy,macro_precision,macro_recall
7,LinearSVC,0.4597,0.6436,0.5175,0.4539
11,LightGBM,0.4517,0.6691,0.5859,0.4396
6,LogisticRegression,0.4464,0.5055,0.4406,0.4857
8,RandomForest,0.3966,0.6218,0.4588,0.4003
10,XGBoost,0.3933,0.6291,0.4603,0.3954
9,MLP,0.3492,0.64,0.6881,0.3725


  ✓ Saved: summary_hierarchical_evasion_to_clarity_individual.csv

--------------------------------------------------------------------------------
STEP 6.7: Ensemble vs Best Individual Classifier
--------------------------------------------------------------------------------

Ensemble vs Best Individual:


Unnamed: 0,task,best_classifier,best_macro_f1,ensemble_macro_f1,ensemble_accuracy,best_accuracy
0,clarity,LinearSVC,0.4608,0.4485,0.6851,0.6623
1,hierarchical_evasion_to_clarity,LinearSVC,0.4597,0.4336,0.6655,0.6436



  ✓ Saved: ensemble_comparison.csv

SUMMARY REPORT TABLES COMPLETE

All tables saved to: /content/drive/MyDrive/semeval_data/results/FinalResultsType2/classifier_specific/tables
  - Detailed summary: summary_detailed.csv
  - Pivot (Macro F1): summary_pivot_classifier_wise.csv
  - Pivot (Accuracy): summary_pivot_accuracy.csv
  - Per-task summaries: summary_{task}_individual.csv
  - Ensemble comparison: ensemble_comparison.csv
