# Comparação de limiares Stage1 (0.50 vs 0.55)

In [None]:
import json
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import sys
sns.set_theme(style='whitegrid')

# `root` is the parent of the current working directory; evaluation logs are
# read from `root / 'logs'`.
root = Path('..').resolve()

# Expose the directory above `root` on sys.path so that the `pesquisa` package
# can be imported (presumably `root` is the `pesquisa` directory — confirm).
project_root = root.parent
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))
from pesquisa.v5_pipeline import STAGE3_GROUPS

# Decode the reports explicitly as UTF-8 (the JSON interchange encoding) rather
# than relying on the platform default, which is not UTF-8 on every platform.
baseline = json.loads((root / 'logs/v5_pipeline_eval_val.json').read_text(encoding='utf-8'))
thresh = json.loads((root / 'logs/v5_pipeline_eval_val_th055.json').read_text(encoding='utf-8'))

# Final bare expression: the notebook displays both final accuracies as the cell output.
baseline['final_accuracy'], thresh['final_accuracy']


## Métricas Stage1

In [None]:
# Collect the Stage 1 section of each evaluation report under a readable label.
stage1_metrics = {
    label: report['stage1']
    for label, report in (('baseline', baseline), ('thresh_0.55', thresh))
}
# Final bare expression: displayed as the cell output.
stage1_metrics


## Stage 2 — Matriz de Confusão

In [None]:
# Stage 2 confusion matrices from both evaluation reports, drawn side by side.
stage2_baseline = np.array(baseline['stage2']['confusion_matrix'])
stage2_thresh = np.array(thresh['stage2']['confusion_matrix'])
stage2_names = baseline['stage2']['class_names']

fig, axes = plt.subplots(1, 2, figsize=(12, 4))
stage2_panels = (
    (stage2_baseline, 'Stage2 Confusão — threshold 0.50'),
    (stage2_thresh, 'Stage2 Confusão — threshold 0.55'),
)
for ax, (matrix, title) in zip(axes, stage2_panels):
    sns.heatmap(
        matrix,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=stage2_names,
        yticklabels=stage2_names,
        ax=ax,
    )
    ax.set_title(title)
    ax.set_xlabel('Predito')
    ax.set_ylabel('Real')
    # Re-apply the class names so the tick rotation/alignment can be set.
    ax.set_xticklabels(stage2_names, rotation=45, ha='right')
    ax.set_yticklabels(stage2_names, rotation=0)

plt.tight_layout()
plt.show()


## Stage 3 — Matrizes de Confusão por especialista

In [None]:
# One figure per Stage 3 head: confusion matrices of both reports side by side.
stage3_heads = ['RECT', 'AB', '1TO4']
for head in stage3_heads:
    base_conf = np.array(baseline['stage3']['confusion_matrices'][head])
    thr_conf = np.array(thresh['stage3']['confusion_matrices'][head])
    labels = STAGE3_GROUPS[head]

    fig, axes = plt.subplots(1, 2, figsize=(8, 3))
    head_panels = (
        (base_conf, f'{head} — confusão 0.50'),
        (thr_conf, f'{head} — confusão 0.55'),
    )
    for ax, (matrix, title) in zip(axes, head_panels):
        sns.heatmap(
            matrix,
            annot=True,
            fmt='d',
            cmap='Purples',
            xticklabels=labels,
            yticklabels=labels,
            ax=ax,
        )
        ax.set_title(title)
        ax.set_xlabel('Predito')
        ax.set_ylabel('Real')
        # Re-apply the labels so the tick rotation/alignment can be set.
        ax.set_xticklabels(labels, rotation=45, ha='right')
        ax.set_yticklabels(labels, rotation=0)

    plt.tight_layout()
    plt.show()


## Pipeline Completo — Matrizes

In [None]:
# Full-pipeline confusion matrices from both evaluation reports, side by side.
final_baseline = np.array(baseline['confusion_matrix'])
final_thresh = np.array(thresh['confusion_matrix'])
class_names = baseline['class_names']

fig, axes = plt.subplots(1, 2, figsize=(14, 5))
pipeline_panels = (
    (final_baseline, 'Pipeline Confusão — threshold 0.50'),
    (final_thresh, 'Pipeline Confusão — threshold 0.55'),
)
for ax, (matrix, title) in zip(axes, pipeline_panels):
    sns.heatmap(
        matrix,
        annot=True,
        fmt='d',
        cmap='Greens',
        xticklabels=class_names,
        yticklabels=class_names,
        ax=ax,
    )
    ax.set_title(title)
    ax.set_xlabel('Predito')
    ax.set_ylabel('Real')
    # Re-apply the class names so the tick rotation/alignment can be set.
    ax.set_xticklabels(class_names, rotation=45, ha='right')
    ax.set_yticklabels(class_names, rotation=0)

plt.tight_layout()
plt.show()


## Observações
- O threshold 0.55 aumenta a acurácia global (48.8% vs 44.7%), mas derruba o recall e o F1 do Stage1.
- Com o recall menor, os especialistas recebem menos blocos para refinar (especialmente AB), reduzindo o macro-F1.
- É necessário reforçar o especialista AB antes de adotar um limiar mais agressivo.