In [None]:
import json
import pickle
from collections import Counter
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Plot styling.
# Matplotlib 3.6 renamed its bundled seaborn style sheets with a
# 'seaborn-v0_8-' prefix; older releases only know the un-prefixed name.
# Pick whichever one this installation actually ships so the notebook
# does not fail in its first cell on an older environment.
style_name = 'seaborn-v0_8-darkgrid'
if style_name not in plt.style.available:
    style_name = 'seaborn-darkgrid'  # pre-3.6 name of the same style sheet
plt.style.use(style_name)
sns.set_palette('husl')

# Figure directory: every figure in this notebook is written below it.
# Created up front (including parents) so later savefig calls cannot fail
# on a missing folder; exist_ok keeps re-runs idempotent.
fig_dir = Path('../report/figures')
fig_dir.mkdir(parents=True, exist_ok=True)

## 1. Training Loss Curve

In [None]:
# Root folder holding one sub-directory per trained model (reused by later cells)
models_dir = Path('../models')

# Read the recorded training history of the BERT baseline run.
# The JSON is expected to expose parallel 'epochs' and 'train_loss' sequences.
history_file = models_dir / 'baseline_bert_hierarchical' / 'training_history.json'
with open(history_file) as f:
    history = json.load(f)

# Draw loss against epoch using the object-oriented Matplotlib interface
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(
    history['epochs'],
    history['train_loss'],
    marker='o',
    linewidth=2,
    label='Training Loss',
)
ax.set_xlabel('Epoch', fontsize=12)
ax.set_ylabel('Loss', fontsize=12)
ax.set_title('Training Loss over Epochs', fontsize=14, fontweight='bold')
ax.legend(fontsize=10)
ax.grid(True, alpha=0.3)
fig.tight_layout()

# Persist the figure for the report, then render it inline
loss_png = fig_dir / 'training_loss.png'
fig.savefig(loss_png, dpi=300, bbox_inches='tight')
plt.show()

print(f"✓ Saved: {loss_png}")

## 2. Multiple Models Comparison

In [None]:
# Compare multiple models
# Each entry is (sub-directory under models_dir, legend label).
model_configs = [
    ('baseline_bert_hierarchical', 'BERT + Hierarchical Loss'),
    ('gcn_hierarchical', 'GCN + Hierarchical Loss'),
    ('gat_hierarchical', 'GAT + Hierarchical Loss'),
]

fig, ax = plt.subplots(figsize=(12, 6))

# Labels of the models whose history was actually found and drawn
plotted_labels = []

for model_dir, label in model_configs:
    history_path = models_dir / model_dir / 'training_history.json'
    if not history_path.exists():
        # Report the gap instead of silently dropping the model from the figure
        print(f"⚠ Skipped '{label}': {history_path} not found")
        continue
    with open(history_path) as f:
        history = json.load(f)
    ax.plot(history['epochs'], history['train_loss'], marker='o', linewidth=2, label=label)
    plotted_labels.append(label)

if plotted_labels:
    ax.set_xlabel('Epoch', fontsize=12)
    ax.set_ylabel('Training Loss', fontsize=12)
    ax.set_title('Model Comparison: Training Loss', fontsize=14, fontweight='bold')
    ax.legend(fontsize=10)
    ax.grid(True, alpha=0.3)
    fig.tight_layout()

    comparison_png = fig_dir / 'model_comparison_loss.png'
    fig.savefig(comparison_png, dpi=300, bbox_inches='tight')
    plt.show()

    print(f"✓ Saved: {comparison_png}")
else:
    # Nothing was drawn: do not write an empty image or claim it was saved
    plt.close(fig)
    print("No training histories found; comparison figure was not saved.")

## 3. Evaluation Metrics Table

In [None]:
# Build the model performance table.
# A row is added for every model in model_configs that has a saved
# 'best_model.pt' checkpoint. The checkpoint is only tested for existence;
# the metric values below are placeholders.
# TODO: replace the 0.0 placeholders with metrics from a real evaluation run.

# Fixed column order. Passing it to the DataFrame constructor guarantees the
# columns exist even when no checkpoint is found, so the CSV keeps its header
# and later cells can still index df['Model'] etc.
result_columns = ['Model', 'Micro F1', 'Macro F1', 'Precision', 'Recall', 'Final Loss']

results = []

for model_dir, label in model_configs:
    checkpoint_path = models_dir / model_dir / 'best_model.pt'
    if checkpoint_path.exists():
        results.append({
            'Model': label,
            'Micro F1': 0.0,  # Replace with actual metrics
            'Macro F1': 0.0,
            'Precision': 0.0,
            'Recall': 0.0,
            'Final Loss': 0.0
        })

if not results:
    print(f"⚠ No 'best_model.pt' checkpoints found under {models_dir}; the table is empty.")

# Create DataFrame
df = pd.DataFrame(results, columns=result_columns)
print("\n=== Model Performance Comparison ===")
print(df.to_string(index=False))

# Save as CSV next to the figures folder (i.e. in the report directory)
results_csv = fig_dir.parent / 'results_table.csv'
df.to_csv(results_csv, index=False)
print(f"\n✓ Saved: {results_csv}")

## 4. Metrics Bar Chart

In [None]:
# Visualize metrics comparison: one bar chart per metric, one bar per model.
metrics = ['Micro F1', 'Macro F1', 'Precision', 'Recall']

if df.empty:
    # An empty results table has nothing to plot (and may lack the metric
    # columns entirely), so skip the figure instead of raising a KeyError.
    print("No model results available; metrics comparison figure was not saved.")
else:
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    axes = axes.flatten()  # 2x2 grid -> flat sequence, one axis per metric

    for ax, metric in zip(axes, metrics):
        ax.bar(df['Model'], df[metric], alpha=0.7, edgecolor='black')
        ax.set_ylabel(metric, fontsize=11)
        ax.set_title(f'{metric} Comparison', fontsize=12, fontweight='bold')
        ax.tick_params(axis='x', rotation=15)  # tilt long model names
        ax.grid(axis='y', alpha=0.3)

    fig.tight_layout()
    metrics_png = fig_dir / 'metrics_comparison.png'
    fig.savefig(metrics_png, dpi=300, bbox_inches='tight')
    plt.show()

    print(f"✓ Saved: {metrics_png}")

## 5. Predicted Label Distribution

In [None]:
# Distribution of the number of predicted labels per test sample.
# Reads the pickled prediction results and histograms len(prediction)
# for every entry of results['predictions'].
pred_file = Path('../predictions/test_predictions.pkl')
if pred_file.exists():
    # NOTE(security): pickle.load can execute arbitrary code while loading.
    # Only open prediction files produced by this project's own pipeline.
    with open(pred_file, 'rb') as f:
        results = pickle.load(f)  # note: rebinds the name `results`

    # assumes each entry is a collection of predicted labels — TODO confirm
    predictions = results['predictions']

    # Count labels per sample
    label_counts = [len(p) for p in predictions]

    if label_counts:
        # Integer-wide bins. Start at 1 as before, but drop to 0 when some
        # sample has no predicted label so it is not silently left out of
        # the histogram (it is already included in the average below).
        first_bin = min(1, min(label_counts))
        bin_edges = range(first_bin, max(label_counts) + 2)

        plt.figure(figsize=(10, 6))
        plt.hist(label_counts, bins=bin_edges, alpha=0.7, edgecolor='black')
        plt.xlabel('Number of Labels per Sample', fontsize=12)
        plt.ylabel('Frequency', fontsize=12)
        plt.title('Predicted Label Distribution', fontsize=14, fontweight='bold')
        plt.grid(axis='y', alpha=0.3)
        plt.tight_layout()
        plt.savefig(fig_dir / 'label_distribution.png', dpi=300, bbox_inches='tight')
        plt.show()

        print(f"✓ Saved: {fig_dir / 'label_distribution.png'}")
        print(f"Average labels per sample: {np.mean(label_counts):.2f}")
    else:
        # max()/min() of an empty list would raise; there is nothing to plot
        print(f"No predictions in {pred_file}; label distribution figure was not saved.")
else:
    print(f"Prediction file not found: {pred_file}")

## 6. Top Predicted Classes

In [None]:
if pred_file.exists():
    # Horizontal bar chart of the most frequently predicted classes.
    # Uses `predictions` loaded by the label-distribution cell.
    max_classes = 20  # upper bound on the number of classes shown

    # Flatten the per-sample label collections and count class frequencies
    all_labels = [label for pred in predictions for label in pred]
    label_freq = Counter(all_labels)
    top_classes = label_freq.most_common(max_classes)

    if top_classes:
        classes, counts = zip(*top_classes)
        positions = range(len(classes))

        plt.figure(figsize=(12, 6))
        plt.barh(positions, counts, alpha=0.7, edgecolor='black')
        plt.yticks(positions, [f'Class {c}' for c in classes])
        plt.xlabel('Frequency', fontsize=12)
        plt.ylabel('Class', fontsize=12)
        # Report the number of classes actually drawn (may be fewer than max_classes)
        plt.title(f'Top {len(classes)} Predicted Classes', fontsize=14, fontweight='bold')
        plt.gca().invert_yaxis()  # most frequent class at the top
        plt.grid(axis='x', alpha=0.3)
        plt.tight_layout()
        plt.savefig(fig_dir / 'top_classes.png', dpi=300, bbox_inches='tight')
        plt.show()

        print(f"✓ Saved: {fig_dir / 'top_classes.png'}")
    else:
        # zip(*[]) cannot be unpacked into two names; nothing to plot anyway
        print("No predicted labels found; top classes figure was not saved.")

## 7. Export All Figures

모든 그림이 `report/figures/` 폴더에 저장되었습니다.

**생성된 파일:**
- `training_loss.png`: 단일 모델 학습 곡선
- `model_comparison_loss.png`: 여러 모델 비교
- `metrics_comparison.png`: 성능 지표 비교
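- `label_distribution.png`: 예측 레이블 분포 (예측 파일 `../predictions/test_predictions.pkl`이 있을 때만 생성)
- `top_classes.png`: 가장 많이 예측된 클래스 (예측 파일이 있을 때만 생성)
- `../results_table.csv`: 성능 비교 표 (CSV) — `report/figures/`의 상위 폴더인 `report/`에 저장됨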