In [None]:
# ============================================================
# PCA vs UMAP COMPARISON - METAL GPU VERSION
# ============================================================
# This notebook compares PCA and UMAP for dimensionality reduction
# of ClinicalBERT embeddings before ANN classification
# Optimized for Apple Silicon Macs with Metal GPU support

# Install required packages
%pip install transformers torch datasets scikit-learn umap-learn

# Load basic libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import time
import os
from sklearn.preprocessing import StandardScaler
import platform

# Session banner: notebook title followed by the host platform/processor strings.
print(
    "=" * 60,
    "PCA vs UMAP Comparison - Metal GPU Version",
    "=" * 60,
    f"Platform: {platform.platform()}",
    f"Processor: {platform.processor()}",
    "=" * 60,
    sep="\n",
)


In [None]:
# ============================================================
# DATA LOADING AND PREPROCESSING
# ============================================================

# Load the raw TSV files and pool them; the train/validation split is
# re-drawn further down in this cell.
drug_test_data = pd.read_csv("data/drugLibTest_raw.tsv", sep='\t')
drug_train_data = pd.read_csv("data/drugLibTrain_raw.tsv", sep='\t')
drug_data = pd.concat([drug_train_data, drug_test_data], ignore_index=True)

# Only the drug name and the rating are mandatory. A blanket dropna() would
# also discard rows where just one of the three review fields (or an unrelated
# column) is empty, even though the text builder below fills missing review
# fields with "" precisely so those rows can be kept.
drug_data = drug_data.dropna(subset=["urlDrugName", "rating"])

# Remove rows whose drug name contains "unnamed" (case-insensitive).
# .copy() gives an independent frame so the column assignment below does not
# write into a slice of the concatenated frame (SettingWithCopyWarning).
drug_data = drug_data[
    ~drug_data["urlDrugName"].str.lower().str.contains("unnamed", na=False)
].copy()

# Build the model input: the three review fields joined by single spaces,
# whitespace runs collapsed, and leading/trailing whitespace removed.
drug_data["text"] = (
    drug_data[["benefitsReview", "sideEffectsReview", "commentsReview"]]
    .fillna("")
    .agg(" ".join, axis=1)
    .str.replace(r"\s+", " ", regex=True)
    .str.strip()
)

# Keep only reviews with more than 10 characters of text; copy again because
# later statements add further columns to this frame.
drug_data = drug_data[drug_data["text"].str.len() > 10].copy()

# Map sentiment
def map_sentiment(r):
    """Bucket a numeric rating into a sentiment string.

    Ratings <= 3 map to "negative", ratings <= 6 map to "neutral", and every
    other value (including anything that fails both comparisons) maps to
    "positive".
    """
    for upper_bound, sentiment in ((3, "negative"), (6, "neutral")):
        if r <= upper_bound:
            return sentiment
    return "positive"

# Derive the string sentiment for every review from its rating.
drug_data["sentiment"] = drug_data["rating"].apply(map_sentiment)

# Fixed class order; the list position doubles as the integer class id.
label_order = ["negative", "neutral", "positive"]
label2id = {name: idx for idx, name in enumerate(label_order)}
id2label = dict(enumerate(label_order))
drug_data["label"] = drug_data["sentiment"].map(label2id)

# Stratified 80/20 train/validation split on the integer label.
from sklearn.model_selection import train_test_split

TEXT_COL = "text"
LABEL_COL = "sentiment"  # only used for the distribution printout below

train_df, val_df = train_test_split(
    drug_data[[TEXT_COL, "label"]],
    stratify=drug_data["label"],
    test_size=0.2,
    random_state=42,
)

print(f"Dataset size: {len(drug_data)}")
print(f"Train size: {len(train_df)}, Validation size: {len(val_df)}")
print("\nLabel distribution:")
print(drug_data[LABEL_COL].value_counts())


In [None]:
# ============================================================
# DEVICE SETUP - METAL GPU
# ============================================================

import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from datasets import Dataset
from transformers import AutoTokenizer, AutoModel
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score, recall_score
import umap

# Pick the compute device in priority order: MPS (Metal), then CUDA, then CPU.
# Each branch prints its own status line(s); the selected device is reported
# once afterwards, which yields the same output order for every branch.
if torch.backends.mps.is_available():
    device = torch.device("mps")
    print("✓ Metal GPU (MPS) available - using Apple Silicon GPU")
elif torch.cuda.is_available():
    device = torch.device("cuda")
    print(f"✓ CUDA GPU available: {torch.cuda.get_device_name(0)}")
else:
    device = torch.device("cpu")
    print(
        "⚠️  No GPU detected - using CPU",
        "   For Apple Silicon Macs, ensure PyTorch with MPS support is installed",
        "   Install: pip install torch torchvision torchaudio",
        sep="\n",
    )
print(f"Device: {device}")

print(f"\nPlatform: {platform.platform()}", f"Processor: {platform.processor()}", sep="\n")


In [None]:
# ============================================================
# STEP 1: CLINICALBERT EMBEDDING EXTRACTION
# ============================================================

MODEL_NAME = "emilyalsentzer/Bio_ClinicalBERT"
MAX_LEN = 256  # handed to the tokenizer as max_length

print("Loading ClinicalBERT tokenizer and encoder...")
tok = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
# The encoder is only used for feature extraction here: place it on the
# selected device and switch it to eval mode in one chain.
enc_model = AutoModel.from_pretrained(MODEL_NAME).to(device).eval()

print(
    f"Model loaded on {device}",
    f"Model hidden size: {enc_model.config.hidden_size}",
    sep="\n",
)

# Wrap both pandas splits as Hugging Face Datasets (with a fresh 0..N-1 index).
train_ds, val_ds = (
    Dataset.from_pandas(frame.reset_index(drop=True))
    for frame in (train_df, val_df)
)

# Tokenize
def tokenize_for_enc(batch):
    """Tokenize the TEXT_COL entries of a batch for the encoder.

    Every sequence is truncated and padded to exactly MAX_LEN tokens; the
    tokenizer output is returned unchanged.
    """
    texts = batch[TEXT_COL]
    encoded = tok(texts, truncation=True, padding="max_length", max_length=MAX_LEN)
    return encoded

print("\nTokenizing datasets...")
# Tokenize train first, then validation, in batched mode.
train_tok, val_tok = (
    split.map(tokenize_for_enc, batched=True) for split in (train_ds, val_ds)
)

# Expose only the columns the embedding extractor reads, as torch tensors.
train_tok.set_format("torch", columns=["input_ids", "attention_mask", "label"])
val_tok.set_format("torch", columns=["input_ids", "attention_mask", "label"])

print(
    f"Train tokenized: {len(train_tok)} samples",
    f"Val tokenized: {len(val_tok)} samples",
    sep="\n",
)


In [None]:
# Extract [CLS] token embeddings from ClinicalBERT
def get_cls_embeddings(dataset, batch_size=16):
    """
    Encode `dataset` with the global `enc_model` and keep the first-token vector.

    The dataset is iterated in order (no shuffling) and must yield batches
    with "input_ids", "attention_mask" and "label" entries. Gradients are
    disabled for the whole pass.

    Returns: (X_emb, y) where X_emb stacks the position-0 hidden state of
    every sample (one row per sample) and y is the matching label array.
    """
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
    on_mps = device.type == "mps"

    emb_chunks, label_chunks = [], []

    print(f"Extracting embeddings from {len(dataset)} samples...")
    with torch.no_grad():
        for batch_no, batch in enumerate(loader, start=1):
            encoded = enc_model(
                input_ids=batch["input_ids"].to(device),
                attention_mask=batch["attention_mask"].to(device),
            )
            # Position 0 of the last hidden state is the [CLS] token.
            emb_chunks.append(encoded.last_hidden_state[:, 0, :].cpu().numpy())
            label_chunks.append(batch["label"].cpu().numpy())

            if batch_no % 50 == 0:
                print(f"  Processed {batch_no} batches...")

            # Periodically release cached MPS memory on Apple GPUs.
            if on_mps and batch_no % 100 == 0:
                torch.mps.empty_cache()

    X_emb = np.vstack(emb_chunks)
    y = np.concatenate(label_chunks)

    print(f"Extracted embeddings shape: {X_emb.shape}")
    print(f"Labels shape: {y.shape}")

    return X_emb, y

# Run the encoder over both splits (training split first).
print("=" * 60, "Extracting ClinicalBERT Embeddings", "=" * 60, sep="\n")

X_train_bert, y_train = get_cls_embeddings(train_tok, 16)
X_val_bert, y_val = get_cls_embeddings(val_tok, 16)

print(
    f"\nTrain embeddings: {X_train_bert.shape}, Labels: {y_train.shape}",
    f"Val embeddings: {X_val_bert.shape}, Labels: {y_val.shape}",
    sep="\n",
)


In [None]:
# ============================================================
# STEP 2: STANDARDIZE EMBEDDINGS
# ============================================================
# Both PCA and UMAP work better on standardized data

print("=" * 60, "Standardizing BERT Embeddings", "=" * 60, sep="\n")

# Fit the scaler on the training embeddings only, then apply the same
# transformation to both splits.
scaler = StandardScaler().fit(X_train_bert)
X_train_scaled = scaler.transform(X_train_bert)
X_val_scaled = scaler.transform(X_val_bert)

print(
    f"Train scaled shape: {X_train_scaled.shape}",
    f"Val scaled shape: {X_val_scaled.shape}",
    f"Mean (should be ~0): {X_train_scaled.mean():.6f}",
    f"Std (should be ~1): {X_train_scaled.std():.6f}",
    sep="\n",
)


In [None]:
# ============================================================
# METHOD 1: PCA DIMENSIONALITY REDUCTION
# ============================================================

N_COMPONENTS = 50  # Target dimension for both methods

print("=" * 60, "APPLYING PCA DIMENSIONALITY REDUCTION", "=" * 60, sep="\n")

# The timed region covers fitting on the training split plus projecting
# both splits.
start_time = time.time()
pca = PCA(n_components=N_COMPONENTS, random_state=42)
X_train_pca = pca.fit_transform(X_train_scaled)
X_val_pca = pca.transform(X_val_scaled)
pca_time = time.time() - start_time

# Bundle what the later comparison cells read.
pca_results = dict(
    X_train=X_train_pca,
    X_val=X_val_pca,
    time=pca_time,
    variance_explained=pca.explained_variance_ratio_.sum(),
)

print(f"\n✓ PCA completed in {pca_time:.2f} seconds")
print(f"Original dimension: {X_train_bert.shape[1]}")
print(f"PCA reduced dimension: {X_train_pca.shape[1]}")
print(f"Variance explained: {pca_results['variance_explained']:.3f}")
print(f"Train PCA shape: {X_train_pca.shape}")
print(f"Val PCA shape: {X_val_pca.shape}")


In [None]:
# ============================================================
# METHOD 2: UMAP DIMENSIONALITY REDUCTION
# ============================================================

print("=" * 60)
print("APPLYING UMAP DIMENSIONALITY REDUCTION")
print("=" * 60)
print("Note: UMAP is slower than PCA but may preserve better structure")
print("=" * 60)

# As for PCA, the timed region covers fitting on the training split plus
# projecting both splits.
start_time = time.time()

# Apply UMAP
# n_neighbors: balance between local vs global structure (smaller = faster, less global)
# min_dist: controls how tightly points are packed (0.0 = tight, 1.0 = loose)
# n_jobs: a fixed random_state makes umap-learn run single-threaded for
#         reproducibility, so asking for all cores (n_jobs=-1) had no effect
#         other than a UserWarning. State the effective value explicitly;
#         drop random_state instead if parallel speed matters more.
umap_reducer = umap.UMAP(
    n_components=N_COMPONENTS,
    n_neighbors=15,          # Default: 15 (try 5-10 for faster, 20-30 for better quality)
    min_dist=0.1,            # Default: 0.1 (try 0.0-0.5)
    metric='euclidean',      # Distance metric
    random_state=42,         # For reproducibility (implies single-threaded)
    n_jobs=1                 # Effective value whenever random_state is set
)

X_train_umap = umap_reducer.fit_transform(X_train_scaled)
X_val_umap = umap_reducer.transform(X_val_scaled)

umap_time = time.time() - start_time

print(f"\n✓ UMAP completed in {umap_time:.2f} seconds ({umap_time/60:.2f} minutes)")
print(f"Original dimension: {X_train_bert.shape[1]}")
print(f"UMAP reduced dimension: {X_train_umap.shape[1]}")
print(f"Train UMAP shape: {X_train_umap.shape}")
print(f"Val UMAP shape: {X_val_umap.shape}")

# Store for later comparison
umap_results = {
    'X_train': X_train_umap,
    'X_val': X_val_umap,
    'time': umap_time
}

# Clear MPS cache if using Metal
if device.type == "mps":
    torch.mps.empty_cache()

print(f"\n⏱️  Speed comparison:")
print(f"   PCA:  {pca_time:.2f}s")
print(f"   UMAP: {umap_time:.2f}s ({umap_time/pca_time:.1f}x slower)")


In [None]:
# ============================================================
# VISUALIZATION: 2D Projections
# ============================================================
# Visualize both methods in 2D for comparison

print("=" * 60)
print("Creating 2D Visualizations")
print("=" * 60)

# Separate 2-component fits used for plotting only; the N_COMPONENTS-dim
# reductions fed to the classifier are not touched here.
pca_2d = PCA(n_components=2, random_state=42)
X_train_pca_2d = pca_2d.fit_transform(X_train_scaled)

# n_jobs=1: with a fixed random_state umap-learn runs single-threaded anyway,
# so n_jobs=-1 only produced a warning without any speed-up.
umap_2d = umap.UMAP(n_components=2, n_neighbors=15, min_dist=0.1, random_state=42, n_jobs=1)
X_train_umap_2d = umap_2d.fit_transform(X_train_scaled)

# One panel per method, points coloured by the integer sentiment label.
fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# PCA 2D
scatter1 = axes[0].scatter(
    X_train_pca_2d[:, 0], X_train_pca_2d[:, 1],
    c=y_train, cmap='viridis', alpha=0.6, s=20
)
axes[0].set_title('PCA 2D Projection\n(Linear Variance)', fontsize=14, fontweight='bold')
axes[0].set_xlabel('PC1')
axes[0].set_ylabel('PC2')
axes[0].grid(True, alpha=0.3)
plt.colorbar(scatter1, ax=axes[0], label='Sentiment\n(0=neg, 1=neu, 2=pos)')

# UMAP 2D
scatter2 = axes[1].scatter(
    X_train_umap_2d[:, 0], X_train_umap_2d[:, 1],
    c=y_train, cmap='viridis', alpha=0.6, s=20
)
axes[1].set_title('UMAP 2D Projection\n(Non-linear Structure)', fontsize=14, fontweight='bold')
axes[1].set_xlabel('UMAP1')
axes[1].set_ylabel('UMAP2')
axes[1].grid(True, alpha=0.3)
plt.colorbar(scatter2, ax=axes[1], label='Sentiment\n(0=neg, 1=neu, 2=pos)')

plt.tight_layout()
plt.show()

print("✓ Visualizations created")
print("\nNote: Better clustering in UMAP suggests it may improve classification")


In [None]:
# ============================================================
# ANN MODEL DEFINITION
# ============================================================

class SentimentANN(nn.Module):
    """
    Feedforward neural network for sentiment classification.

    Architecture: Linear -> ReLU -> Dropout -> Linear -> ReLU -> Dropout -> Linear.

    Args:
        in_dim: Number of input features per sample.
        hidden: Width of both hidden layers.
        num_classes: Number of output logits.
        dropout: Drop probability used by both dropout layers. Defaults to
            0.2, the value that was previously hard-coded.
    """
    def __init__(self, in_dim, hidden=64, num_classes=3, dropout=0.2):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(in_dim, hidden),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden, hidden),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden, num_classes)
        )

    def forward(self, x):
        """Return raw (unnormalised) class logits for a batch of feature rows."""
        return self.net(x)

def _collect_predictions(model, loader):
    """Run `model` over `loader` in eval mode and return (predictions, labels) lists."""
    model.eval()
    preds, labels = [], []
    with torch.no_grad():
        for batch_x, batch_y in loader:
            outputs = model(batch_x.to(device))
            _, predicted = torch.max(outputs, 1)
            preds.extend(predicted.cpu().numpy())
            labels.extend(batch_y.cpu().numpy())
    return preds, labels


def train_ann(X_train, X_val, y_train, y_val, method_name, num_epochs=10):
    """
    Train a SentimentANN on the given features and return validation metrics.

    The model is trained with Adam (lr=0.001) and cross-entropy loss in
    shuffled mini-batches of 32. After every epoch the validation accuracy is
    measured, and the weights of the best epoch are restored before the final
    evaluation.

    Note: the checkpoint is selected on the same validation set that the
    returned metrics are computed on, so those metrics are optimistic.

    Args:
        X_train, X_val: 2-D feature arrays (rows = samples).
        y_train, y_val: Integer class labels aligned with the feature rows.
        method_name: Label used in the printed log and stored under 'method'.
        num_epochs: Number of passes over the training data.

    Returns:
        dict with keys 'method', 'accuracy', 'macro_f1', 'macro_recall',
        'f1_negative', 'f1_neutral', 'f1_positive', 'training_time',
        'predictions' and 'labels'.
    """
    print(f"\n{'='*60}")
    print(f"Training ANN on {method_name} features")
    print(f"{'='*60}")

    # Create datasets
    train_dataset = TensorDataset(
        torch.FloatTensor(X_train),
        torch.LongTensor(y_train)
    )
    val_dataset = TensorDataset(
        torch.FloatTensor(X_val),
        torch.LongTensor(y_val)
    )

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    # Initialize model
    model = SentimentANN(in_dim=X_train.shape[1], hidden=64, num_classes=3).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    best_val_acc = 0.0
    best_model_state = None

    start_time = time.time()

    for epoch in range(num_epochs):
        # Training
        model.train()
        for batch_x, batch_y in train_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)

            optimizer.zero_grad()
            loss = criterion(model(batch_x), batch_y)
            loss.backward()
            optimizer.step()

        # Validation
        val_preds, val_labels = _collect_predictions(model, val_loader)
        val_acc = accuracy_score(val_labels, val_preds)

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            # state_dict() hands out tensors that share storage with the live
            # parameters, and dict.copy() is shallow, so the snapshot must
            # clone every tensor. Otherwise later optimizer steps overwrite
            # the "best" weights in place.
            best_model_state = {
                key: tensor.detach().clone()
                for key, tensor in model.state_dict().items()
            }

        # MPS memory management
        if device.type == "mps" and (epoch + 1) % 3 == 0:
            torch.mps.empty_cache()

    # Load best model
    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    # Final evaluation on the validation set with the restored weights
    all_preds, all_labels = _collect_predictions(model, val_loader)

    training_time = time.time() - start_time

    # Calculate metrics
    acc = accuracy_score(all_labels, all_preds)
    f1_macro = f1_score(all_labels, all_preds, average="macro")
    recall_macro = recall_score(all_labels, all_preds, average="macro")

    # Per-class F1 in the fixed order negative (0), neutral (1), positive (2)
    f1_per_class = f1_score(all_labels, all_preds, average=None, labels=[0,1,2])

    results = {
        'method': method_name,
        'accuracy': acc,
        'macro_f1': f1_macro,
        'macro_recall': recall_macro,
        'f1_negative': f1_per_class[0],
        'f1_neutral': f1_per_class[1],
        'f1_positive': f1_per_class[2],
        'training_time': training_time,
        'predictions': all_preds,
        'labels': all_labels
    }

    print(f"\n✓ Training completed in {training_time:.2f} seconds")
    print(f"  Accuracy: {acc:.4f}")
    print(f"  Macro F1: {f1_macro:.4f}")
    print(f"  Macro Recall: {recall_macro:.4f}")
    print(f"  F1 per class - Negative: {f1_per_class[0]:.4f}, Neutral: {f1_per_class[1]:.4f}, Positive: {f1_per_class[2]:.4f}")

    if device.type == "mps":
        torch.mps.empty_cache()

    return results


In [None]:
# ============================================================
# TRAIN ANN ON PCA FEATURES
# ============================================================

# Train and score the ANN on the PCA-reduced features.
pca_ann_results = train_ann(
    pca_results['X_train'], pca_results['X_val'],
    y_train, y_val,
    method_name="PCA", num_epochs=10,
)


In [None]:
# ============================================================
# TRAIN ANN ON UMAP FEATURES
# ============================================================

# Train and score the ANN on the UMAP-reduced features.
umap_ann_results = train_ann(
    umap_results['X_train'], umap_results['X_val'],
    y_train, y_val,
    method_name="UMAP", num_epochs=10,
)


In [None]:
# ============================================================
# COMPREHENSIVE COMPARISON
# ============================================================

print("=" * 80, "FINAL COMPARISON: PCA vs UMAP", "=" * 80, sep="\n")

# One row per method (PCA first, UMAP second). Timing columns come from the
# reduction cells, metric columns from the two train_ann result dicts.
comparison_data = {
    'Method': ['PCA', 'UMAP'],
    'Reduction Time (s)': [pca_results['time'], umap_results['time']],
    'Reduction Time (min)': [pca_results['time'] / 60, umap_results['time'] / 60],
    'Training Time (s)': [pca_ann_results['training_time'], umap_ann_results['training_time']],
    'Total Time (s)': [
        pca_results['time'] + pca_ann_results['training_time'],
        umap_results['time'] + umap_ann_results['training_time'],
    ],
    **{
        column: [pca_ann_results[key], umap_ann_results[key]]
        for column, key in (
            ('Accuracy', 'accuracy'),
            ('Macro F1', 'macro_f1'),
            ('Macro Recall', 'macro_recall'),
            ('F1 Negative', 'f1_negative'),
            ('F1 Neutral', 'f1_neutral'),
            ('F1 Positive', 'f1_positive'),
        )
    },
    # Explained variance is only recorded for PCA.
    'Variance Explained': [pca_results['variance_explained'], None],
}

comparison_df = pd.DataFrame(comparison_data)

print("\n" + comparison_df.to_string(index=False))

# Absolute differences, UMAP minus PCA.
acc_improvement, f1_improvement, neutral_f1_improvement = (
    umap_ann_results[key] - pca_ann_results[key]
    for key in ('accuracy', 'macro_f1', 'f1_neutral')
)

print("\n" + "=" * 80, "IMPROVEMENT ANALYSIS", "=" * 80, sep="\n")
print(f"Accuracy improvement (UMAP vs PCA): {acc_improvement:+.4f} ({acc_improvement*100:+.2f}%)")
print(f"Macro F1 improvement (UMAP vs PCA): {f1_improvement:+.4f} ({f1_improvement*100:+.2f}%)")
print(f"Neutral F1 improvement (UMAP vs PCA): {neutral_f1_improvement:+.4f} ({neutral_f1_improvement*100:+.2f}%)")
print(f"\nTime cost (UMAP vs PCA): {umap_results['time']/pca_results['time']:.1f}x slower")

# Persist the table next to the notebook.
comparison_df.to_csv("pca_vs_umap_comparison.csv", index=False)
print("\n✓ Saved comparison to: pca_vs_umap_comparison.csv")


In [None]:
# ============================================================
# CONFUSION MATRICES COMPARISON
# ============================================================

def plot_confusion_matrix(y_true, y_pred, title, ax):
    """Draw a row-normalised (percent) confusion matrix on `ax`.

    Classes 0, 1, 2 are used in that order for rows (true) and columns
    (predicted), with tick labels taken from the global `label_order`.
    Rows without any samples are shown as 0%. Returns the image created
    by `ax.imshow` so the caller can attach a colorbar.
    """
    class_ids = [0, 1, 2]
    counts = confusion_matrix(y_true, y_pred, labels=class_ids)

    # Convert each row to percentages of that row's total.
    with np.errstate(invalid="ignore", divide="ignore"):
        totals = counts.sum(axis=1, keepdims=True)
        pct = np.where(totals > 0, (counts / totals) * 100.0, 0.0)

    image = ax.imshow(pct, cmap="Blues", aspect="auto", vmin=0, vmax=100)
    ax.set_title(title, fontsize=12, fontweight='bold')
    ax.set_xlabel("Predicted")
    ax.set_ylabel("True")
    ax.set_xticks(class_ids)
    ax.set_xticklabels(label_order)
    ax.set_yticks(class_ids)
    ax.set_yticklabels(label_order)

    # Write the percentage into every cell; white text on the darker cells.
    n_rows, n_cols = pct.shape
    for row in range(n_rows):
        for col in range(n_cols):
            val = pct[row, col]
            text_color = "white" if val > 50 else "black"
            ax.text(col, row, f"{val:.1f}%", ha="center", va="center", fontsize=10,
                    color=text_color, weight="bold")

    return image

# Create side-by-side confusion matrices.
# The shared colorbar below is attached to both axes. plt.tight_layout()
# does not support that (it warns and may overlap the colorbar with the
# panels), so the figure uses constrained layout instead.
fig, axes = plt.subplots(1, 2, figsize=(16, 6), constrained_layout=True)

im1 = plot_confusion_matrix(
    pca_ann_results['labels'],
    pca_ann_results['predictions'],
    f"PCA+ANN\n(Accuracy: {pca_ann_results['accuracy']:.3f}, F1: {pca_ann_results['macro_f1']:.3f})",
    axes[0]
)

im2 = plot_confusion_matrix(
    umap_ann_results['labels'],
    umap_ann_results['predictions'],
    f"UMAP+ANN\n(Accuracy: {umap_ann_results['accuracy']:.3f}, F1: {umap_ann_results['macro_f1']:.3f})",
    axes[1]
)

# Add one colorbar for both panels (both images share the 0-100 scale).
plt.colorbar(im2, ax=axes, label="Row %", fraction=0.046, pad=0.04)

plt.show()

print("✓ Confusion matrices created")


In [None]:
# ============================================================
# DETAILED CLASSIFICATION REPORTS
# ============================================================

# labels=[0, 1, 2] pins the class order to label_order, matching the
# f1_score / confusion_matrix calls elsewhere in this notebook. It also keeps
# target_names aligned if a class is absent from both labels and predictions.
print("=" * 80)
print("CLASSIFICATION REPORT: PCA+ANN")
print("=" * 80)
print(classification_report(
    pca_ann_results['labels'],
    pca_ann_results['predictions'],
    labels=[0, 1, 2],
    target_names=label_order,
    digits=3
))

print("\n" + "=" * 80)
print("CLASSIFICATION REPORT: UMAP+ANN")
print("=" * 80)
print(classification_report(
    umap_ann_results['labels'],
    umap_ann_results['predictions'],
    labels=[0, 1, 2],
    target_names=label_order,
    digits=3
))


In [None]:
# ============================================================
# RECOMMENDATION SUMMARY
# ============================================================

print("=" * 80)
print("RECOMMENDATION")
print("=" * 80)

# Relative macro-F1 change of UMAP over PCA in percent, and how many times
# longer the UMAP reduction took.
f1_improvement_pct = (umap_ann_results['macro_f1'] - pca_ann_results['macro_f1']) / pca_ann_results['macro_f1'] * 100
time_cost = umap_results['time'] / pca_results['time']

if f1_improvement_pct > 2.0:
    recommendation = "✅ RECOMMEND UMAP"
    reason = f"UMAP improves macro F1 by {f1_improvement_pct:.1f}% (significant improvement)"
elif f1_improvement_pct > 0.5:
    recommendation = "⚠️  CONSIDER UMAP"
    reason = f"UMAP improves macro F1 by {f1_improvement_pct:.1f}% (moderate improvement, but {time_cost:.1f}x slower)"
elif f1_improvement_pct >= 0:
    recommendation = "✅ RECOMMEND PCA"
    reason = f"UMAP only improves by {f1_improvement_pct:.1f}% (not worth {time_cost:.1f}x time cost)"
else:
    # UMAP scored worse than PCA: say so, instead of reporting a negative
    # "improvement".
    recommendation = "✅ RECOMMEND PCA"
    reason = f"UMAP lowers macro F1 by {abs(f1_improvement_pct):.1f}% and is {time_cost:.1f}x slower"

print(f"\n{recommendation}")
print(f"Reason: {reason}")

print(f"\nKey Metrics:")
print(f"  - PCA Accuracy: {pca_ann_results['accuracy']:.4f}")
print(f"  - UMAP Accuracy: {umap_ann_results['accuracy']:.4f} ({umap_ann_results['accuracy']-pca_ann_results['accuracy']:+.4f})")
print(f"  - PCA Macro F1: {pca_ann_results['macro_f1']:.4f}")
print(f"  - UMAP Macro F1: {umap_ann_results['macro_f1']:.4f} ({umap_ann_results['macro_f1']-pca_ann_results['macro_f1']:+.4f})")
print(f"  - PCA Neutral F1: {pca_ann_results['f1_neutral']:.4f}")
print(f"  - UMAP Neutral F1: {umap_ann_results['f1_neutral']:.4f} ({umap_ann_results['f1_neutral']-pca_ann_results['f1_neutral']:+.4f})")
print(f"  - PCA Time: {pca_results['time']:.2f}s")
print(f"  - UMAP Time: {umap_results['time']:.2f}s ({time_cost:.1f}x slower)")

print("\n" + "=" * 80)
