# Advanced TB Detection Algorithm - NEW DATASET

This notebook implements an advanced TB detection algorithm using the new UCSF TB dataset.
**This version works with the new dataset structure and embeddings.**

## Key Improvements:
1. **Temporal Feature Engineering**: Extract temporal patterns from multi-clip embeddings
2. **Advanced Data Augmentation**: SMOTE for class balance
3. **Patient-Level Aggregation**: Voting across multiple audio files per patient
4. **Ensemble Methods**: Combine multiple models for robustness
5. **Threshold Optimization**: Optimize for clinical sensitivity requirements

## Dataset Information:
- **Source**: UCSF TB Project R2D2 lung sounds
- **Countries**: Philippines (PH), India (IN), Uganda (UG), Vietnam (VN), South Africa (SA)
- **Embedding Model**: Google HeAR (Health Acoustic Representations)
- **Embedding Dimension**: 512
- **Temporal Clips**: ~10 clips per audio file (2-second clips)

## Setup and Enhanced Data Loading

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from collections import defaultdict, Counter
import warnings
warnings.filterwarnings('ignore')

# Advanced ML imports
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import RobustScaler
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import (
    RandomForestClassifier, GradientBoostingClassifier, 
    VotingClassifier, AdaBoostClassifier
)
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import (
    confusion_matrix, classification_report, roc_curve, auc,
    precision_recall_curve, f1_score, fbeta_score, roc_auc_score,
    accuracy_score, precision_score, recall_score
)
from sklearn.utils.class_weight import compute_class_weight

# Data augmentation
from imblearn.over_sampling import SMOTE

# Feature engineering
from sklearn.feature_selection import SelectKBest, f_classif, VarianceThreshold
from scipy import stats

# XGBoost
try:
    from xgboost import XGBClassifier
    XGBOOST_AVAILABLE = True
except ImportError:
    XGBOOST_AVAILABLE = False

print("✅ Advanced ML libraries loaded successfully")
print(f"🔧 XGBoost available: {XGBOOST_AVAILABLE}")

✅ Advanced ML libraries loaded successfully
🔧 XGBoost available: True


## Enhanced Data Loading with Temporal Features

In [2]:
# Load the new dataset with temporal features
EMBEDDING_PATH = "ucsf_new_embeddings.npz"  # UPDATED: Full dataset
METADATA_PATH = "ucsf_new_embeddings_metadata.csv"  # UPDATED: Full dataset

# Load dataset
X, y, file_keys, patient_ids = load_advanced_embeddings(
    EMBEDDING_PATH, METADATA_PATH, use_temporal=True, max_samples=None
)

print(f"\n🎯 Enhanced dataset shape: {X.shape}")
print(f"🎯 Feature expansion: {X.shape[1]} features (was 512)")

# Additional dataset summary
print(f"\n📊 FULL DATASET SUMMARY:")
print(f"   Total audio files: {len(X)}")
print(f"   Total patients: {len(np.unique(patient_ids))}")
print(f"   Average files per patient: {len(X) / len(np.unique(patient_ids)):.1f}")
print(f"   TB Positive files: {sum(y)} ({sum(y)/len(y)*100:.1f}%)")
print(f"   TB Negative files: {len(y)-sum(y)} ({(len(y)-sum(y))/len(y)*100:.1f}%)")
print(f"   Countries: {len(np.unique([pid.split('_')[0] if '_' in pid else pid[:3] for pid in patient_ids]))}")
print(f"   Temporal features: {X.shape[1]} (13x expansion from 512)")

# Check for any data quality issues
print(f"\n🔍 DATA QUALITY CHECKS:")
print(f"   NaN values: {np.isnan(X).sum()}")
print(f"   Infinite values: {np.isinf(X).sum()}")
print(f"   Zero variance features: {np.sum(np.var(X, axis=0) == 0)}")
print(f"   Feature range: [{np.min(X):.3f}, {np.max(X):.3f}]")

NameError: name 'load_advanced_embeddings' is not defined

## Advanced Data Preprocessing and Patient-Level Splits

In [None]:
def create_patient_level_split(X, y, patient_ids, test_size=0.2, random_state=42):
    """
    Create train/test split ensuring patients don't appear in both sets
    """
    unique_patients = np.unique(patient_ids)
    
    # Calculate patient-level labels (any TB positive file makes patient positive)
    patient_labels = {}
    for patient in unique_patients:
        patient_mask = patient_ids == patient
        patient_labels[patient] = int(np.any(y[patient_mask]))
    
    # Split patients
    patients_array = np.array(list(patient_labels.keys()))
    labels_array = np.array(list(patient_labels.values()))
    
    # Only do stratified split if we have both classes
    if len(np.unique(labels_array)) > 1:
        train_patients, test_patients = train_test_split(
            patients_array, test_size=test_size, stratify=labels_array, random_state=random_state
        )
    else:
        train_patients, test_patients = train_test_split(
            patients_array, test_size=test_size, random_state=random_state
        )
    
    # Create file-level splits
    train_mask = np.isin(patient_ids, train_patients)
    test_mask = np.isin(patient_ids, test_patients)
    
    return (
        X[train_mask], X[test_mask],
        y[train_mask], y[test_mask],
        patient_ids[train_mask], patient_ids[test_mask]
    )

# Patient-level split
X_train, X_test, y_train, y_test, train_patients, test_patients = create_patient_level_split(
    X, y, patient_ids, test_size=0.2, random_state=42
)

print(f"🔄 Patient-level split completed")
print(f"📊 Train: {len(X_train)} files from {len(np.unique(train_patients))} patients")
print(f"📊 Test: {len(X_test)} files from {len(np.unique(test_patients))} patients")
print(f"📈 Train TB rate: {sum(y_train)/len(y_train)*100:.1f}%")
print(f"📈 Test TB rate: {sum(y_test)/len(y_test)*100:.1f}%")

# Apply data augmentation
print("\n🔄 Applying advanced data augmentation...")

# Remove features with zero variance
var_selector = VarianceThreshold(threshold=0.001)
X_train_filtered = var_selector.fit_transform(X_train)
X_test_filtered = var_selector.transform(X_test)

print(f"📊 Features after variance filtering: {X_train_filtered.shape[1]} (was {X_train.shape[1]})")

# Apply SMOTE for class balancing only if we have both classes
if len(np.unique(y_train)) > 1 and np.sum(y_train) > 1:
    # Use k_neighbors based on minority class size
    min_samples = min(np.sum(y_train), len(y_train) - np.sum(y_train))
    k_neighbors = min(5, min_samples - 1)
    
    if k_neighbors > 0:
        smote = SMOTE(random_state=42, k_neighbors=k_neighbors)
        X_train_balanced, y_train_balanced = smote.fit_resample(X_train_filtered, y_train)
        
        print(f"✅ SMOTE applied:")
        print(f"   Before: {Counter(y_train)}")
        print(f"   After: {Counter(y_train_balanced)}")
        print(f"   Training set size: {len(X_train_balanced)}")
    else:
        print("⚠️ Not enough samples for SMOTE, using original data")
        X_train_balanced, y_train_balanced = X_train_filtered, y_train
else:
    print("⚠️ Only one class in training set, skipping SMOTE")
    X_train_balanced, y_train_balanced = X_train_filtered, y_train

# Feature scaling
scaler = RobustScaler()  # More robust to outliers than StandardScaler
X_train_scaled = scaler.fit_transform(X_train_balanced)
X_test_scaled = scaler.transform(X_test_filtered)

print(f"✅ Feature scaling completed")

# Feature selection
selector = SelectKBest(score_func=f_classif, k=min(1000, X_train_scaled.shape[1]))
X_train_selected = selector.fit_transform(X_train_scaled, y_train_balanced)
X_test_selected = selector.transform(X_test_scaled)

print(f"✅ Feature selection: {X_train_selected.shape[1]} features selected")

# Store original test data for patient-level evaluation
X_test_original = X_test_filtered
y_test_original = y_test
test_patients_original = test_patients

## Advanced Model Architecture

In [None]:
# Calculate advanced class weights
pos_weight = len(y_train_balanced[y_train_balanced == 0]) / len(y_train_balanced[y_train_balanced == 1]) if len(y_train_balanced[y_train_balanced == 1]) > 0 else 1.0
print(f"📊 Positive class weight: {pos_weight:.2f}")

# Define advanced models
advanced_models = {
    "Optimized SVM": SVC(
        kernel='rbf',
        C=1.0,
        gamma='scale',
        probability=True,
        class_weight='balanced',
        random_state=42
    ),
    
    "Logistic Regression L1": LogisticRegression(
        penalty='l1',
        solver='liblinear',
        C=0.1,
        class_weight='balanced',
        random_state=42
    ),
    
    "Random Forest Balanced": RandomForestClassifier(
        n_estimators=100,
        max_depth=10,
        min_samples_split=5,
        min_samples_leaf=2,
        class_weight='balanced',
        random_state=42,
        n_jobs=-1
    ),
    
    "Gradient Boosting Custom": GradientBoostingClassifier(
        n_estimators=100,
        learning_rate=0.1,
        max_depth=6,
        min_samples_split=5,
        subsample=0.8,
        random_state=42
    ),
    
    "Neural Network": MLPClassifier(
        hidden_layer_sizes=(64, 32),  # Reduced for small dataset
        activation='relu',
        solver='adam',
        alpha=0.001,
        learning_rate='adaptive',
        max_iter=500,
        early_stopping=True,
        random_state=42
    )
}

# Add XGBoost if available
if XGBOOST_AVAILABLE:
    advanced_models["XGBoost Optimized"] = XGBClassifier(
        n_estimators=100,
        max_depth=6,
        learning_rate=0.1,
        subsample=0.8,
        colsample_bytree=0.8,
        scale_pos_weight=pos_weight,
        random_state=42,
        eval_metric='logloss'
    )

print(f"🤖 Configured {len(advanced_models)} advanced models")
for name in advanced_models.keys():
    print(f"  - {name}")

## Model Training with Cross-Validation

In [None]:
%%time
# Train advanced models
trained_advanced_models = {}
cv_scores = {}

print("🚀 Training advanced models...\n")

for name, model in advanced_models.items():
    print(f"🔄 Training: {name}")
    
    # Train model
    model.fit(X_train_selected, y_train_balanced)
    trained_advanced_models[name] = model
    
    # Quick cross-validation (reduced folds for small dataset)
    try:
        if len(np.unique(y_train_balanced)) > 1:
            cv_scores_model = cross_val_score(
                model, X_train_selected, y_train_balanced, 
                cv=min(3, len(X_train_selected)), scoring='f1', n_jobs=-1
            )
            cv_scores[name] = cv_scores_model
            print(f"  ✅ CV F1-Score: {cv_scores_model.mean():.3f} (±{cv_scores_model.std():.3f})")
        else:
            print(f"  ⚠️ CV skipped: only one class in training set")
            cv_scores[name] = [0.0]
    except Exception as e:
        print(f"  ⚠️ CV failed: {e}")
        cv_scores[name] = [0.0]
    
    print(f"  ✅ Training accuracy: {model.score(X_train_selected, y_train_balanced):.3f}")
    print()

print(f"🎯 All {len(trained_advanced_models)} advanced models trained!")

## Ensemble Methods

In [None]:
# Create ensemble models
print("🔄 Creating ensemble models...")

# Select best performing models for ensemble
best_models = [
    ('svm', advanced_models['Optimized SVM']),
    ('lr', advanced_models['Logistic Regression L1']),
    ('rf', advanced_models['Random Forest Balanced'])
]

# Voting classifier (soft voting for probabilities)
voting_clf = VotingClassifier(
    estimators=best_models,
    voting='soft'
)

# Train ensemble
print("🔄 Training ensemble...")
voting_clf.fit(X_train_selected, y_train_balanced)

# Add to models
trained_advanced_models['Ensemble (Voting)'] = voting_clf

print("✅ Ensemble model trained")

## Advanced Evaluation with Patient-Level Aggregation

In [None]:
def evaluate_advanced_model(model, X_test, y_test, test_patients, model_name):
    """
    Advanced evaluation with both file-level and patient-level metrics
    """
    # File-level predictions
    y_pred_file = model.predict(X_test)
    
    if hasattr(model, "predict_proba"):
        y_prob_file = model.predict_proba(X_test)[:, 1]
    else:
        y_prob_file = y_pred_file
    
    # Patient-level aggregation
    unique_patients = np.unique(test_patients)
    patient_predictions = []
    patient_true_labels = []
    patient_probs = []
    
    for patient in unique_patients:
        patient_mask = test_patients == patient
        patient_files_pred = y_pred_file[patient_mask]
        patient_files_true = y_test[patient_mask]
        patient_files_prob = y_prob_file[patient_mask]
        
        # Patient-level aggregation strategies
        # 1. Any positive file makes patient positive (sensitive)
        patient_pred_any = int(np.any(patient_files_pred))
        patient_true_any = int(np.any(patient_files_true))
        patient_prob_max = np.max(patient_files_prob)
        
        patient_predictions.append(patient_pred_any)
        patient_true_labels.append(patient_true_any)
        patient_probs.append(patient_prob_max)
    
    patient_predictions = np.array(patient_predictions)
    patient_true_labels = np.array(patient_true_labels)
    patient_probs = np.array(patient_probs)
    
    # Calculate metrics
    # File-level metrics
    cm_file = confusion_matrix(y_test, y_pred_file)
    if cm_file.shape == (2, 2):
        tn_f, fp_f, fn_f, tp_f = cm_file.ravel()
    else:
        tn_f, fp_f, fn_f, tp_f = 0, 0, 0, 0
    
    # Patient-level metrics
    cm_patient = confusion_matrix(patient_true_labels, patient_predictions)
    if cm_patient.shape == (2, 2):
        tn_p, fp_p, fn_p, tp_p = cm_patient.ravel()
    else:
        tn_p, fp_p, fn_p, tp_p = 0, 0, 0, 0
    
    # Calculate clinical metrics
    def safe_divide(a, b):
        return a / b if b > 0 else 0
    
    # File-level metrics
    file_metrics = {
        'sensitivity': safe_divide(tp_f, tp_f + fn_f),
        'specificity': safe_divide(tn_f, tn_f + fp_f),
        'precision': safe_divide(tp_f, tp_f + fp_f),
        'npv': safe_divide(tn_f, tn_f + fn_f),
        'f1': f1_score(y_test, y_pred_file, average='weighted') if len(np.unique(y_test)) > 1 else 0,
        'f2': fbeta_score(y_test, y_pred_file, beta=2, average='weighted') if len(np.unique(y_test)) > 1 else 0,
        'accuracy': accuracy_score(y_test, y_pred_file)
    }
    
    # Patient-level metrics
    patient_metrics = {
        'sensitivity': safe_divide(tp_p, tp_p + fn_p),
        'specificity': safe_divide(tn_p, tn_p + fp_p),
        'precision': safe_divide(tp_p, tp_p + fp_p),
        'npv': safe_divide(tn_p, tn_p + fn_p),
        'f1': f1_score(patient_true_labels, patient_predictions, average='weighted') if len(np.unique(patient_true_labels)) > 1 else 0,
        'f2': fbeta_score(patient_true_labels, patient_predictions, beta=2, average='weighted') if len(np.unique(patient_true_labels)) > 1 else 0,
        'accuracy': accuracy_score(patient_true_labels, patient_predictions)
    }
    
    # AUC metrics
    try:
        if len(np.unique(y_test)) > 1:
            file_roc_auc = roc_auc_score(y_test, y_prob_file)
            precision_vals, recall_vals, _ = precision_recall_curve(y_test, y_prob_file)
            file_pr_auc = auc(recall_vals, precision_vals)
        else:
            file_roc_auc = file_pr_auc = 0.0
            
        if len(np.unique(patient_true_labels)) > 1:
            patient_roc_auc = roc_auc_score(patient_true_labels, patient_probs)
            precision_vals_p, recall_vals_p, _ = precision_recall_curve(patient_true_labels, patient_probs)
            patient_pr_auc = auc(recall_vals_p, precision_vals_p)
        else:
            patient_roc_auc = patient_pr_auc = 0.0
    except:
        file_roc_auc = patient_roc_auc = file_pr_auc = patient_pr_auc = 0.0
    
    return {
        'model_name': model_name,
        'file_metrics': file_metrics,
        'patient_metrics': patient_metrics,
        'file_roc_auc': file_roc_auc,
        'patient_roc_auc': patient_roc_auc,
        'file_pr_auc': file_pr_auc,
        'patient_pr_auc': patient_pr_auc,
        'file_cm': cm_file,
        'patient_cm': cm_patient,
        'file_predictions': y_pred_file,
        'patient_predictions': patient_predictions,
        'file_probs': y_prob_file,
        'patient_probs': patient_probs,
        'n_patients': len(unique_patients),
        'n_files': len(y_test),
        'tp_p': tp_p, 'fn_p': fn_p, 'tn_p': tn_p, 'fp_p': fp_p
    }

# Evaluate all advanced models
advanced_results = {}

print("📊 Evaluating advanced models...\n")

for name, model in trained_advanced_models.items():
    result = evaluate_advanced_model(
        model, X_test_selected, y_test_original, test_patients_original, name
    )
    advanced_results[name] = result
    
    print(f"🔍 {name}:")
    print(f"  📁 File-level Sensitivity: {result['file_metrics']['sensitivity']:.3f}")
    print(f"  🏥 Patient-level Sensitivity: {result['patient_metrics']['sensitivity']:.3f}")
    print(f"  📁 File-level F2-Score: {result['file_metrics']['f2']:.3f}")
    print(f"  🏥 Patient-level F2-Score: {result['patient_metrics']['f2']:.3f}")
    print(f"  📊 Patient-level PR-AUC: {result['patient_pr_auc']:.3f}")
    print(f"  🎯 Clinical Target (≥80%): {'✅' if result['patient_metrics']['sensitivity'] >= 0.8 else '❌'}")
    print(f"  🏥 TB Patients Detected: {result['tp_p']}/{result['tp_p'] + result['fn_p']}")
    print()

print("✅ Advanced evaluation completed!")

## Results Summary

In [None]:
# Create results summary
print("📋 NEW DATASET RESULTS SUMMARY")
print("=" * 50)

# Create summary table
summary_data = []
for name, result in advanced_results.items():
    summary_data.append({
        'Model': name,
        'Patient Sensitivity': f"{result['patient_metrics']['sensitivity']:.3f}",
        'Patient Specificity': f"{result['patient_metrics']['specificity']:.3f}",
        'Patient Precision': f"{result['patient_metrics']['precision']:.3f}",
        'Patient F2-Score': f"{result['patient_metrics']['f2']:.3f}",
        'Patient PR-AUC': f"{result['patient_pr_auc']:.3f}",
        'Patient ROC-AUC': f"{result['patient_roc_auc']:.3f}",
        'Clinical Target': '✅' if result['patient_metrics']['sensitivity'] >= 0.8 else '❌',
        'TB Patients Detected': f"{result['tp_p']}/{result['tp_p'] + result['fn_p']}"
    })

results_df = pd.DataFrame(summary_data)
print(results_df.to_string(index=False))

# Dataset summary
print(f"\n📊 DATASET SUMMARY:")
print(f"   Total samples: {len(X)}")
print(f"   Total patients: {len(np.unique(patient_ids))}")
print(f"   TB Positive: {sum(y)} ({sum(y)/len(y)*100:.1f}%)")
print(f"   TB Negative: {len(y)-sum(y)} ({(len(y)-sum(y))/len(y)*100:.1f}%)")
print(f"   Features: {X.shape[1]} (temporal expansion from 512)")

# Best model
if len(advanced_results) > 0:
    best_model = max(advanced_results.items(), key=lambda x: x[1]['patient_metrics']['sensitivity'])
    print(f"\n🏆 BEST MODEL: {best_model[0]}")
    print(f"   Patient-level Sensitivity: {best_model[1]['patient_metrics']['sensitivity']:.3f}")
    print(f"   Patient-level Specificity: {best_model[1]['patient_metrics']['specificity']:.3f}")
    print(f"   Patient-level Precision: {best_model[1]['patient_metrics']['precision']:.3f}")
    print(f"   Patient-level F2-Score: {best_model[1]['patient_metrics']['f2']:.3f}")

print("\n" + "=" * 50)
print("🎉 NEW DATASET ANALYSIS COMPLETE")
print("=" * 50)

In [None]:
## Comprehensive Visualizations

# Set up plotting style
plt.style.use('default')
sns.set_palette(\"husl\")

def plot_confusion_matrices(advanced_results, figsize=(20, 12)):
    \"\"\"Plot confusion matrices for all models\"\"\"
    n_models = len(advanced_results)
    fig, axes = plt.subplots(2, n_models, figsize=figsize)
    
    if n_models == 1:
        axes = axes.reshape(2, 1)
    
    for i, (name, result) in enumerate(advanced_results.items()):
        # File-level confusion matrix
        ax1 = axes[0, i]
        cm_file = result['file_cm']
        if cm_file.size > 0:
            sns.heatmap(cm_file, annot=True, fmt='d', cmap='Blues', ax=ax1,
                       xticklabels=['TB-', 'TB+'], yticklabels=['TB-', 'TB+'])
            ax1.set_title(f'{name}\\nFile-level Confusion Matrix')
            ax1.set_xlabel('Predicted')
            ax1.set_ylabel('Actual')
        
        # Patient-level confusion matrix
        ax2 = axes[1, i]
        cm_patient = result['patient_cm']
        if cm_patient.size > 0:
            sns.heatmap(cm_patient, annot=True, fmt='d', cmap='Oranges', ax=ax2,
                       xticklabels=['TB-', 'TB+'], yticklabels=['TB-', 'TB+'])
            ax2.set_title(f'{name}\\nPatient-level Confusion Matrix')
            ax2.set_xlabel('Predicted')
            ax2.set_ylabel('Actual')
    
    plt.tight_layout()
    plt.show()

def plot_roc_curves(advanced_results, figsize=(15, 6)):
    \"\"\"Plot ROC curves for all models\"\"\"
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=figsize)
    
    # File-level ROC curves
    for name, result in advanced_results.items():
        if result['file_roc_auc'] > 0:
            # Calculate ROC curve points
            y_true = np.concatenate([np.ones(result['tp_p']), np.zeros(result['tn_p']), 
                                   np.ones(result['fn_p']), np.zeros(result['fp_p'])])
            y_scores = result['file_probs']
            
            if len(np.unique(y_true)) > 1:
                fpr, tpr, _ = roc_curve(y_true, y_scores)
                ax1.plot(fpr, tpr, label=f'{name} (AUC={result[\"file_roc_auc\"]:.3f})')
    
    ax1.plot([0, 1], [0, 1], 'k--', alpha=0.5)
    ax1.set_xlabel('False Positive Rate')
    ax1.set_ylabel('True Positive Rate')
    ax1.set_title('File-level ROC Curves')
    ax1.legend()
    ax1.grid(True, alpha=0.3)
    
    # Patient-level ROC curves
    for name, result in advanced_results.items():
        if result['patient_roc_auc'] > 0:
            # Use patient-level data
            y_true_patient = np.concatenate([np.ones(result['tp_p']), np.zeros(result['tn_p']), 
                                           np.ones(result['fn_p']), np.zeros(result['fp_p'])])
            y_scores_patient = result['patient_probs']
            
            if len(np.unique(y_true_patient)) > 1 and len(y_scores_patient) > 0:
                fpr, tpr, _ = roc_curve(y_true_patient, y_scores_patient)
                ax2.plot(fpr, tpr, label=f'{name} (AUC={result[\"patient_roc_auc\"]:.3f})')
    
    ax2.plot([0, 1], [0, 1], 'k--', alpha=0.5)
    ax2.set_xlabel('False Positive Rate')
    ax2.set_ylabel('True Positive Rate')
    ax2.set_title('Patient-level ROC Curves')
    ax2.legend()
    ax2.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.show()

def plot_precision_recall_curves(advanced_results, figsize=(15, 6)):
    \"\"\"Plot Precision-Recall curves for all models\"\"\"
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=figsize)
    
    # File-level PR curves
    for name, result in advanced_results.items():
        if result['file_pr_auc'] > 0:
            y_true = np.concatenate([np.ones(result['tp_p']), np.zeros(result['tn_p']), 
                                   np.ones(result['fn_p']), np.zeros(result['fp_p'])])
            y_scores = result['file_probs']
            
            if len(np.unique(y_true)) > 1:
                precision, recall, _ = precision_recall_curve(y_true, y_scores)
                ax1.plot(recall, precision, label=f'{name} (AUC={result[\"file_pr_auc\"]:.3f})')
    
    ax1.set_xlabel('Recall')
    ax1.set_ylabel('Precision')
    ax1.set_title('File-level Precision-Recall Curves')
    ax1.legend()
    ax1.grid(True, alpha=0.3)
    
    # Patient-level PR curves
    for name, result in advanced_results.items():
        if result['patient_pr_auc'] > 0:
            y_true_patient = np.concatenate([np.ones(result['tp_p']), np.zeros(result['tn_p']), 
                                           np.ones(result['fn_p']), np.zeros(result['fp_p'])])
            y_scores_patient = result['patient_probs']
            
            if len(np.unique(y_true_patient)) > 1 and len(y_scores_patient) > 0:
                precision, recall, _ = precision_recall_curve(y_true_patient, y_scores_patient)
                ax2.plot(recall, precision, label=f'{name} (AUC={result[\"patient_pr_auc\"]:.3f})')
    
    ax2.set_xlabel('Recall')
    ax2.set_ylabel('Precision')
    ax2.set_title('Patient-level Precision-Recall Curves')
    ax2.legend()
    ax2.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.show()

def plot_performance_metrics(advanced_results, figsize=(15, 10)):
    \"\"\"Plot performance metrics comparison\"\"\"
    models = list(advanced_results.keys())
    
    # Extract metrics
    file_metrics = {
        'Sensitivity': [advanced_results[m]['file_metrics']['sensitivity'] for m in models],
        'Specificity': [advanced_results[m]['file_metrics']['specificity'] for m in models],
        'Precision': [advanced_results[m]['file_metrics']['precision'] for m in models],
        'F2-Score': [advanced_results[m]['file_metrics']['f2'] for m in models],
        'Accuracy': [advanced_results[m]['file_metrics']['accuracy'] for m in models]
    }
    
    patient_metrics = {
        'Sensitivity': [advanced_results[m]['patient_metrics']['sensitivity'] for m in models],
        'Specificity': [advanced_results[m]['patient_metrics']['specificity'] for m in models],
        'Precision': [advanced_results[m]['patient_metrics']['precision'] for m in models],
        'F2-Score': [advanced_results[m]['patient_metrics']['f2'] for m in models],
        'Accuracy': [advanced_results[m]['patient_metrics']['accuracy'] for m in models]
    }
    
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=figsize)
    
    # File-level metrics
    file_df = pd.DataFrame(file_metrics, index=models)
    file_df.plot(kind='bar', ax=ax1, width=0.8)
    ax1.set_title('File-level Performance Metrics')
    ax1.set_ylabel('Score')
    ax1.set_ylim(0, 1)
    ax1.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    ax1.grid(True, alpha=0.3)
    
    # Add clinical target line for sensitivity
    ax1.axhline(y=0.8, color='red', linestyle='--', alpha=0.7, label='Clinical Target (80%)')
    
    # Patient-level metrics
    patient_df = pd.DataFrame(patient_metrics, index=models)
    patient_df.plot(kind='bar', ax=ax2, width=0.8)
    ax2.set_title('Patient-level Performance Metrics')
    ax2.set_ylabel('Score')
    ax2.set_ylim(0, 1)
    ax2.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    ax2.grid(True, alpha=0.3)
    
    # Add clinical target line for sensitivity
    ax2.axhline(y=0.8, color='red', linestyle='--', alpha=0.7, label='Clinical Target (80%)')
    
    plt.tight_layout()
    plt.show()

# Generate all visualizations
if len(advanced_results) > 0:
    print(\"\\n\" + \"=\" * 60)
    print(\"📊 COMPREHENSIVE VISUALIZATIONS\")
    print(\"=\" * 60)
    
    # 1. Confusion Matrices
    print(\"\\n1️⃣ Confusion Matrices (File-level and Patient-level)\")
    plot_confusion_matrices(advanced_results)
    
    # 2. ROC Curves
    print(\"\\n2️⃣ ROC Curves (File-level and Patient-level)\")
    plot_roc_curves(advanced_results)
    
    # 3. Precision-Recall Curves
    print(\"\\n3️⃣ Precision-Recall Curves (File-level and Patient-level)\")
    plot_precision_recall_curves(advanced_results)
    
    # 4. Performance Metrics Comparison
    print(\"\\n4️⃣ Performance Metrics Comparison\")
    plot_performance_metrics(advanced_results)
    
    print(\"\\n\" + \"=\" * 60)
    print(\"📈 ALL VISUALIZATIONS COMPLETE\")
    print(\"=\" * 60)
else:
    print(\"⚠️ No results available for visualization\")

In [3]:
## Detailed Performance Metrics Table

def create_detailed_metrics_table(advanced_results):
    \"\"\"Create a comprehensive metrics table\"\"\"
    detailed_data = []
    
    for name, result in advanced_results.items():
        # File-level metrics
        detailed_data.append({
            'Model': name,
            'Level': 'File',
            'Sensitivity (Recall)': f\"{result['file_metrics']['sensitivity']:.3f}\",
            'Specificity': f\"{result['file_metrics']['specificity']:.3f}\",
            'Precision (PPV)': f\"{result['file_metrics']['precision']:.3f}\",
            'NPV': f\"{result['file_metrics']['npv']:.3f}\",
            'F1-Score': f\"{result['file_metrics']['f1']:.3f}\",
            'F2-Score': f\"{result['file_metrics']['f2']:.3f}\",
            'Accuracy': f\"{result['file_metrics']['accuracy']:.3f}\",
            'ROC-AUC': f\"{result['file_roc_auc']:.3f}\",
            'PR-AUC': f\"{result['file_pr_auc']:.3f}\",
            'TP': result.get('tp_f', 0),
            'TN': result.get('tn_f', 0),
            'FP': result.get('fp_f', 0),
            'FN': result.get('fn_f', 0)\n        })\n        \n        # Patient-level metrics\n        detailed_data.append({\n            'Model': name,\n            'Level': 'Patient',\n            'Sensitivity (Recall)': f\"{result['patient_metrics']['sensitivity']:.3f}\",\n            'Specificity': f\"{result['patient_metrics']['specificity']:.3f}\",\n            'Precision (PPV)': f\"{result['patient_metrics']['precision']:.3f}\",\n            'NPV': f\"{result['patient_metrics']['npv']:.3f}\",\n            'F1-Score': f\"{result['patient_metrics']['f1']:.3f}\",\n            'F2-Score': f\"{result['patient_metrics']['f2']:.3f}\",\n            'Accuracy': f\"{result['patient_metrics']['accuracy']:.3f}\",\n            'ROC-AUC': f\"{result['patient_roc_auc']:.3f}\",\n            'PR-AUC': f\"{result['patient_pr_auc']:.3f}\",\n            'TP': result['tp_p'],\n            'TN': result['tn_p'],\n            'FP': result['fp_p'],\n            'FN': result['fn_p']\n        })\n    \n    return pd.DataFrame(detailed_data)\n\n# Create and display detailed metrics table\nif len(advanced_results) > 0:\n    print(\"\\n\" + \"=\" * 80)\n    print(\"📋 DETAILED PERFORMANCE METRICS TABLE\")\n    print(\"=\" * 80)\n    \n    detailed_df = create_detailed_metrics_table(advanced_results)\n    \n    # Display with better formatting\n    pd.set_option('display.max_columns', None)\n    pd.set_option('display.width', None)\n    pd.set_option('display.max_colwidth', None)\n    \n    print(detailed_df.to_string(index=False))\n    \n    # Clinical interpretation\n    print(\"\\n\" + \"=\" * 80)\n    print(\"🏥 CLINICAL INTERPRETATION\")\n    print(\"=\" * 80)\n    \n    print(\"📊 Key Clinical Metrics:\")\n    print(\"   • Sensitivity (Recall): Proportion of TB cases correctly identified\")\n    print(\"   • Specificity: Proportion of non-TB cases correctly identified\")\n    print(\"   • Precision (PPV): Proportion of positive predictions that are correct\")\n    print(\"   • NPV: Proportion of negative predictions that are correct\")\n    print(\"   • F2-Score: Weighted harmonic mean favoring recall (clinical focus)\")\n    print(\"   • Clinical Target: ≥80% sensitivity for TB screening\")\n    \n    # Find models meeting clinical target\n    meeting_target = detailed_df[\n        (detailed_df['Level'] == 'Patient') & \n        (detailed_df['Sensitivity (Recall)'].astype(float) >= 0.8)\n    ]\n    \n    if len(meeting_target) > 0:\n        print(f\"\\n✅ Models meeting clinical target (≥80% sensitivity):\")\n        for _, row in meeting_target.iterrows():\n            print(f\"   • {row['Model']}: {row['Sensitivity (Recall)']} sensitivity\")\n    else:\n        print(f\"\\n⚠️ No models currently meet the clinical target of ≥80% sensitivity\")\n        best_sensitivity = detailed_df[\n            detailed_df['Level'] == 'Patient'\n        ]['Sensitivity (Recall)'].astype(float).max()\n        print(f\"   • Best sensitivity achieved: {best_sensitivity:.3f}\")\n        print(f\"   • Gap to target: {0.8 - best_sensitivity:.3f}\")\n    \n    print(\"\\n\" + \"=\" * 80)\n    print(\"📈 METRICS TABLE COMPLETE\")\n    print(\"=\" * 80)\nelse:\n    print(\"⚠️ No results available for detailed metrics table\")"

SyntaxError: unexpected character after line continuation character (3139027568.py, line 4)