In [None]:
# Section banner for the cluster-separation / assignment-confidence metric.
print(
    "\n[Metric 2] Cluster Separation & Decision Boundary Confidence",
    "-" * 70,
    sep="\n",
)

def calculate_separation_metrics(X, labels, centroids):
    """
    Measure how confidently each point sits inside its assigned cluster.

    For each point, calculate:
    1. Distance to assigned cluster centroid (d1)
    2. Distance to nearest other cluster centroid (d2)
    3. Separation ratio: (d2 - d1) / d1 (higher = more confident assignment;
       ``inf`` when the point lies exactly on its own centroid)

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Point coordinates (a 1-D input is treated as a single feature).
    labels : sequence of length n_samples
        Cluster id assigned to each row of ``X``; every id must be a key
        of ``centroids``.
    centroids : dict
        Maps cluster id -> centroid vector with n_features entries.

    Returns
    -------
    pd.DataFrame
        One row per point with columns ``cluster``, ``assigned_distance``,
        ``nearest_other_distance``, ``separation_ratio`` and
        ``nearest_other_cluster``. When ``centroids`` holds a single
        cluster there is no "other" cluster: the distance and ratio
        columns are NaN and ``nearest_other_cluster`` is None.

    Raises
    ------
    KeyError
        If a label has no entry in ``centroids``.
    """
    columns = [
        'cluster',
        'assigned_distance',
        'nearest_other_distance',
        'separation_ratio',
        'nearest_other_cluster',
    ]

    X = np.asarray(X, dtype=float)
    if X.ndim == 1:
        X = X.reshape(-1, 1)
    labels = np.asarray(labels)
    n_samples = X.shape[0]

    if n_samples == 0:
        return pd.DataFrame({name: [] for name in columns})

    # One column of distances per centroid, in the dict's iteration order so
    # that ties resolve to the first-listed cluster.
    cluster_ids = list(centroids)
    column_of = {cid: j for j, cid in enumerate(cluster_ids)}
    distances = np.empty((n_samples, len(cluster_ids)))
    for j, cid in enumerate(cluster_ids):
        centroid = np.asarray(centroids[cid], dtype=float)
        distances[:, j] = np.linalg.norm(X - centroid, axis=1)

    rows = np.arange(n_samples)
    assigned_cols = np.array(
        [column_of[label] for label in labels.tolist()], dtype=int
    )
    d1 = distances[rows, assigned_cols]

    if len(cluster_ids) > 1:
        # Hide each point's own cluster so argmin finds the nearest rival.
        rivals = distances.copy()
        rivals[rows, assigned_cols] = np.inf
        nearest_cols = rivals.argmin(axis=1)
        d2 = rivals[rows, nearest_cols]
        nearest_other = [cluster_ids[j] for j in nearest_cols]

        # Points sitting exactly on their centroid (d1 == 0) keep the inf
        # default instead of triggering a division by zero.
        sep_ratio = np.full(n_samples, np.inf)
        np.divide(d2 - d1, d1, out=sep_ratio, where=d1 > 0)
    else:
        # A single cluster has no rival to compare against.
        d2 = np.full(n_samples, np.nan)
        sep_ratio = np.full(n_samples, np.nan)
        nearest_other = [None] * n_samples

    return pd.DataFrame({
        'cluster': labels,
        'assigned_distance': d1,
        'nearest_other_distance': d2,
        'separation_ratio': sep_ratio,
        'nearest_other_cluster': nearest_other,
    })

# Score both splits against the centroids fitted on the training data
# (the test call intentionally reuses cent_train).
sep_train = calculate_separation_metrics(X_train, train_final, cent_train)
sep_test = calculate_separation_metrics(X_test, test_final, cent_train)

# The same per-cluster confidence table is printed for each split.
for split_title, split_df in (
    ("\nTrain Set - Cluster Assignment Confidence:", sep_train),
    ("\nTest Set - Cluster Assignment Confidence:", sep_test),
):
    print(split_title)
    print(f"{'Cluster':<8} {'Avg Sep Ratio':<18} {'% Ambiguous (<0.3)':<20} {'Confused With':<15}")
    print("-"*70)

    for cid in sorted(split_df['cluster'].unique()):
        members = split_df[split_df['cluster'] == cid]
        ratios = members['separation_ratio']
        avg_sep = ratios.mean()
        # Share of members whose separation ratio falls below 0.3
        pct_ambiguous = (ratios < 0.3).mean() * 100

        # Rival cluster that is most often the nearest alternative
        if len(members) > 0:
            confused_with = members['nearest_other_cluster'].mode()[0]
        else:
            confused_with = 'N/A'

        if pct_ambiguous < 10:
            status = "✓ Clear"
        elif pct_ambiguous < 25:
            status = "⚠ Moderate"
        else:
            status = "❌ Fuzzy"
        print(f"{cid:<8} {avg_sep:<18.3f} {pct_ambiguous:>6.1f}%  {status:<12} Cluster {confused_with}")

# List the ten least confident test assignments, lowest ratio first.
print("\n⚠ Most Ambiguous Assignments in Test Set (Separation Ratio < 0.2):")
ambiguous = sep_test.loc[sep_test['separation_ratio'] < 0.2].sort_values('separation_ratio')
n_ambiguous = len(ambiguous)
pct_of_test = n_ambiguous / len(sep_test) * 100
print(f"Found {n_ambiguous} ambiguous assignments ({pct_of_test:.1f}% of test set)")
print(f"\n{'Index':<8} {'Assigned':<10} {'Could be':<10} {'Sep Ratio':<12}")
print("-"*50)
for idx, row in ambiguous.head(10).iterrows():
    assigned = int(row['cluster'])
    alternative = int(row['nearest_other_cluster'])
    print(f"{idx:<8} {assigned:<10} {alternative:<10} {row['separation_ratio']:<12.3f}")

In [None]:
# Section banner for the per-cluster feature-contribution metric.
print(
    "\n[Metric 3] Feature Contribution to Cluster Assignment",
    "-" * 70,
    sep="\n",
)

def calculate_feature_importance_for_assignment(X, labels, centroids, feature_names):
    """
    Rank features by how strongly they separate each cluster from the rest.

    For every cluster and every feature the score is a Cohen's-d style
    effect size between the cluster's points and all other points:

        |mean_in - mean_out| / sqrt((std_in**2 + std_out**2) / 2)

    using the population standard deviation (NumPy's default ddof=0).
    The score is 0 when the pooled standard deviation is zero or undefined,
    which includes the case where no points lie outside the cluster.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Feature matrix.
    labels : sequence of length n_samples
        Cluster id for each row of ``X``.
    centroids : any
        Not used by the computation; kept so existing callers keep working.
    feature_names : sequence of length n_features
        Name reported for each column of ``X``.

    Returns
    -------
    dict
        Maps each distinct label to a DataFrame with columns ``Feature``
        and ``Importance``, sorted by ``Importance`` in descending order.
    """
    X = np.asarray(X, dtype=float)
    labels = np.asarray(labels)
    feature_importance = {}

    for cid in np.unique(labels):
        in_cluster = labels == cid
        cluster_points = X[in_cluster]
        other_points = X[~in_cluster]

        # Default of 0 covers every "no measurable separation" case.
        scores = np.zeros(X.shape[1])
        if len(other_points) > 0:
            mean_diff = np.abs(
                cluster_points.mean(axis=0) - other_points.mean(axis=0)
            )
            pooled_std = np.sqrt(
                (cluster_points.std(axis=0) ** 2 + other_points.std(axis=0) ** 2) / 2
            )
            # Only divide where the pooled spread is positive; constant
            # features keep their 0 score instead of producing inf/NaN.
            np.divide(mean_diff, pooled_std, out=scores, where=pooled_std > 0)

        feature_importance[cid] = pd.DataFrame({
            'Feature': feature_names,
            'Importance': scores
        }).sort_values('Importance', ascending=False)

    return feature_importance

# NOTE: the importances below are computed on the UMAP embedding dimensions,
# not on the original input features. To analyse the original features,
# substitute their column names (and the matching pre-UMAP matrix) here.
umap_feature_names = ["UMAP_" + str(i) for i in range(X_train.shape[1])]

feat_importance_train = calculate_feature_importance_for_assignment(
    X_train, train_final, cent_train, umap_feature_names
)

print("\nTop 3 Distinguishing Features per Cluster (Train Set):")
print("-"*70)
for cid in sorted(feat_importance_train):
    print(f"\nCluster {cid}:")
    top_features = feat_importance_train[cid].head(3)
    for feat_name, feat_score in zip(top_features['Feature'], top_features['Importance']):
        print(f"  {feat_name:<15} Importance: {feat_score:.3f}")

# Repeat on the test split to see whether the same feature dominates there.
feat_importance_test = calculate_feature_importance_for_assignment(
    X_test, test_final, cent_train, umap_feature_names
)

print("\n\nFeature Importance Consistency (Train vs Test):")
print(f"{'Cluster':<8} {'Train Top Feat':<20} {'Test Top Feat':<20} {'Match?':<10}")
print("-"*70)
for cid in sorted(feat_importance_train):
    # Clusters with no test members have nothing to compare against.
    if cid not in feat_importance_test:
        continue
    train_top = feat_importance_train[cid]['Feature'].iloc[0]
    test_top = feat_importance_test[cid]['Feature'].iloc[0]
    match = "✓" if train_top == test_top else "✗"
    print(f"{cid:<8} {train_top:<20} {test_top:<20} {match:<10}")

In [None]:
# Section banner for the membership-stability metric.
print(
    "\n[Metric 4] Cluster Membership Stability",
    "-" * 70,
    sep="\n",
)

from sklearn.utils import resample

def calculate_stability_score(X, labels, centroids, n_bootstrap=50,
                              noise_scale=0.05, random_state=None):
    """
    Estimate how robust each point's cluster assignment is to small noise.

    In each of ``n_bootstrap`` rounds, Gaussian noise is added to every
    point (per-feature standard deviation = ``noise_scale`` times that
    feature's standard deviation in ``X``), the noisy point is reassigned
    to its nearest centroid, and the result is compared with the original
    label. No resampling of rows is performed; only noise perturbation.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Point coordinates.
    labels : sequence of length n_samples
        Original cluster id of each row of ``X``.
    centroids : dict
        Maps cluster id -> centroid vector. Ties between equally distant
        centroids go to the one listed first in the dict.
    n_bootstrap : int, default 50
        Number of noise rounds; must be at least 1.
    noise_scale : float, default 0.05
        Noise standard deviation as a fraction of each feature's spread.
    random_state : int or None, default None
        Seed for a dedicated generator, giving reproducible scores. When
        None, NumPy's global random state is used.

    Returns
    -------
    np.ndarray, shape (n_samples,)
        Fraction of rounds in which each point kept its original label.

    Raises
    ------
    ValueError
        If ``n_bootstrap`` is smaller than 1 (the fraction would be 0/0).
    """
    if n_bootstrap < 1:
        raise ValueError("n_bootstrap must be at least 1")

    X = np.asarray(X, dtype=float)
    labels = np.asarray(labels)
    n_samples = X.shape[0]

    cluster_ids = np.array(list(centroids.keys()))
    centroid_vectors = [
        np.asarray(centroids[cid], dtype=float) for cid in centroids
    ]

    # Fall back to the module-level functions so existing np.random.seed()
    # based workflows behave as before.
    rng = np.random if random_state is None else np.random.default_rng(random_state)

    # The noise scale depends only on X, so compute it once.
    noise_std = noise_scale * X.std(axis=0)

    matches = np.zeros(n_samples)
    distances = np.empty((n_samples, len(centroid_vectors)))
    for _ in range(n_bootstrap):
        X_noisy = X + rng.normal(0, noise_std, X.shape)

        # Reassign every noisy point to its nearest centroid.
        for j, centroid in enumerate(centroid_vectors):
            distances[:, j] = np.linalg.norm(X_noisy - centroid, axis=1)
        new_labels = cluster_ids[distances.argmin(axis=1)]

        # Count the rounds in which the assignment did not change.
        matches += (new_labels == labels)

    return matches / n_bootstrap

# Perturb the test points and check how often each keeps its cluster,
# measured against the centroids fitted on the training data.
stability_test = calculate_stability_score(X_test, test_final, cent_train, n_bootstrap=50)

# Pair every test point's cluster with its stability score for grouping.
stability_df = pd.DataFrame({
    'cluster': test_final,
    'stability': stability_test
})

print("\nCluster Assignment Stability (Test Set):")
print(f"{'Cluster':<8} {'Mean Stability':<18} {'% Unstable (<0.7)':<20} {'Status':<10}")
print("-"*70)

for cid in sorted(stability_df['cluster'].unique()):
    cluster_stab = stability_df[stability_df['cluster'] == cid]['stability']
    mean_stab = cluster_stab.mean()
    # Share of the cluster's points that kept their label in < 70% of rounds
    pct_unstable = (cluster_stab < 0.7).mean() * 100

    status = "✓ Robust" if pct_unstable < 10 else ("⚠ Moderate" if pct_unstable < 25 else "❌ Fragile")
    # Pad the percentage to the header's column width and print the status
    # exactly once (it was previously emitted twice per row).
    pct_cell = f"{pct_unstable:>6.1f}%"
    print(f"{cid:<8} {mean_stab:<18.3f} {pct_cell:<20} {status}")

print(f"\nOverall Test Set Stability: {stability_test.mean():.3f}")
print(f"Samples with stability < 0.7: {(stability_test < 0.7).sum()} ({(stability_test < 0.7).mean()*100:.1f}%)")