# 📊 Objective 2: Divergence Analysis & Novel Metrics
## MCM Problem C 2026

**Goal:** Develop rigorous metrics to compare voting methods beyond simple agreement counts.

### Novel Metrics Introduced:
1. **Outcome Divergence Score (ODS)** - Seasonal measure of method disagreement
2. **Judge-Fan Alignment Coefficient (JFAC)** - Spearman correlation between rankings
3. **Margin of Safety (MoS)** - How close was the elimination?
4. **Method Sensitivity Index (MSI)** - Minimum vote change to flip outcome
5. **Underdog Survival Probability (USP)** - Does a method favor low-scorers?

---

## 1. Setup & Load Results

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from scipy.stats import rankdata, spearmanr, mannwhitneyu
from scipy.optimize import minimize
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

# Global plot appearance for every figure in this notebook: a Matplotlib
# style sheet plus seaborn's 'husl' color palette.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette('husl')

# DATA_PATH is the root that inputs are read from; OUTPUT_PATH is where this
# notebook writes its CSVs and figures (and where the counterfactual history
# is read from below). Both are relative to the current working directory.
DATA_PATH = Path('../../data')
OUTPUT_PATH = DATA_PATH / 'obj2'

print("‚úì Libraries loaded")

In [None]:
# Load data from previous notebook
# fan_votes_df: one row per contestant-week; later cells read the columns
#   season, week, judge_score, fan_votes_estimate and was_eliminated.
# counterfactual_df: one row per season-week; later cells read the columns
#   season, week, methods_agree, p_disagree, jfac, rank_margin and pct_margin.
fan_votes_df = pd.read_csv(DATA_PATH / 'obj1' / 'fan_vote_estimates.csv')
counterfactual_df = pd.read_csv(OUTPUT_PATH / 'counterfactual_history_with_uncertainty.csv')

print(f"Loaded {len(fan_votes_df)} fan vote estimates")
print(f"Loaded {len(counterfactual_df)} counterfactual records")

# Last expression of the cell: rendered as a preview table by the notebook.
counterfactual_df.head()

---

## 2. Outcome Divergence Score (ODS)

**Definition:** The fraction of elimination weeks where rank and percent methods would produce different outcomes.

$$\text{ODS}_{season} = \frac{1}{W} \sum_{w=1}^{W} \mathbb{1}[E^{rank}_w \neq E^{pct}_w]$$

where $W$ is the number of elimination weeks in the season.

In [None]:
# Compute ODS by season
# Named aggregation produces the final column names directly:
#   disagreements   - weeks where the two methods pick different eliminations
#   total_weeks     - number of (non-null) week records in the season
#   mean_p_disagree - season mean of the Monte Carlo disagreement probability
ods_by_season = (
    counterfactual_df
    .groupby('season')
    .agg(
        disagreements=('methods_agree', lambda agree: (~agree).sum()),
        total_weeks=('week', 'count'),
        mean_p_disagree=('p_disagree', 'mean'),
    )
    .reset_index()
)

# Point-estimate ODS is the disagreement fraction; the uncertainty-aware
# variant is simply the mean Monte Carlo disagreement probability.
ods_by_season['ODS'] = ods_by_season['disagreements'] / ods_by_season['total_weeks']
ods_by_season['ODS_mc'] = ods_by_season['mean_p_disagree']

# Add actual method used: seasons 1-2 and 28+ are labelled 'rank',
# everything in between 'percent'.
ods_by_season['actual_method'] = [
    'rank' if (season in [1, 2] or season >= 28) else 'percent'
    for season in ods_by_season['season']
]

print("Outcome Divergence Score by Season:")
print("=" * 60)
print(
    ods_by_season[
        ['season', 'actual_method', 'disagreements', 'total_weeks', 'ODS', 'ODS_mc']
    ].to_string(index=False)
)

In [None]:
# Visualize ODS
# Two panels: (left) per-season ODS, point estimate next to the Monte Carlo
# estimate; (right) distribution of point-estimate ODS split by the method
# that was actually in use that season.
fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# Plot 1: ODS by season (point estimate vs MC)
ax1 = axes[0]
x = ods_by_season['season']
width = 0.35

# Side-by-side bars: each pair is centred on the season number.
bars1 = ax1.bar(x - width/2, ods_by_season['ODS'] * 100, width,
                label='Point Estimate ODS', color='steelblue', alpha=0.8)
bars2 = ax1.bar(x + width/2, ods_by_season['ODS_mc'] * 100, width,
                label='Monte Carlo ODS', color='coral', alpha=0.8)

# Dashed reference lines at the across-season mean of each estimate.
ax1.axhline(ods_by_season['ODS'].mean() * 100, color='steelblue', linestyle='--', alpha=0.5)
ax1.axhline(ods_by_season['ODS_mc'].mean() * 100, color='coral', linestyle='--', alpha=0.5)

ax1.set_xlabel('Season')
ax1.set_ylabel('Outcome Divergence Score (%)')
ax1.set_title('ODS by Season: How Often Do Methods Disagree?')
ax1.legend()

# Plot 2: ODS comparison by actual method
ax2 = axes[1]
rank_seasons = ods_by_season[ods_by_season['actual_method'] == 'rank']
pct_seasons = ods_by_season[ods_by_season['actual_method'] == 'percent']

data_to_plot = [
    rank_seasons['ODS'] * 100,
    pct_seasons['ODS'] * 100
]

bp = ax2.boxplot(data_to_plot, patch_artist=True)
# Tick labels are set on the axis instead of via boxplot(labels=...): that
# keyword was renamed to `tick_labels` in Matplotlib 3.9 and the old spelling
# is deprecated, while set_xticklabels works on every version. The boxes sit
# at x = 1 and x = 2, so the two labels map onto them in order.
ax2.set_xticklabels(['Rank Seasons\n(S1-2, S28-34)', 'Percent Seasons\n(S3-27)'])
bp['boxes'][0].set_facecolor('steelblue')
bp['boxes'][1].set_facecolor('coral')

ax2.set_ylabel('ODS (%)')
ax2.set_title('ODS Distribution by Actual Method Used')

# Add means (red diamonds drawn on top of the boxes at x = 1 and x = 2)
means = [rank_seasons['ODS'].mean() * 100, pct_seasons['ODS'].mean() * 100]
ax2.scatter([1, 2], means, color='red', s=100, zorder=5, marker='D', label='Mean')
ax2.legend()

plt.tight_layout()
plt.savefig(OUTPUT_PATH / 'ods_analysis.png', dpi=150, bbox_inches='tight')
plt.show()

# Note: both figures below are unweighted means of the per-season scores.
print(f"\nüìä Overall ODS (Point Estimate): {ods_by_season['ODS'].mean():.1%}")
print(f"üìä Overall ODS (Monte Carlo): {ods_by_season['ODS_mc'].mean():.1%}")

---

## 3. Judge-Fan Alignment Coefficient (JFAC)

**Definition:** Spearman correlation between judge score ranking and fan vote ranking.

$$\text{JFAC}_w = \rho_s(R^{judge}, R^{fan})$$

- JFAC = 1: Judges and fans agree perfectly
- JFAC = 0: No correlation
- JFAC = -1: Complete disagreement

**Hypothesis:** Low JFAC → methods are more likely to disagree

In [None]:
# JFAC per week is precomputed in counterfactual_df; keep the relevant
# columns and drop the weeks where no coefficient is available.
jfac_df = (
    counterfactual_df[['season', 'week', 'jfac', 'methods_agree', 'p_disagree']]
    .copy()
    .dropna(subset=['jfac'])
)

print("JFAC Statistics:")
print("=" * 50)
print(jfac_df['jfac'].describe())

# Split the coefficients by whether the two methods agreed that week.
agree_jfac = jfac_df.loc[jfac_df['methods_agree'], 'jfac']
disagree_jfac = jfac_df.loc[~jfac_df['methods_agree'], 'jfac']

print(f"\nüìä JFAC Comparison:")
print(f"   When methods AGREE: Mean = {agree_jfac.mean():.3f}, Median = {agree_jfac.median():.3f}")
print(f"   When methods DISAGREE: Mean = {disagree_jfac.mean():.3f}, Median = {disagree_jfac.median():.3f}")

# One-sided Mann-Whitney U test (agree > disagree); only run when there are
# more than five disagreement weeks to compare against.
if len(disagree_jfac) > 5:
    stat, pval = mannwhitneyu(agree_jfac, disagree_jfac, alternative='greater')
    print(f"\n   Mann-Whitney U test (agree > disagree): p = {pval:.4f}")
    if pval < 0.05:
        print("   ‚Üí SIGNIFICANT: Disagreements occur when JFAC is lower!")

In [None]:
# Visualize JFAC analysis
# 2x2 figure: (1) JFAC histograms split by method agreement, (2) mean JFAC per
# season, (3) JFAC vs Monte Carlo disagreement probability with a linear
# trend, (4) a text panel showing the JFAC / P(disagree) correlation.
fig, axes = plt.subplots(2, 2, figsize=(14, 12))

# Plot 1: JFAC distribution by agreement
# Histograms are density-normalised so groups of different size are comparable;
# dashed vertical lines mark each group's mean.
ax1 = axes[0, 0]
ax1.hist(agree_jfac, bins=25, alpha=0.5, label=f'Agree (n={len(agree_jfac)})', color='green', density=True)
if len(disagree_jfac) > 0:
    ax1.hist(disagree_jfac, bins=15, alpha=0.5, label=f'Disagree (n={len(disagree_jfac)})', color='red', density=True)
ax1.axvline(agree_jfac.mean(), color='green', linestyle='--', linewidth=2)
if len(disagree_jfac) > 0:
    ax1.axvline(disagree_jfac.mean(), color='red', linestyle='--', linewidth=2)
ax1.set_xlabel('Judge-Fan Alignment Coefficient (JFAC)')
ax1.set_ylabel('Density')
ax1.set_title('JFAC Distribution: Agreement vs Disagreement')
ax1.legend()

# Plot 2: JFAC over seasons
# Bar colour encodes the season's labelled method (same season rule as the
# ODS cell: 1-2 and 28+ are 'rank').
ax2 = axes[0, 1]
season_jfac = jfac_df.groupby('season')['jfac'].mean()
colors = ['steelblue' if s in [1, 2] or s >= 28 else 'coral' for s in season_jfac.index]
ax2.bar(season_jfac.index, season_jfac.values, color=colors, edgecolor='black')
ax2.axhline(season_jfac.mean(), color='red', linestyle='--', label=f'Mean: {season_jfac.mean():.2f}')
ax2.set_xlabel('Season')
ax2.set_ylabel('Mean JFAC')
ax2.set_title('Judge-Fan Alignment by Season\n(Blue=Rank, Orange=Percent)')
ax2.legend()

# Plot 3: JFAC vs P(disagree) scatter
# Degree-1 least-squares fit; z[0] is the slope in percentage points per unit
# of JFAC.
# NOTE(review): jfac_df was only filtered on 'jfac'; if 'p_disagree' can be
# missing, np.polyfit will not produce a usable fit - confirm upstream.
ax3 = axes[1, 0]
ax3.scatter(jfac_df['jfac'], jfac_df['p_disagree'] * 100, alpha=0.5, c='purple')
z = np.polyfit(jfac_df['jfac'], jfac_df['p_disagree'] * 100, 1)
p = np.poly1d(z)
x_line = np.linspace(jfac_df['jfac'].min(), jfac_df['jfac'].max(), 100)
ax3.plot(x_line, p(x_line), 'r--', linewidth=2, label=f'Trend (slope={z[0]:.1f})')
ax3.set_xlabel('JFAC')
ax3.set_ylabel('P(Disagree) %')
ax3.set_title('JFAC vs Probability of Method Disagreement')
ax3.legend()

# Plot 4: correlation summary (a text box, not a heatmap)
# corr_data is computed but not used below; only the scalar corr_val
# (Series.corr, Pearson by default) is displayed.
ax4 = axes[1, 1]
corr_data = jfac_df[['jfac', 'p_disagree']].corr()
corr_val = jfac_df['jfac'].corr(jfac_df['p_disagree'])
ax4.text(0.5, 0.5, f'Correlation\nJFAC vs P(Disagree)\n\nr = {corr_val:.3f}', 
         ha='center', va='center', fontsize=24, 
         bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))
ax4.set_xlim(0, 1)
ax4.set_ylim(0, 1)
ax4.axis('off')
ax4.set_title('Key Finding')

plt.tight_layout()
plt.savefig(OUTPUT_PATH / 'jfac_analysis.png', dpi=150, bbox_inches='tight')
plt.show()

---

## 4. Margin of Safety (MoS)

**Definition:** How close was the elimination? Measures the gap between the eliminated contestant and the second-worst.

**Rank Method:**
$$\text{MoS}_{rank} = S_{elim} - S_{2nd}$$

**Percent Method:**  
$$\text{MoS}_{pct} = S_{2nd} - S_{elim}$$

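Here $S$ is the combined judge-plus-fan score (the combined rank under the rank method, the combined percentage under the percent method); $S_{elim}$ is the eliminated contestant's value and $S_{2nd}$ the second-worst contestant's, so both margins are non-negative.

Larger MoS = more clear-cut elimination. Smaller MoS = controversial/close call.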

In [None]:
# MoS is already computed in counterfactual_df
# Work on a copy restricted to the two margin columns plus the agreement flag
# used by the following cells.
mos_df = counterfactual_df[['season', 'week', 'rank_margin', 'pct_margin', 'methods_agree']].copy()

# Summary statistics (count / mean / std / quartiles) for each margin.
print("Margin of Safety Statistics:")
print("="*50)
print(f"\nRank Margin (combined rank difference):")
print(mos_df['rank_margin'].describe())
print(f"\nPercent Margin (combined % difference):")
print(mos_df['pct_margin'].describe())

In [None]:
# MoS comparison: when methods agree vs disagree
# Split each margin by the per-week agreement flag.
agree_mos_rank = mos_df[mos_df['methods_agree']]['rank_margin']
disagree_mos_rank = mos_df[~mos_df['methods_agree']]['rank_margin']

agree_mos_pct = mos_df[mos_df['methods_agree']]['pct_margin']
disagree_mos_pct = mos_df[~mos_df['methods_agree']]['pct_margin']

print("üìä Margin of Safety Comparison:")
print(f"\n   RANK MARGIN:")
print(f"   When methods AGREE: Mean = {agree_mos_rank.mean():.3f}")
print(f"   When methods DISAGREE: Mean = {disagree_mos_rank.mean():.3f}")

print(f"\n   PERCENT MARGIN:")
print(f"   When methods AGREE: Mean = {agree_mos_pct.mean():.4f}")
print(f"   When methods DISAGREE: Mean = {disagree_mos_pct.mean():.4f}")

# State the conclusion only when the computed means support it. A NaN mean
# (no disagreement weeks) compares False and falls through to the last branch.
rank_margin_smaller = bool(disagree_mos_rank.mean() < agree_mos_rank.mean())
pct_margin_smaller = bool(disagree_mos_pct.mean() < agree_mos_pct.mean())

if rank_margin_smaller and pct_margin_smaller:
    print("\n   ‚Üí Disagreements tend to occur when MARGINS ARE SMALLER (closer eliminations)")
elif rank_margin_smaller or pct_margin_smaller:
    smaller_on = 'RANK' if rank_margin_smaller else 'PERCENT'
    print(f"\n   -> Mixed evidence: margins are smaller in disagreement weeks only for the {smaller_on} margin")
else:
    print("\n   -> No evidence that disagreements occur at smaller margins")

In [None]:
# Visualize MoS
# Two density-normalised histograms (rank margin, percent margin), each
# overlaying weeks where the methods agree (green) and disagree (red).
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Plot 1: Rank margin distribution
ax1 = axes[0]
ax1.hist(agree_mos_rank, bins=20, alpha=0.5, label='Agree', color='green', density=True)
# The disagreement histogram is skipped when there are no such weeks.
if len(disagree_mos_rank) > 0:
    ax1.hist(disagree_mos_rank, bins=10, alpha=0.5, label='Disagree', color='red', density=True)
ax1.set_xlabel('Rank Margin (gap to 2nd worst)')
ax1.set_ylabel('Density')
ax1.set_title('Margin of Safety: Rank Method')
ax1.legend()

# Plot 2: Percent margin distribution
ax2 = axes[1]
ax2.hist(agree_mos_pct, bins=20, alpha=0.5, label='Agree', color='green', density=True)
if len(disagree_mos_pct) > 0:
    ax2.hist(disagree_mos_pct, bins=10, alpha=0.5, label='Disagree', color='red', density=True)
ax2.set_xlabel('Percent Margin (gap to 2nd worst)')
ax2.set_ylabel('Density')
ax2.set_title('Margin of Safety: Percent Method')
ax2.legend()

plt.tight_layout()
plt.savefig(OUTPUT_PATH / 'margin_of_safety.png', dpi=150, bbox_inches='tight')
plt.show()

---

## 5. Method Sensitivity Index (MSI)

**Definition:** The minimum perturbation to fan votes needed to flip the elimination outcome.

$$\text{MSI} = \min_{\Delta F} \|\Delta F\|_2 \quad \text{s.t. elimination changes}$$

Lower MSI = more precarious elimination. High MSI = robust outcome.

In [None]:
def _min_flip_perturbation(simulate, judge_scores, fan_votes,
                           max_scale=2, n_iter=20, min_votes=1):
    """Search for the smallest fan-vote perturbation that changes who is eliminated.

    For every contestant other than the currently eliminated one, the target's
    fan votes are scaled down and the currently eliminated contestant's votes
    scaled up by the same relative factor ``scale``. A bisection over
    ``[0, max_scale]`` looks for the smallest factor at which ``simulate``
    eliminates the target instead.

    Args:
        simulate: Callable ``(judge_scores, fan_votes) -> dict`` whose result
            has an ``'eliminated_idx'`` entry.
        judge_scores: Sequence of judge scores, one per contestant.
        fan_votes: Sequence of fan votes, one per contestant.
        max_scale: Upper bound of the relative perturbation searched.
        n_iter: Number of bisection steps.
        min_votes: Floor applied to every perturbed vote count.

    Returns:
        The L2 norm of the smallest vote change found (over all targets) that
        flips the elimination, or ``np.nan`` when no tested perturbation does.

    Notes:
        The norm is measured on the votes actually fed to ``simulate`` (after
        flooring at ``min_votes``), not on the unclipped request, so the value
        never overstates the change that was applied.
        The bisection assumes that flipping is monotone in ``scale``; this is
        a heuristic and is not guaranteed for every input.
        NOTE(review): the floor of 1 presumes fan votes are counts, not
        shares in [0, 1] - confirm against the estimates file.
    """
    judge_scores = np.array(judge_scores)
    fan_votes = np.array(fan_votes, dtype=float)
    n = len(judge_scores)

    # The eliminated contestant is re-derived under this method rather than
    # taken from the caller, so the search starts from the simulated outcome.
    current_elim = simulate(judge_scores, fan_votes)['eliminated_idx']

    def perturbed_votes(target_elim, scale):
        # Decrease the target's votes, increase the current loser's votes.
        delta = np.zeros(n)
        delta[target_elim] = -scale * fan_votes[target_elim]
        delta[current_elim] = scale * fan_votes[current_elim]
        return np.maximum(fan_votes + delta, min_votes)  # Keep positive

    def flips_to(target_elim, scale):
        outcome = simulate(judge_scores, perturbed_votes(target_elim, scale))
        return outcome['eliminated_idx'] == target_elim

    min_perturbation = np.inf

    # Try to make each other contestant the eliminated one.
    for target_elim in range(n):
        if target_elim == current_elim:
            continue

        low, high = 0, max_scale
        for _ in range(n_iter):
            mid = (low + high) / 2
            if flips_to(target_elim, mid):
                high = mid
            else:
                low = mid

        # `high` is only a valid answer if it really flips the outcome
        # (it may still be the untouched upper bound).
        if flips_to(target_elim, high):
            applied = perturbed_votes(target_elim, high) - fan_votes
            min_perturbation = min(min_perturbation, np.linalg.norm(applied))

    return min_perturbation if min_perturbation < np.inf else np.nan


def compute_msi_rank(judge_scores, fan_votes, eliminated_idx):
    """
    Compute Method Sensitivity Index for rank method.

    Find minimum L2 perturbation to fan votes that changes elimination.

    Args:
        judge_scores: Sequence of judge scores, one per contestant.
        fan_votes: Sequence of fan votes, one per contestant.
        eliminated_idx: Accepted for interface compatibility but not used; the
            eliminated contestant is re-derived with
            ``simulate_rank_elimination``.

    Returns:
        The MSI as a float, or ``np.nan`` if no tested perturbation flips the
        rank-method elimination.
    """
    return _min_flip_perturbation(simulate_rank_elimination, judge_scores, fan_votes)


def compute_msi_percent(judge_scores, fan_votes, eliminated_idx):
    """
    Compute Method Sensitivity Index for percent method.

    Same search as ``compute_msi_rank`` but the elimination is decided by
    ``simulate_percent_elimination``.

    Args:
        judge_scores: Sequence of judge scores, one per contestant.
        fan_votes: Sequence of fan votes, one per contestant.
        eliminated_idx: Accepted for interface compatibility but not used.

    Returns:
        The MSI as a float, or ``np.nan`` if no tested perturbation flips the
        percent-method elimination.
    """
    return _min_flip_perturbation(simulate_percent_elimination, judge_scores, fan_votes)


# Elimination simulators used by the MSI and USP computations
def simulate_rank_elimination(judge_scores, fan_votes, tie_method='average'):
    """Simulate one week of elimination under the rank method.

    Contestants are ranked separately by judge score and by fan votes
    (rank 1 = highest value); the two ranks are added and the contestant with
    the largest sum is eliminated (first such index on ties).

    Args:
        judge_scores: Sequence of judge scores, one per contestant.
        fan_votes: Sequence of fan votes, one per contestant.
        tie_method: ``method`` argument forwarded to ``scipy.stats.rankdata``.

    Returns:
        dict with ``'eliminated_idx'`` (position of the eliminated contestant),
        ``'combined_ranks'`` (array of summed ranks) and ``'margin'`` (gap
        between the worst and second-worst summed rank; 0 for one contestant).
    """
    # Negating the values makes rankdata assign rank 1 to the highest score.
    rank_sum = (
        rankdata(-np.array(judge_scores), method=tie_method)
        + rankdata(-np.array(fan_votes), method=tie_method)
    )
    worst = np.argmax(rank_sum)

    if len(judge_scores) > 1:
        gap = rank_sum[worst] - np.sort(rank_sum)[-2]
    else:
        gap = 0

    return {'eliminated_idx': worst, 'combined_ranks': rank_sum, 'margin': gap}

def simulate_percent_elimination(judge_scores, fan_votes):
    """Simulate one week of elimination under the percent method.

    Each contestant's share of the total judge score and share of the total
    fan vote are added; the contestant with the smallest sum is eliminated
    (first such index on ties).

    Args:
        judge_scores: Sequence of judge scores, one per contestant.
        fan_votes: Sequence of fan votes, one per contestant.

    Returns:
        dict with ``'eliminated_idx'`` (position of the eliminated contestant),
        ``'combined_pct'`` (array of summed shares) and ``'margin'`` (gap
        between the second-lowest and lowest summed share; 0 for one
        contestant).
    """
    judge_arr = np.array(judge_scores)
    fan_arr = np.array(fan_votes)

    share_total = judge_arr / judge_arr.sum() + fan_arr / fan_arr.sum()
    lowest = np.argmin(share_total)

    if len(judge_arr) > 1:
        gap = np.sort(share_total)[1] - share_total[lowest]
    else:
        gap = 0

    return {'eliminated_idx': lowest, 'combined_pct': share_total, 'margin': gap}

print("‚úì MSI functions defined")

In [None]:
# Compute MSI for a sample of weeks (computationally expensive)
print("Computing MSI for sample weeks...")

# Fixed output schema: keeps msi_df's columns present even when no sampled
# week contains an elimination, so the cells below never hit a KeyError.
msi_columns = ['season', 'week', 'msi_rank', 'msi_pct', 'msi_ratio']

msi_results = []
grouped = fan_votes_df.groupby(['season', 'week'])

# Sample every 3rd (season, week) group to reduce computation
sample_keys = list(grouped.groups.keys())[::3]

for (season, week) in tqdm(sample_keys, desc="Computing MSI"):
    week_df = grouped.get_group((season, week))

    # Weeks without an elimination have nothing to flip.
    if week_df['was_eliminated'].sum() == 0:
        continue

    judge_scores = week_df['judge_score'].values
    fan_votes = week_df['fan_votes_estimate'].values
    # Position (within the week) of the first contestant flagged as eliminated.
    elim_idx = np.where(week_df['was_eliminated'].values)[0][0]

    msi_rank = compute_msi_rank(judge_scores, fan_votes, elim_idx)
    msi_pct = compute_msi_percent(judge_scores, fan_votes, elim_idx)

    msi_results.append({
        'season': season,
        'week': week,
        'msi_rank': msi_rank,
        'msi_pct': msi_pct,
        # NaN or non-positive percent MSI yields an undefined ratio.
        'msi_ratio': msi_rank / msi_pct if msi_pct > 0 else np.nan
    })

msi_df = pd.DataFrame(msi_results, columns=msi_columns)
print(f"\n‚úì Computed MSI for {len(msi_df)} weeks")

In [None]:
# Analyze MSI
print("Method Sensitivity Index Analysis:")
print("="*50)

# Normalize MSI by each method's own maximum. These columns describe the
# *shape* of each distribution (and are saved / reported downstream); because
# each is scaled by a different constant they must not be used to decide
# which method needs the larger perturbation.
msi_df['msi_rank_norm'] = msi_df['msi_rank'] / msi_df['msi_rank'].max()
msi_df['msi_pct_norm'] = msi_df['msi_pct'] / msi_df['msi_pct'].max()

print(f"\nRank Method MSI (normalized):")
print(f"  Mean: {msi_df['msi_rank_norm'].mean():.3f}")
print(f"  Median: {msi_df['msi_rank_norm'].median():.3f}")

print(f"\nPercent Method MSI (normalized):")
print(f"  Mean: {msi_df['msi_pct_norm'].mean():.3f}")
print(f"  Median: {msi_df['msi_pct_norm'].median():.3f}")

# Both raw MSI values are L2 norms of fan-vote changes, i.e. in the same
# units, so robustness is compared on the raw values over the weeks where
# both methods produced a finite MSI.
paired_msi = msi_df[['msi_rank', 'msi_pct']].dropna()
mean_msi_rank = paired_msi['msi_rank'].mean()
mean_msi_pct = paired_msi['msi_pct'].mean()

print(f"\nRaw MSI over {len(paired_msi)} paired weeks:")
print(f"  Rank mean: {mean_msi_rank:.4g}")
print(f"  Percent mean: {mean_msi_pct:.4g}")

print(f"\nüìä Interpretation:")
if mean_msi_rank > mean_msi_pct:
    print("  ‚Üí RANK method requires LARGER perturbations to flip outcomes")
    print("  ‚Üí RANK method is MORE STABLE/ROBUST")
elif mean_msi_pct > mean_msi_rank:
    print("  ‚Üí PERCENT method requires LARGER perturbations to flip outcomes")
    print("  ‚Üí PERCENT method is MORE STABLE/ROBUST")
else:
    # Equal means, or no paired weeks (NaN means compare False both ways).
    print("  -> No difference in required perturbation (or insufficient paired data)")

---

## 6. Underdog Survival Probability (USP)

**Definition:** Probability that the contestant with the LOWEST judge score survives.

$$\text{USP}_{method} = P(\text{survives} | \text{lowest judge score})$$

Higher USP = method is more "forgiving" to poor technical performers.

In [None]:
# Compute USP for each method
def compute_usp(fan_votes_df, method='rank'):
    """Return the Underdog Survival Probability for one voting method.

    Every (season, week) group with at least one recorded elimination is
    re-simulated under ``method``; the result is the fraction of those weeks
    in which the contestant with the lowest judge score (first one on ties)
    is not the simulated elimination.

    Args:
        fan_votes_df: DataFrame with columns ``season``, ``week``,
            ``was_eliminated``, ``judge_score`` and ``fan_votes_estimate``.
        method: ``'rank'`` for the rank method; any other value selects the
            percent method.

    Returns:
        Survival fraction in [0, 1], or 0 when no week qualifies.
    """
    simulate = simulate_rank_elimination if method == 'rank' else simulate_percent_elimination

    weeks_counted = 0
    underdog_survivals = 0

    for _, week_df in fan_votes_df.groupby(['season', 'week']):
        # Only weeks with an actual elimination are considered.
        if week_df['was_eliminated'].sum() == 0:
            continue

        judge_scores = week_df['judge_score'].values
        fan_votes = week_df['fan_votes_estimate'].values
        weeks_counted += 1

        outcome = simulate(judge_scores, fan_votes)
        # The underdog survives when someone else is eliminated.
        if outcome['eliminated_idx'] != np.argmin(judge_scores):
            underdog_survivals += 1

    if weeks_counted > 0:
        return underdog_survivals / weeks_counted
    return 0


# Survival probability of the lowest judge scorer under each method.
usp_rank = compute_usp(fan_votes_df, method='rank')
usp_pct = compute_usp(fan_votes_df, method='percent')

print("Underdog Survival Probability (USP):")
print("="*50)
print(f"\nüìä RANK method USP: {usp_rank:.1%}")
print(f"üìä PERCENT method USP: {usp_pct:.1%}")

# The printed gap is the difference of two probabilities, i.e. percentage
# points. When the two values are equal the else-branch reports the percent
# method with a 0.0% gap.
print(f"\nüìä Interpretation:")
if usp_rank > usp_pct:
    print(f"  ‚Üí RANK method is {usp_rank - usp_pct:.1%} MORE FORGIVING to underdogs")
    print("  ‚Üí Under RANK, poor judge scores are easier to overcome with fan votes")
else:
    print(f"  ‚Üí PERCENT method is {usp_pct - usp_rank:.1%} MORE FORGIVING to underdogs")

In [None]:
# Visualize USP
# Single bar chart comparing the two survival probabilities (in percent).
fig, ax = plt.subplots(figsize=(10, 6))

methods = ['Rank Method', 'Percent Method']
usp_values = [usp_rank * 100, usp_pct * 100]
colors = ['steelblue', 'coral']

bars = ax.bar(methods, usp_values, color=colors, edgecolor='black', width=0.6)

# Add value labels (centred above each bar, offset by one unit on the y axis)
for bar, val in zip(bars, usp_values):
    ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 1,
            f'{val:.1f}%', ha='center', va='bottom', fontsize=14, fontweight='bold')

ax.set_ylabel('Underdog Survival Probability (%)', fontsize=12)
ax.set_title('How Often Does the Lowest Judge Scorer Survive?\n(Higher = Method favors fan votes more)', fontsize=14)
# Leave 20% headroom above the tallest bar for the value labels.
ax.set_ylim(0, max(usp_values) * 1.2)

# Add annotation
# diff is an absolute gap in percentage points; on an exact tie 'PERCENT' is
# reported as the winner with a gap of 0.0.
diff = abs(usp_rank - usp_pct) * 100
winner = 'RANK' if usp_rank > usp_pct else 'PERCENT'
ax.annotate(f'{winner} method is {diff:.1f}% more\nforgiving to underdogs',
            xy=(0.5, 0.7), xycoords='axes fraction',
            fontsize=12, ha='center',
            bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

plt.tight_layout()
plt.savefig(OUTPUT_PATH / 'underdog_survival.png', dpi=150, bbox_inches='tight')
plt.show()

---

## 7. Summary: Method Comparison Metrics

In [None]:
# Create summary table
# Every interpretation below is derived from the computed values so the table
# cannot contradict the numbers next to it.
ods_point_mean = ods_by_season["ODS"].mean()
ods_mc_mean = ods_by_season["ODS_mc"].mean()

# JFAC: the "alignment" reading only holds if agreement weeks really have the
# higher mean coefficient.
if len(disagree_jfac) == 0:
    jfac_agree_note = 'N/A (no disagreement weeks to compare)'
    jfac_disagree_note = 'N/A (no disagreement weeks to compare)'
elif agree_jfac.mean() > disagree_jfac.mean():
    jfac_agree_note = 'Higher alignment ‚Üí methods agree'
    jfac_disagree_note = 'Lower alignment ‚Üí methods disagree'
else:
    jfac_agree_note = 'Not supported: JFAC is not higher when methods agree'
    jfac_disagree_note = 'Not supported: JFAC is not lower when methods disagree'

# MSI: compare raw values (same units for both methods) on weeks where both
# are defined; the per-method normalised columns are scaled by different
# maxima and are therefore not comparable across methods.
summary_msi = msi_df[['msi_rank', 'msi_pct']].dropna()
summary_msi_rank = summary_msi['msi_rank'].mean()
summary_msi_pct = summary_msi['msi_pct'].mean()
if summary_msi_rank > summary_msi_pct:
    msi_note = 'Rank method more stable'
elif summary_msi_pct > summary_msi_rank:
    msi_note = 'Percent method more stable'
else:
    msi_note = 'No difference / insufficient data'

summary_data = {
    'Metric': [
        'Outcome Divergence Score (ODS)',
        'Mean JFAC when methods agree',
        'Mean JFAC when methods disagree',
        'Underdog Survival Probability (USP)',
        'Method Sensitivity (higher = more robust)'
    ],
    'Value/Finding': [
        f'{ods_point_mean:.1%} (MC: {ods_mc_mean:.1%})',
        f'{agree_jfac.mean():.3f}',
        f'{disagree_jfac.mean():.3f}' if len(disagree_jfac) > 0 else 'N/A',
        f'Rank: {usp_rank:.1%}, Percent: {usp_pct:.1%}',
        f'Rank: {summary_msi_rank:.4g}, Pct: {summary_msi_pct:.4g}'
    ],
    'Interpretation': [
        f'Methods disagree in {ods_point_mean:.1%} of elimination weeks (season average)',
        jfac_agree_note,
        jfac_disagree_note,
        'Rank method more forgiving to underdogs' if usp_rank > usp_pct else 'Percent method more forgiving',
        msi_note
    ]
}

summary_df = pd.DataFrame(summary_data)

print("="*80)
print("DIVERGENCE ANALYSIS SUMMARY")
print("="*80)
print(summary_df.to_string(index=False))

In [None]:
# Save all results
# Only the two tables are written here; the PNG files listed in the message
# below were already saved by the plotting cells above.
ods_by_season.to_csv(OUTPUT_PATH / 'ods_by_season.csv', index=False)
msi_df.to_csv(OUTPUT_PATH / 'msi_analysis.csv', index=False)

print(f"""
‚úì FILES SAVED:
   - {OUTPUT_PATH / 'ods_by_season.csv'}
   - {OUTPUT_PATH / 'msi_analysis.csv'}
   - {OUTPUT_PATH / 'ods_analysis.png'}
   - {OUTPUT_PATH / 'jfac_analysis.png'}
   - {OUTPUT_PATH / 'margin_of_safety.png'}
   - {OUTPUT_PATH / 'underdog_survival.png'}

‚û°Ô∏è NEXT: See 07_fan_vote_leverage.ipynb for quantifying fan power
""")