# CFP Committee Simulator - Validation & Backtesting

This notebook validates the simulator's rankings against historical final CFP rankings using Spearman rank correlation.

In [None]:
# Cell 1: Historical Validation Setup
from scipy.stats import spearmanr
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

# Reference data for backtesting: final CFP rankings (FBS teams only),
# keyed by season year. Each value lists twelve teams in ranking order,
# so a team's position in its list is its CFP rank (first entry = No. 1).
# NOTE(review): the orderings are hard-coded here; confirm each season
# against the official final CFP release before trusting backtest results.
historical_cfp = {
    2023: [
        "Michigan", "Washington", "Texas", "Alabama",
        "Georgia", "Florida State", "Oregon", "Ohio State",
        "Missouri", "Penn State", "Ole Miss", "Oklahoma",
    ],
    2022: [
        "Georgia", "Michigan", "TCU", "Ohio State",
        "Alabama", "Tennessee", "Penn State", "Washington",
        "Clemson", "Kansas State", "Utah", "USC",
    ],
    2021: [
        "Alabama", "Michigan", "Georgia", "Cincinnati",
        "Notre Dame", "Ohio State", "Baylor", "Ole Miss",
        "Oklahoma State", "Michigan State", "Oklahoma", "Pittsburgh",
    ],
    2020: [
        "Alabama", "Clemson", "Ohio State", "Notre Dame",
        "Texas A&M", "Florida", "Cincinnati", "Georgia",
        "Iowa State", "Miami", "North Carolina", "Indiana",
    ],
    2019: [
        "LSU", "Ohio State", "Clemson", "Oklahoma",
        "Georgia", "Oregon", "Florida", "Alabama",
        "Penn State", "Utah", "Wisconsin", "Auburn",
    ],
    2018: [
        "Clemson", "Alabama", "Notre Dame", "Oklahoma",
        "Georgia", "Ohio State", "Michigan", "UCF",
        "Florida", "LSU", "Washington", "Penn State",
    ],
    2017: [
        "Clemson", "Oklahoma", "Georgia", "Alabama",
        "Ohio State", "Wisconsin", "Auburn", "USC",
        "Penn State", "Miami", "Washington", "UCF",
    ],
    2016: [
        "Alabama", "Clemson", "Ohio State", "Washington",
        "Penn State", "Michigan", "Oklahoma", "Wisconsin",
        "USC", "Florida State", "Oklahoma State", "Colorado",
    ],
    2015: [
        "Clemson", "Alabama", "Michigan State", "Oklahoma",
        "Iowa", "Stanford", "Ohio State", "Notre Dame",
        "Florida State", "North Carolina", "TCU", "Ole Miss",
    ],
    2014: [
        "Alabama", "Oregon", "Florida State", "Ohio State",
        "Baylor", "TCU", "Michigan State", "Mississippi State",
        "Georgia Tech", "Ole Miss", "Arizona", "Kansas State",
    ],
}

# Report which seasons are available (dict insertion order, newest first).
print("Historical CFP rankings loaded for validation")
print(f"Available seasons for backtesting: {list(historical_cfp)}")

In [None]:
# Cell 2: Calculate Spearman Correlation
def validate_rankings(simulator_rankings, cfp_rankings):
    """
    Compare simulator rankings to CFP rankings using Spearman correlation.

    Only teams that appear in both inputs are compared; CFP teams missing
    from the simulator output are skipped. Each compared team contributes
    its position in ``cfp_rankings`` (1-indexed) and its simulator 'rank'
    value. Spearman correlation depends only on relative order, so gaps left
    by skipped teams do not affect the result.

    Args:
        simulator_rankings: DataFrame with 'team' and 'rank' columns. If a
            team appears in more than one row, the first row's rank is used.
        cfp_rankings: List of teams in CFP ranking order

    Returns:
        Tuple ``(correlation, p_value)`` as produced by
        ``scipy.stats.spearmanr``, or ``(None, None)`` (after printing a
        notice) when fewer than two teams are shared. Per SciPy's
        documentation the coefficient is nan when either rank sequence is
        constant.
    """
    # Build the team -> rank lookup once instead of re-scanning the DataFrame
    # for every CFP team. setdefault keeps the first occurrence of a team,
    # matching a "first matching row" lookup.
    sim_rank_by_team = {}
    for team, rank in zip(simulator_rankings['team'], simulator_rankings['rank']):
        sim_rank_by_team.setdefault(team, rank)

    # Pair each shared team's true CFP position with its simulator rank.
    paired = [
        (cfp_position, sim_rank_by_team[team])
        for cfp_position, team in enumerate(cfp_rankings, start=1)
        if team in sim_rank_by_team
    ]

    if len(paired) < 2:
        print(f"⚠️  Not enough common teams ({len(paired)}) for correlation")
        return None, None

    cfp_ranks = [cfp_position for cfp_position, _ in paired]
    sim_ranks = [sim_rank for _, sim_rank in paired]

    # Calculate Spearman correlation
    correlation, p_value = spearmanr(sim_ranks, cfp_ranks)

    return correlation, p_value

# Usage reminder only: no validation runs here, because it needs simulator
# output for a historical season. All lines are emitted by a single print
# call, separated by newlines.
print(
    'Validation function ready',
    '\nTo validate:',
    '1. Run data pipeline for historical year (e.g., 2023)',
    '2. Run ranking algorithms on that data',
    '3. Call validate_rankings() with your results',
    '\nTarget: Spearman correlation > 0.85',
    sep='\n',
)