# CFP 12-Team Playoff Selection

This notebook implements the official 12-team College Football Playoff selection protocol:

**Selection Rules:**
- **5 Automatic Bids:** Highest-ranked conference champions
- **7 At-Large Bids:** Next highest-ranked teams regardless of conference
- **Seeding:** Top 4 seeds (conference champions) receive first-round byes
- **First Round:** Seeds 5-8 host seeds 12-9 on campus
- **Quarterfinals & Beyond:** Neutral-site bowl games
- **No Reseeding:** Fixed bracket structure

In [15]:
# Cell 1: Setup and Imports
import sys
import os
import pandas as pd
import json
from pathlib import Path
from datetime import datetime
from IPython.display import HTML, display

# Add src to path
sys.path.insert(0, os.path.abspath('..'))

from src.playoff.bracket import (
    select_playoff_field,
    seed_playoff_teams,
    create_bracket_matchups,
    visualize_bracket,
    visualize_bracket_html
)

# Create output directories
output_dir = Path('./data/output')
brackets_dir = output_dir / 'brackets'
exports_dir = output_dir / 'exports'
brackets_dir.mkdir(parents=True, exist_ok=True)
exports_dir.mkdir(parents=True, exist_ok=True)

print('‚úÖ Imports loaded successfully')
print(f'Output directory: {output_dir}')

‚úÖ Imports loaded successfully
Output directory: data/output


In [16]:
# Cell 2: Load Rankings
year = 2025
week = 15

# Load final composite rankings
rankings_dir = output_dir / 'rankings'
final_rankings = pd.read_csv(rankings_dir / f'composite_rankings_{year}_week{week}.csv')

# Load resume rankings to get conf_champ column
# Note: resume_rankings.csv has conf_champ but not a separate conference column
resume_rankings = pd.read_csv(rankings_dir / f'resume_rankings_{year}_week{week}.csv')

# Extract conference from conf_champ column (format: 'Yes (Conference Name)' or 'No')
# Also need to get conference for non-champions from games data

# First, extract conference from conf_champ for champions
def extract_conference_from_conf_champ(conf_champ_value):
    """Extract conference name from conf_champ column"""
    if pd.isna(conf_champ_value):
        return None
    if 'Yes (' in str(conf_champ_value):
        # Format: 'Yes (Sun Belt)' -> 'Sun Belt'
        return str(conf_champ_value).split('(')[1].split(')')[0]
    return None

# Extract conference for champions
resume_rankings['conference'] = resume_rankings['conf_champ'].apply(extract_conference_from_conf_champ)

# For non-champions, we need to get conference from games data
# Load games data to extract conference information
cache_dir = Path(f'./data/cache/{year}')
parquet_path = cache_dir / f'games_w{week}.parquet'
csv_path = cache_dir / f'games_w{week}.csv'

if parquet_path.exists():
    try:
        games_df = pd.read_parquet(parquet_path)
    except (ImportError, ModuleNotFoundError):
        games_df = pd.read_csv(csv_path)
elif csv_path.exists():
    games_df = pd.read_csv(csv_path)
else:
    games_df = pd.DataFrame()  # Empty if not found

# Extract conference from games data for teams missing conference
if not games_df.empty:
    team_conferences = {}
    for _, game in games_df.iterrows():
        if pd.notna(game.get('home_conference')):
            team_conferences[game['home_team']] = game['home_conference']
        if pd.notna(game.get('away_conference')):
            team_conferences[game['away_team']] = game['away_conference']
    
    # Fill in missing conferences from games data
    resume_rankings['conference'] = resume_rankings.apply(
        lambda row: row['conference'] if pd.notna(row['conference']) else team_conferences.get(row['team']),
        axis=1
    )

# Merge to add conference and conf_champ columns to final_rankings
final_rankings = final_rankings.merge(
    resume_rankings[['team', 'conference', 'conf_champ']],
    on='team',
    how='left'  # Left join to keep all teams from composite rankings
)

# Fill missing values (for teams without conferences or championship status)
final_rankings['conference'] = final_rankings['conference'].fillna('Independent')
final_rankings['conf_champ'] = final_rankings['conf_champ'].fillna('No')

# Configuration

print(f'Season: {year} (2025-2026)')
print(f'Week: {week}')
print(f'Total teams: {len(final_rankings)}')
print(f'Top team: #{1} {final_rankings.iloc[0]["team"]}')
print()
print('Top 10 Rankings:')
print(final_rankings[['team', 'rank', 'wins', 'losses', 'composite_score']].head(10))
print()
print(f'Conference champions found: {(final_rankings["conf_champ"].str.contains("Yes", na=False)).sum()}')


Season: 2025 (2025-2026)
Week: 15
Total teams: 136
Top team: #1 Notre Dame

Top 10 Rankings:
               team  rank  wins  losses  composite_score
0        Notre Dame     1     9       0         0.808513
1     James Madison     2     9       0         0.752737
2           Alabama     3     7       1         0.713791
3     South Florida     4     6       2         0.687789
4            Oregon     5     7       1         0.680856
5        Ohio State     6     9       0         0.679691
6           Georgia     7     8       1         0.643742
7        Texas Tech     8     7       1         0.634297
8       North Texas     9     7       1         0.629774
9  Washington State    10     4       4         0.626783

Conference champions found: 9


---

## Step 1: Select 12-Team Playoff Field

Apply the 5+7 protocol to select the playoff field.

In [17]:
# Cell 3: Select Playoff Field using 5+7 Protocol
# Ensure rankings are sorted
final_rankings_sorted = final_rankings.sort_values('rank').reset_index(drop=True)

# Select playoff field
selection = select_playoff_field(
    rankings_df=final_rankings_sorted,
    conference_col='conference',
    conf_champ_col='conf_champ',
    n_auto_bids=5,
    n_at_large=7
)

# Display audit log
print('=' * 80)
print('PLAYOFF SELECTION AUDIT LOG')
print('=' * 80)
for log_entry in selection.audit_log:
    print(log_entry)

# Check for displaced team
if selection.champ_pulled_in:
    print()
    print('!' * 80)
    print('SPECIAL CASE: Conference champion outside Top 12 pulled into field')
    print('!' * 80)
    if selection.displaced_team:
        print(f'Displaced team: #{selection.displaced_team["rank"]} {selection.displaced_team["team"]}')

PLAYOFF SELECTION AUDIT LOG
Found 9 conference champions

Automatic bids (top 5 conference champions):
  1. #2 James Madison (Yes (Sun Belt))
  2. #6 Ohio State (Yes (Big Ten))
  3. #7 Georgia (Yes (SEC))
  4. #8 Texas Tech (Yes (Big 12))
  5. #9 North Texas (Yes (American Athletic))

At-large bids (7 spots):
  1. #1 Notre Dame
  2. #3 Alabama
  3. #4 South Florida
  4. #5 Oregon
  5. #10 Washington State
  6. #11 USC
  7. #12 Ole Miss

Final 12-team playoff field:
  1. #1 Notre Dame (AT-LARGE)
  2. #2 James Madison (AUTO)
  3. #3 Alabama (AT-LARGE)
  4. #4 South Florida (AT-LARGE)
  5. #5 Oregon (AT-LARGE)
  6. #6 Ohio State (AUTO)
  7. #7 Georgia (AUTO)
  8. #8 Texas Tech (AUTO)
  9. #9 North Texas (AUTO)
  10. #10 Washington State (AT-LARGE)
  11. #11 USC (AT-LARGE)
  12. #12 Ole Miss (AT-LARGE)


---

## Step 2: Seed Playoff Teams

Top 4 conference champions receive first-round byes and seeds 1-4.

In [18]:
# Cell 4: Seed the Bracket
seeded_df = seed_playoff_teams(
    playoff_teams=selection.playoff_teams,
    auto_bid_teams=selection.auto_bids
)

print('=' * 80)
print('SEEDED 12-TEAM PLAYOFF FIELD')
print('=' * 80)
print()
print(f'{'Seed':<6} {'Team':<30} {'Rank':<6} {'Record':<10} {'Bye?':<10} {'Status'}')
print('-' * 80)

auto_bid_names = {ab['team'] for ab in selection.auto_bids}

for _, team in seeded_df.iterrows():
    record = f"{int(team.get('wins', 0))}-{int(team.get('losses', 0))}"
    bye_status = 'YES' if team['is_bye'] else 'No'
    status = 'AUTO-BID' if team['team'] in auto_bid_names else 'AT-LARGE'
    print(f"{team['seed']:<6} {team['team']:<30} #{team['rank']:<5} {record:<10} {bye_status:<10} {status}")

SEEDED 12-TEAM PLAYOFF FIELD

Seed   Team                           Rank   Record     Bye?       Status
--------------------------------------------------------------------------------
1      James Madison                  #2     0-0        YES        AUTO-BID
2      Ohio State                     #6     0-0        YES        AUTO-BID
3      Georgia                        #7     0-0        YES        AUTO-BID
4      Texas Tech                     #8     0-0        YES        AUTO-BID
5      Notre Dame                     #1     0-0        No         AT-LARGE
6      Alabama                        #3     0-0        No         AT-LARGE
7      South Florida                  #4     0-0        No         AT-LARGE
8      Oregon                         #5     0-0        No         AT-LARGE
9      North Texas                    #9     0-0        No         AUTO-BID
10     Washington State               #10    0-0        No         AT-LARGE
11     USC                            #11    0-0       

---

## Step 3: Create Bracket Matchups

In [19]:
# Cell 5: Create Bracket Matchups
first_round, all_rounds = create_bracket_matchups(seeded_df)

print('=' * 80)
print('FIRST ROUND MATCHUPS (On-Campus Sites)')
print('=' * 80)
print()

for matchup in first_round:
    print(f'Game {matchup.game_num}:')
    print(f'  Seed #{matchup.seed_low}: {matchup.team_low}')
    print(f'    @')
    print(f'  Seed #{matchup.seed_high}: {matchup.team_high} (HOST)')
    print(f'  Location: {matchup.location}')
    print()

print('=' * 80)
print('QUARTERFINALS (Bowl Games, Neutral Sites)')
print('=' * 80)
print()

for matchup in all_rounds['quarterfinals']:
    print(f'QF {matchup.game_num}:')
    print(f'  Seed #{matchup.seed_high}: {matchup.team_high}')
    print(f'    vs')
    print(f'  {matchup.team_low}')
    print(f'  Location: {matchup.location}')
    print()

FIRST ROUND MATCHUPS (On-Campus Sites)

Game 1:
  Seed #12: Ole Miss
    @
  Seed #5: Notre Dame (HOST)
  Location: Campus of #5 seed

Game 2:
  Seed #11: USC
    @
  Seed #6: Alabama (HOST)
  Location: Campus of #6 seed

Game 3:
  Seed #10: Washington State
    @
  Seed #7: South Florida (HOST)
  Location: Campus of #7 seed

Game 4:
  Seed #9: North Texas
    @
  Seed #8: Oregon (HOST)
  Location: Campus of #8 seed

QUARTERFINALS (Bowl Games, Neutral Sites)

QF 1:
  Seed #1: James Madison
    vs
  Winner 8/9
  Location: Bowl Game (Neutral Site)

QF 2:
  Seed #2: Ohio State
    vs
  Winner 7/10
  Location: Bowl Game (Neutral Site)

QF 3:
  Seed #3: Georgia
    vs
  Winner 6/11
  Location: Bowl Game (Neutral Site)

QF 4:
  Seed #4: Texas Tech
    vs
  Winner 5/12
  Location: Bowl Game (Neutral Site)



---

## Visual Playoff Bracket

Interactive HTML bracket visualization.

In [20]:
# Cell 6: Display Visual Bracket
# Generate HTML bracket
html_bracket = visualize_bracket_html(seeded_df, first_round)

# Display in notebook
display(HTML(html_bracket))

# Also show ASCII version
print()
print('=' * 80)
print('ASCII BRACKET')
print('=' * 80)
print()
ascii_bracket = visualize_bracket(seeded_df, first_round, f'{year} CFP Bracket - Week {week}')
print(ascii_bracket)


ASCII BRACKET

                           2025 CFP Bracket - Week 15                           

FIRST ROUND BYES:
--------------------------------------------------------------------------------
  Seed #1: James Madison                  (Rank #2)
  Seed #2: Ohio State                     (Rank #6)
  Seed #3: Georgia                        (Rank #7)
  Seed #4: Texas Tech                     (Rank #8)

FIRST ROUND (On-Campus Sites):
--------------------------------------------------------------------------------
Game 1:
  Seed #5: Notre Dame
    vs
  Seed #12: Ole Miss
  Location: Campus of #5 seed

Game 2:
  Seed #6: Alabama
    vs
  Seed #11: USC
  Location: Campus of #6 seed

Game 3:
  Seed #7: South Florida
    vs
  Seed #10: Washington State
  Location: Campus of #7 seed

Game 4:
  Seed #8: Oregon
    vs
  Seed #9: North Texas
  Location: Campus of #8 seed

QUARTERFINALS (Bowl Games, Neutral Sites):
--------------------------------------------------------------------------------
 

---

## Export Results

In [21]:
# Cell 7: Export Bracket and Results
# Save HTML bracket
html_path = brackets_dir / f'playoff_bracket_{year}_week{week}.html'
with open(html_path, 'w') as f:
    f.write(f'''<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>{year} CFP Bracket - Week {week}</title>
</head>
<body>
{html_bracket}
</body>
</html>''')

# Save ASCII bracket
ascii_path = brackets_dir / f'playoff_bracket_{year}_week{week}.txt'
with open(ascii_path, 'w') as f:
    f.write(ascii_bracket)

# Save audit log
audit_path = brackets_dir / f'selection_audit_{year}_week{week}.txt'
with open(audit_path, 'w') as f:
    f.write('\n'.join(selection.audit_log))

# Prepare JSON export
playoff_json = {
    'timestamp': datetime.now().isoformat(),
    'season': year,
    'week': week,
    'selection_protocol': {
        'automatic_bids': 5,
        'at_large_bids': 7,
        'total_teams': 12,
        'bye_seeds': 4
    },
    'playoff_field': [
        {
            'seed': int(row['seed']),
            'team': row['team'],
            'rank': int(row['rank']),
            'is_bye': bool(row['is_bye']),
            'conference': row.get('conference', ''),
            'conf_champ': row.get('conf_champ', '')
        }
        for _, row in seeded_df.iterrows()
    ],
    'first_round_matchups': [
        {
            'game': m.game_num,
            'home_seed': m.seed_high,
            'home_team': m.team_high,
            'away_seed': m.seed_low,
            'away_team': m.team_low,
            'location': m.location
        }
        for m in first_round
    ],
    'automatic_bids': [
        {
            'team': ab['team'],
            'rank': int(ab['rank']),
            'conference': ab.get('conf_champ', '')
        }
        for ab in selection.auto_bids
    ],
    'at_large_bids': [
        {
            'team': al['team'],
            'rank': int(al['rank'])
        }
        for al in selection.at_large_bids
    ],
    'special_cases': {
        'champ_pulled_in': selection.champ_pulled_in,
        'displaced_team': {
            'team': selection.displaced_team['team'],
            'rank': int(selection.displaced_team['rank'])
        } if selection.displaced_team else None
    }
}

# Save JSON
json_path = brackets_dir / f'playoff_bracket_{year}_week{week}.json'
with open(json_path, 'w') as f:
    json.dump(playoff_json, f, indent=2)

# Save seeded field CSV
csv_path = exports_dir / f'playoff_field_{year}_week{week}.csv'
seeded_df.to_csv(csv_path, index=False)

print('‚úÖ Bracket and selection data exported:')
print(f'   HTML: {html_path}')
print(f'   ASCII: {ascii_path}')
print(f'   JSON: {json_path}')
print(f'   CSV: {csv_path}')
print(f'   Audit: {audit_path}')

‚úÖ Bracket and selection data exported:
   HTML: data/output/brackets/playoff_bracket_2025_week15.html
   ASCII: data/output/brackets/playoff_bracket_2025_week15.txt
   JSON: data/output/brackets/playoff_bracket_2025_week15.json
   CSV: data/output/exports/playoff_field_2025_week15.csv
   Audit: data/output/brackets/selection_audit_2025_week15.txt


---

## Summary

12-team College Football Playoff bracket complete!

**Outputs:**
- HTML bracket (open in browser for interactive view)
- JSON export (for external applications)
- Audit log (transparency on selection decisions)

**Next Steps:**
- `06_visualization_report.ipynb` - Stability analysis and error metrics
- `07_quick_simulator.ipynb` - Streamlined end-to-end analysis