In [1]:
import os
import requests
import pandas as pd
import numpy as np

# Read the CollegeFootballData API key from the environment instead of
# hardcoding it, so the secret never gets saved or committed with the notebook.
# Set it before launching Jupyter, e.g. `export CFBD_API_KEY=<your key>`.
# When the variable is unset this is None, which lets cfbd_get raise its
# explicit "Missing CFBD_API_KEY" error instead of sending a bogus token.
CFBD_API_KEY = os.environ.get("CFBD_API_KEY")

In [62]:
BASE_URL = "https://api.collegefootballdata.com"

def cfbd_get(path: str, params: dict, api_key: str | None = None, timeout: int = 60):
    api_key = api_key or os.environ.get("CFBD_API_KEY")
    if not api_key:
        raise RuntimeError("Missing CFBD_API_KEY env var (or pass api_key=...).")

    headers = {
        "Authorization": f"Bearer {api_key}",
        "accept": "application/json",
    }

    url = f"{BASE_URL}{path}"
    r = requests.get(url, headers=headers, params=params, timeout=timeout)
    r.raise_for_status()
    return r.json()

def fetch_games(year: int = 2025, season_type: str = "regular") -> pd.DataFrame:
    """Fetch the ``/games`` endpoint for one season and season type.

    Parameters
    ----------
    year : int
        Season year sent as the ``year`` query parameter.
    season_type : str
        Sent as the ``seasonType`` query parameter (e.g. ``"regular"``).

    Returns
    -------
    pd.DataFrame
        A DataFrame built directly from the JSON payload returned by cfbd_get.
    """
    query = {"year": year, "seasonType": season_type}
    payload = cfbd_get("/games", params=query, api_key=CFBD_API_KEY)
    return pd.DataFrame(payload)

def fetch_games_2025_all() -> pd.DataFrame:
    """Return the 2025 regular-season and postseason games in one frame.

    The regular-season rows come first, followed by the postseason rows,
    and the result carries a fresh 0..n-1 index.
    """
    season_types = ("regular", "postseason")
    frames = [fetch_games(2025, season_type) for season_type in season_types]
    return pd.concat(frames, ignore_index=True)

# Download the 2025 regular-season and postseason games.
# This runs the API requests every time the cell is executed.
df_games_raw = fetch_games_2025_all()


In [63]:
######################
## Construct Points ##
######################

# Filter the games in a single chain:
#   1. keep only games with both scores and a venue recorded
#   2. drop games where either side is Division II or Division III
#   3. drop postseason games
df_games_raw = (
    df_games_raw
    .dropna(subset=['homePoints', 'awayPoints', 'venue'])
    .loc[lambda games: ~games["homeClassification"].isin(["ii", "iii"])]
    .loc[lambda games: ~games["awayClassification"].isin(["ii", "iii"])]
    .loc[lambda games: ~games['seasonType'].isin(['postseason'])]
    .copy()
)

# Separate copy carrying the response variables: points scored by each side
df_games_points = df_games_raw.assign(
    y_home=lambda games: games['homePoints'],
    y_away=lambda games: games['awayPoints'],
)


In [64]:
##########################################
## Construct Numerical Team Identifiers ##
##########################################

# Sorted roster of every team that shows up as either home or away
teams = pd.Index(sorted(set(df_games_raw['homeTeam']) | set(df_games_raw['awayTeam'])))

# Number of teams tracked
n_teams = len(teams)

# Team --> numerical index (position in the sorted roster)
team_to_idx = dict(zip(teams, range(n_teams)))

# Numerical index --> team
idx_to_team = dict(enumerate(teams))

# Per-game arrays giving the numerical index of the home and away team
home_idx = df_games_raw['homeTeam'].map(team_to_idx).to_numpy()
away_idx = df_games_raw['awayTeam'].map(team_to_idx).to_numpy()

# 0/1 flag per game: was this a conference game?
conf_game = df_games_raw["conferenceGame"].astype(int).to_numpy()


In [65]:
#############################################
## Construct Numerical Stadium Identifiers ##
#############################################

# Sorted list of the distinct, non-missing venues
stadiums = pd.Index(sorted(set(df_games_raw['venue'].dropna())))

# Number of stadiums
n_stadiums = len(stadiums)

# Stadium --> numerical index (position in the sorted list)
stadium_to_idx = dict(zip(stadiums, range(n_stadiums)))

# Numerical index --> stadium
idx_to_stadium = dict(enumerate(stadiums))

# Per-game array that indexes which stadium the game was played at
stadium_idx = df_games_raw['venue'].map(stadium_to_idx).to_numpy().astype(int)

# 0/1 flag per game: was the game played at a neutral site?
neutral = df_games_raw['neutralSite'].astype(int).to_numpy()


In [66]:
###########################
## Note FBS-FCS Matchups ##
###########################

# 0/1 dummies: is the home / away team classified as 'fcs'?
home_is_fcs = (df_games_raw['homeClassification'] == 'fcs').astype(int).to_numpy()
away_is_fcs = (df_games_raw['awayClassification'] == 'fcs').astype(int).to_numpy()

# Because the dummies are 0/1, "not FCS" is (1 - dummy) and "and" is a product.
# Home-side view of the matchup
home_fbs_vs_fcs = (1 - home_is_fcs) * away_is_fcs
home_fcs_vs_fbs = home_is_fcs * (1 - away_is_fcs)

# Away-side view of the matchup
away_fbs_vs_fcs = (1 - away_is_fcs) * home_is_fcs
away_fcs_vs_fbs = away_is_fcs * (1 - home_is_fcs)

# Signed mismatch: +1 when that side is the non-FCS team facing an FCS team,
# -1 in the reverse situation, 0 otherwise
mismatch_home = home_fbs_vs_fcs - home_fcs_vs_fbs
mismatch_away = -mismatch_home

# 1 when both teams are FCS
fcs_fcs = home_is_fcs * away_is_fcs


In [67]:
################
## Note Weeks ##
################

# Week number of each game as integers, shifted so the earliest week is 0
week_idx = df_games_raw['week'].to_numpy().astype(int)
week_idx -= week_idx.min()


In [68]:
######################
## Note Conferences ##
######################

# Games with no conference recorded are labelled 'Independent'
df_games_raw['homeConference'] = df_games_raw['homeConference'].fillna('Independent')
df_games_raw['awayConference'] = df_games_raw['awayConference'].fillna('Independent')

# Sorted list of every conference that appears on either side
conferences = pd.Index(
    sorted(set(df_games_raw['homeConference']) | set(df_games_raw['awayConference']))
)

# Number of conferences tracked
n_conferences = len(conferences)

# Conference --> numerical index (position in the sorted list)
conference_to_idx = dict(zip(conferences, range(n_conferences)))

# Numerical index --> conference
idx_to_conference = dict(enumerate(conferences))

# Per-game arrays giving the numerical conference index of the home and away team
home_conf_idx = df_games_raw['homeConference'].map(conference_to_idx).to_numpy()
away_conf_idx = df_games_raw['awayConference'].map(conference_to_idx).to_numpy()



In [72]:
###########################
## Build Final Dataframe ##
###########################

# Make sure the output folder exists: to_csv does not create missing
# directories and would otherwise fail on a fresh checkout.
os.makedirs('Data', exist_ok=True)

# Model specific data: one row per game, assembled from the per-game arrays
# built in the cells above (all derived from the same filtered games frame,
# so they share row order).
model_df = pd.DataFrame({
    'home_idx': home_idx,
    'away_idx': away_idx,
    'y_home': df_games_points['y_home'].to_numpy(),
    'y_away': df_games_points['y_away'].to_numpy(),
    'stadium_idx': stadium_idx,
    'neutral': neutral,
    'conf_game': conf_game,
    'mismatch_home': mismatch_home,
    'mismatch_away': mismatch_away,
    'week_idx': week_idx,
    'fcs_fcs': fcs_fcs,
    'home_conf_idx': home_conf_idx,
    'away_conf_idx': away_conf_idx,
})

# Drop games where both teams are FCS, then renumber the rows from 0.
keep = ~(model_df["fcs_fcs"].astype(bool))
model_df = model_df[keep].reset_index(drop=True)

model_df.to_csv('Data/cfb_2025_model_input.csv', index=False)

# Crosswalks from numerical index back to name.
# NOTE(review): only team and stadium crosswalks are written here; no file
# decodes home_conf_idx / away_conf_idx -- confirm whether one is needed.
team_map_df = pd.DataFrame({
    'team_idx': list(idx_to_team.keys()),
    'team_name': list(idx_to_team.values()),
}).sort_values('team_idx')
team_map_df.to_csv('Data/cfb_2025_team_map.csv', index=False)

stadium_map_df = pd.DataFrame({
    'stadium_idx': list(idx_to_stadium.keys()),
    'stadium_key': list(idx_to_stadium.values()),
}).sort_values('stadium_idx')
stadium_map_df.to_csv('Data/cfb_2025_stadium_map.csv', index=False)
