In [27]:
# Cell 1: Imports
import pandas as pd
import os

# Helper to load & preview files
def preview_csv(path, n=5):
    """Load a CSV file and print a short preview of it.

    Prints the file's base name, its column count, and ``n`` rows. The rows
    come from the top of the file, except for a file named exactly
    ``games.csv``, which is previewed from the bottom.

    Parameters
    ----------
    path : str or os.PathLike
        Location of the CSV file to read.
    n : int, default 5
        Number of rows to print.

    Returns
    -------
    None
        The preview is written to stdout only.
    """
    # Accept pathlib.Path objects as well as plain strings.
    path = os.fspath(path)
    file_name = os.path.basename(path)

    # low_memory=False makes pandas infer each column's dtype from the whole
    # file instead of chunk by chunk, which avoids the mixed-type
    # DtypeWarning that chunked inference can emit on large files.
    df = pd.read_csv(path, low_memory=False)

    print(f"\n📄 {file_name}")
    print(f"Columns: {len(df.columns)}")

    # Compare the base name exactly: a substring test on the full path would
    # also match e.g. "advanced_games.csv" or a directory containing the text.
    if file_name == "games.csv":
        print(df.tail(n))
    else:
        print(df.head(n))

# 📘 Data Dictionary

Welcome to the College Football Data Starter Pack!

This notebook serves as a reference guide to help you understand what’s included in the ZIP archive and how the different datasets relate to each other.

Each dataset listed below is accompanied by a short description and a preview of the columns and data format.

## 🧭 Metadata Files

In [28]:
# conferences.csv — conference reference table (first rows shown)
preview_csv("./data/conferences.csv")

# teams.csv — team reference table (first rows shown)
preview_csv("./data/teams.csv")


📄 conferences.csv
Columns: 3
                name abbreviation division
0  American Athletic          AAC      fbs
1               AAWU         AAWU      fbs
2                ACC          ACC      fbs
3       Atlantic Sun         ASUN      fcs
4        Atlantic 10        ATL10      fcs

📄 teams.csv
Columns: 21
     id     school abbreviation   nickname        mascot  \
0  2005  Air Force          AFA  Air Force       Falcons   
1  2006      Akron          AKR      Akron          Zips   
2   333    Alabama          ALA    Alabama  Crimson Tide   
3  2026  App State          APP  App State  Mountaineers   
4    12    Arizona         ARIZ    Arizona      Wildcats   

                full_name classification  conference_id     conference  \
0       Air Force Falcons            fbs             17  Mountain West   
1              Akron Zips            fbs             15   Mid-American   
2    Alabama Crimson Tide            fbs              8            SEC   
3  App State Mountaineers     

### `conferences.csv`
Basic info about conferences; the file includes FBS as well as FCS conferences (see the `division` column).

- `name`: Full name of the conference
- `abbreviation`: Short label (e.g., SEC, ACC)
- `division`: NCAA division (e.g., "fbs", "fcs")

---

### `teams.csv`
Master reference of all teams with full metadata.

- Includes `classification`, `conference`, home venue info, location, elevation, etc.

## 🏟️ Game-Level Data

In [25]:
# games.csv — preview_csv prints the last rows of this file rather than the first
preview_csv("./data/games.csv")


📄 games.csv
Columns: 33
               id  season season_type  week           start_date  \
106038  401762871    2025     regular    14  2025-11-28 00:00:00   
106039  401762872    2025     regular    14  2025-11-29 00:00:00   
106040  401762873    2025     regular     6  2025-10-04 00:00:00   
106041  401762874    2025     regular     9  2025-10-25 00:00:00   
106042  401762875    2025     regular    14  2025-11-29 00:00:00   

       start_time_tbd  neutral_site conference_game  attendance  venue_id  \
106038           True         False            True         NaN    3764.0   
106039           True         False            True         NaN    3786.0   
106040           True         False            True         NaN    3764.0   
106041           True         False            True         NaN    3996.0   
106042           True         False            True         NaN    3996.0   

        ... away_team_id         away_team  away_conference_id  \
106038  ...         2309        Kent 

  df = pd.read_csv(path)


In [26]:
# game_stats/2023.csv — the 2023 file, as an example of game_stats/YYYY.csv
preview_csv("./data/game_stats/2023.csv")


📄 2023.csv
Columns: 46
     game_id  season  week season_type home_away  team_id  \
0  401520145    2023     1     regular      away     2638   
1  401520145    2023     1     regular      home       55   
2  401520146    2023     1     regular      away     2229   
3  401520146    2023     1     regular      home     2348   
4  401520147    2023     1     regular      away       62   

                    team      conference  opponent_id               opponent  \
0                   UTEP  Conference USA           55     Jacksonville State   
1     Jacksonville State  Conference USA         2638                   UTEP   
2  Florida International  Conference USA         2348         Louisiana Tech   
3         Louisiana Tech  Conference USA         2229  Florida International   
4                Hawai'i   Mountain West          238             Vanderbilt   

   ... sacks tackles  tacklesForLoss  thirdDownEff totalFumbles  \
0  ...   1.0      33             4.0          3-12           

### `games.csv`
Master list of games from 1869–present. Includes team IDs, points, Elo ratings, win probabilities, venue info, and more.

---

### `game_stats/YYYY.csv`
Traditional box score stats per team per game (yardage, completions, sacks, etc.). Use with `games.csv` to connect metadata.

## 📊 Season-Level Stats

In [23]:
# season_stats/2023.csv — the 2023 file, as an example of season_stats/YYYY.csv
preview_csv("./data/season_stats/2023.csv")


📄 2023.csv
Columns: 66
   season       team     conference  firstDowns  firstDownsOpponent  \
0    2023  Air Force  Mountain West         255                 191   
1    2023      Akron   Mid-American         200                 214   
2    2023    Alabama            SEC         277                 228   
3    2023  App State       Sun Belt         332                 279   
4    2023    Arizona         Pac-12         299                 256   

   fourthDownConversions  fourthDownConversionsOpponent  fourthDowns  \
0                     17                             12           25   
1                      8                             12           23   
2                      5                             11            7   
3                     18                              8           28   
4                     10                              6           15   

   fourthDownsOpponent  fumblesLost  ...  tacklesForLoss  \
0                   22           11  ...              68

In [24]:
# advanced_season_stats/2023.csv — the 2023 file, as an example of advanced_season_stats/YYYY.csv
preview_csv("./data/advanced_season_stats/2023.csv")


📄 2023.csv
Columns: 82
   season       team     conference  offense_passingPlays_explosiveness  \
0    2023  Air Force  Mountain West                            2.308738   
1    2023      Akron   Mid-American                            1.328119   
2    2023    Alabama            SEC                            1.860456   
3    2023  App State       Sun Belt                            1.595906   
4    2023    Arizona         Big 12                            1.588850   

   offense_passingPlays_successRate  offense_passingPlays_totalPPA  \
0                          0.409091                      54.829427   
1                          0.357143                      29.299802   
2                          0.430518                     142.236419   
3                          0.494647                     206.380380   
4                          0.504202                     224.401777   

   offense_passingPlays_ppa  offense_passingPlays_rate  \
0                  0.498449                   

### `season_stats/YYYY.csv`
Raw season-long team stats (first downs, turnovers, penalties, etc.)

---

### `advanced_season_stats/YYYY.csv`
Custom-derived season metrics:
- PPA (predicted points added, an EPA-style metric), explosiveness, success rates
- Down splits (standard vs passing)
- Field position, havoc, efficiency metrics

## 🧬 Game-Level Advanced Stats

In [13]:
# advanced_game_stats/2023.csv — the 2023 file, as an example of advanced_game_stats/YYYY.csv
preview_csv("./data/advanced_game_stats/2023.csv")


📄 2023.csv
Columns: 61
      gameId  season  week                   team               opponent  \
0  401520145    2023     1     Jacksonville State                   UTEP   
1  401520145    2023     1                   UTEP     Jacksonville State   
2  401520146    2023     1  Florida International         Louisiana Tech   
3  401520146    2023     1         Louisiana Tech  Florida International   
4  401520147    2023     1                Hawai'i             Vanderbilt   

   offense_passingPlays_explosiveness  offense_passingPlays_successRate  \
0                            1.347663                          0.285714   
1                            1.455518                          0.428571   
2                            0.000000                          0.000000   
3                            1.546979                          0.500000   
4                            1.763509                          0.542857   

   offense_passingPlays_totalPPA  offense_passingPlays_ppa  \
0     

### `advanced_game_stats/YYYY.csv`
Per-team, per-game advanced stats derived from play-by-play data.

Includes PPA (predicted points added, an EPA-style metric), success rate, explosiveness, line yards, etc.

## 🔄 Play & Drive Data

In [21]:
# drives/drives_2023.csv — note the "drives_" prefix in the file name
preview_csv("./data/drives/drives_2023.csv")


📄 drives_2023.csv
Columns: 24
              offense offenseConference             defense defenseConference  \
0  Jacksonville State    Conference USA                UTEP    Conference USA   
1                UTEP    Conference USA  Jacksonville State    Conference USA   
2  Jacksonville State    Conference USA                UTEP    Conference USA   
3                UTEP    Conference USA  Jacksonville State    Conference USA   
4  Jacksonville State    Conference USA                UTEP    Conference USA   

      gameId          id  driveNumber  scoring  startPeriod  startYardline  \
0  401520145  4015201451            1    False            1             25   
1  401520145  4015201452            2    False            1             95   
2  401520145  4015201453            3     True            1             53   
3  401520145  4015201454            4    False            1             75   
4  401520145  4015201455            5     True            1             20   

   ...  endYa

In [22]:
# plays/2023/regular_13_plays.csv — one file following plays/YYYY/SEASONTYPE_WEEK_plays.csv
preview_csv("./data/plays/2023/regular_13_plays.csv")


📄 regular_13_plays.csv
Columns: 27
                   id      driveId     gameId  driveNumber  playNumber  \
0  401524065102995204  40152406513  401524065           13           7   
1  401524065102995203  40152406513  401524065           13           6   
2  401524065102995202  40152406513  401524065           13           5   
3  401524065102997803  40152406513  401524065           13          10   
4  401524065102995201  40152406513  401524065           13           4   

      offense offenseConference  offenseScore   defense      home  ...  \
0  Notre Dame  FBS Independents            21  Stanford  Stanford  ...   
1  Notre Dame  FBS Independents            21  Stanford  Stanford  ...   
2  Notre Dame  FBS Independents            21  Stanford  Stanford  ...   
3  Notre Dame  FBS Independents            21  Stanford  Stanford  ...   
4  Notre Dame  FBS Independents            21  Stanford  Stanford  ...   

  yardline yardsToGoal  down  distance yardsGained  scoring  \
0       24 

### `drives/drives_YYYY.csv`
One row per drive. Includes periods, starting/ending yard lines, results, and score progression.

---

### `plays/YYYY/SEASONTYPE_WEEK_plays.csv`
Play-by-play data with down, distance, yardage, play type, and custom PPA metric.

## 🧩 Relationships & Tips

- Use `gameId` to join plays → drives → advanced game stats; the same key is named `game_id` in `game_stats/YYYY.csv` and `id` in `games.csv`
- Use `team_id` (matches `id` in `teams.csv`) or the team name (`team` in the stats files, `school` in `teams.csv`) to join stats with `teams.csv`
- Normalize time columns like `possessionTime` if needed