# NFL Data Analysis Example

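This notebook shows how to connect to the NFL DuckDB database (`../data/nfl.duckdb`) and run a few basic checks on the `pbp` table: the total number of plays and the number of plays per season.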

In [17]:
import duckdb
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Set up plotting: apply the 'seaborn-v0_8' matplotlib style sheet, then set
# seaborn's default colour palette to 'husl'.
# NOTE(review): the palette is set after the style sheet, presumably so that it
# takes precedence over the style's own colour cycle -- keep this order.
plt.style.use('seaborn-v0_8')
sns.set_palette('husl')

In [23]:
# Connect to the NFL database.
# The path is relative to the notebook's working directory. duckdb.connect()
# creates a brand-new, empty database file when the path does not exist, which
# would leave a stray file behind and only fail later with a missing-table
# error on `pbp` -- so check up front and report the resolved path instead.
db_path = Path('../data/nfl.duckdb')
if not db_path.is_file():
    raise FileNotFoundError(
        f"NFL database not found at {db_path.resolve()} "
        "(path is relative to the notebook's working directory)"
    )

# Everything below is a SELECT, so open the database read-only: this prevents
# accidental writes to the data file from this notebook.
conn = duckdb.connect(str(db_path), read_only=True)

# Quick check - how much data do we have?
# fetchone() returns a single row tuple; the count is its only column.
result = conn.execute('SELECT COUNT(*) as total_plays FROM pbp').fetchone()
print(f"Total plays in database: {result[0]:,}")

# Check seasons available: one (season, plays) row per season, ascending.
seasons = conn.execute('SELECT season, COUNT(*) as plays FROM pbp GROUP BY season ORDER BY season').fetchall()
print("\nSeasons available:")
for season, plays in seasons:
    print(f"  {season}: {plays:,} plays")

Total plays in database: 1,230,855

Seasons available:
  1999: 46,136 plays
  2000: 45,491 plays
  2001: 44,969 plays
  2002: 47,355 plays
  2003: 46,811 plays
  2004: 46,705 plays
  2005: 46,823 plays
  2006: 46,299 plays
  2007: 46,266 plays
  2008: 45,917 plays
  2009: 46,519 plays
  2010: 46,892 plays
  2011: 47,448 plays
  2012: 47,834 plays
  2013: 48,158 plays
  2014: 47,629 plays
  2015: 48,122 plays
  2016: 47,651 plays
  2017: 47,245 plays
  2018: 47,109 plays
  2019: 47,258 plays
  2020: 47,705 plays
  2021: 49,922 plays
  2022: 49,434 plays
  2023: 49,665 plays
  2024: 49,492 plays


In [22]:
# Close the DuckDB connection opened earlier in this notebook. Any cell that
# queries through `conn` has to run before this one; re-run the connect cell
# to get a usable connection again.
conn.close()