# NFL Data Analysis Example

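This notebook shows how to connect to the NFL play-by-play DuckDB database (`../data/nfl.duckdb`), check how many plays and seasons the `pbp_all` table contains, and run a basic analysis: the average EPA per team and season, shown as a table and as a line plot.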

In [None]:
import duckdb
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Plot styling configuration, collected in one place so it is easy to tune.
# NOTE(review): the 'seaborn-v0_8' style name is believed to require
# matplotlib >= 3.6 - confirm against the pinned environment.
PLOT_STYLE = 'seaborn-v0_8'
COLOR_PALETTE = 'husl'

# Statement order is preserved: style sheet first, seaborn palette second.
plt.style.use(PLOT_STYLE)
sns.set_palette(COLOR_PALETTE)

In [None]:
# Connect to the NFL database
db_path = Path('../data/nfl.duckdb')

# duckdb.connect() creates a brand-new, empty database file when the path does
# not exist. A wrong working directory would then only surface further down as
# a confusing "table pbp_all does not exist" error (and leave a stray file
# behind), so fail early with an actionable message instead.
if not db_path.is_file():
    raise FileNotFoundError(
        f"NFL database not found at {db_path.resolve()} - "
        "build or download it before running this notebook."
    )

# This notebook only runs SELECT queries, so open the file read-only: the
# analysis cannot modify the data by accident, and other processes can read
# the same file at the same time.
conn = duckdb.connect(str(db_path), read_only=True)

# Quick check - how much data do we have?
result = conn.execute('SELECT COUNT(*) as total_plays FROM pbp_all').fetchone()
print(f"Total plays in database: {result[0]:,}")

# Check seasons available: one (season, play count) row per season, oldest first
seasons = conn.execute(
    'SELECT season, COUNT(*) as plays FROM pbp_all GROUP BY season ORDER BY season'
).fetchall()
print("\nSeasons available:")
for season, plays in seasons:
    print(f"  {season}: {plays:,} plays")

In [None]:
# Team-level EPA summary: one row per (team, season) holding the mean of the
# `epa` column and the number of rows for that team in that season.
# Rows without a possession team (posteam IS NULL) are excluded, and the result
# is ordered by season, then by average EPA from highest to lowest.
# NOTE: AVG(epa) skips rows whose epa is NULL while COUNT(*) counts every row,
# so total_plays can be larger than the number of plays behind avg_epa.
query = """
SELECT 
    posteam as team,
    season,
    AVG(epa) as avg_epa,
    COUNT(*) as total_plays
FROM pbp_all 
WHERE posteam IS NOT NULL
GROUP BY posteam, season
ORDER BY season, avg_epa DESC
"""

# Run the aggregation and materialise it as a pandas DataFrame; the plotting
# cell further down reads `df_epa`.
df_epa = conn.execute(query).df()

# Preview the first ten rows under a heading.
print("Average EPA by team and season:", df_epa.head(10), sep="\n")

In [None]:
# Visualize EPA trends over time: one line per team, seasons on the x-axis.
# Uses the explicit fig/ax interface so the figure can be tweaked or reused.
fig, ax = plt.subplots(figsize=(12, 6))

# groupby splits the frame in a single pass instead of re-filtering the whole
# frame once per team; sort=False keeps teams in first-appearance order, the
# same order df_epa['team'].unique() would give.
for team, team_data in df_epa.groupby('team', sort=False):
    ax.plot(team_data['season'], team_data['avg_epa'], marker='o', label=team)

# Derive the season range from the data rather than hard-coding "2020-2024",
# so the title stays correct when the database holds other seasons.
seasons_plotted = sorted(df_epa['season'].unique())
title = 'Average EPA by Team Over Seasons'
if seasons_plotted:
    first_season, last_season = seasons_plotted[0], seasons_plotted[-1]
    if first_season == last_season:
        title += f' ({first_season})'
    else:
        title += f' ({first_season}-{last_season})'
    # Seasons are discrete: pin the ticks to the seasons actually present so
    # the axis never shows fractional values such as 2021.5.
    ax.set_xticks(seasons_plotted)

ax.set_title(title)
ax.set_xlabel('Season')
ax.set_ylabel('Average EPA')

# With one entry per team the legend is large; place it outside the axes in
# two columns so it does not cover the lines. Skip it when nothing was plotted
# to avoid matplotlib's "no artists with labels" warning.
handles, labels = ax.get_legend_handles_labels()
if handles:
    ax.legend(
        handles,
        labels,
        title='Team',
        ncol=2,
        fontsize='small',
        loc='upper left',
        bbox_to_anchor=(1.02, 1.0),
    )

ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

In [None]:
# Close connection
# Releases the DuckDB connection created by duckdb.connect() earlier in this
# notebook. Cells that use `conn` will need the connection cell re-run after
# this point.
conn.close()