# NFL Predictive Analysis

This notebook uses historical spread-coverage analysis to identify which of today's games offer high-probability betting opportunities.

## Step 1: Get Today's Games

Fetch today's games from the Odds API with proper EST timezone handling.

In [1]:
import pandas as pd
import sys
import os
from pathlib import Path
from datetime import datetime, timedelta, timezone
import time

# Add src directory to path
# Path() is the current working directory, so project_root is the parent of
# wherever the kernel was started.
# NOTE(review): presumably the notebook lives one level below the project root
# (e.g. in notebooks/) — confirm, otherwise the imports below will not resolve.
project_root = Path().resolve().parent
# Prepend <project_root>/src so the project-local modules below are importable.
sys.path.insert(0, str(project_root / 'src'))

# Project-local modules; these imports only work after the sys.path change above.
from odds_api_client import OddsAPIClient, OddsAPIError
import config

print("Libraries imported successfully")

# Helper function for timezone parsing (used in multiple cells)
def parse_commence_time_to_est(commence_time):
    """Parse an API ``commence_time`` value and convert it to EST.

    Args:
        commence_time: A timestamp accepted by ``pd.to_datetime`` (for example
            the ISO-8601 string ``"2025-12-12T18:00:00Z"``). Values that carry
            no timezone information are interpreted as UTC.

    Returns:
        A timezone-aware ``datetime.datetime`` at a fixed UTC-05:00 offset, or
        ``None`` when the value is missing, empty, or cannot be parsed as a
        single timestamp.

    Note:
        The offset is fixed at UTC-05:00 and does not follow daylight saving
        time, so during US daylight time the result is one hour behind
        Eastern wall-clock time.
    """
    if commence_time is None:
        return None

    try:
        parsed = pd.to_datetime(commence_time)
    except (ValueError, TypeError, OverflowError):
        # Unparseable or out-of-range input: callers treat None as "no time".
        return None

    # Rejects NaT (e.g. from an empty string) and non-scalar results such as a
    # DatetimeIndex; only a single concrete timestamp can be converted.
    if not isinstance(parsed, pd.Timestamp):
        return None

    if parsed.tzinfo is None:
        parsed = parsed.tz_localize('UTC')

    # Always hand back a plain datetime so naive and aware inputs behave alike.
    est = timezone(timedelta(hours=-5))
    return parsed.to_pydatetime().astimezone(est)

Libraries imported successfully


In [2]:
# Set up the Odds API client and look up the API key for the NFL sport
client = OddsAPIClient()
api_sport_key = config.get_sport_api_key('nfl')

# "Today" is evaluated at a fixed UTC-05:00 offset (matching our data collection pattern)
est = timezone(timedelta(hours=-5))
today_est = datetime.now(est).date()

print(f"Today's date (EST): {today_est}")
print(f"Looking for games scheduled for today...")

# Request upcoming games with spread odds, then keep only the ones played today
try:
    # Pause before the request to respect the configured rate-limit delay
    time.sleep(config.API_RATE_LIMIT_DELAY)
    upcoming_odds = client._make_request(
        f"sports/{api_sport_key}/odds",
        {
            "regions": "us",
            "markets": "spreads",
            "oddsFormat": "american",
            "dateFormat": "iso"
        }
    )

    print(f"‚úì Fetched data from API")
    print(f"Type: {type(upcoming_odds)}")

    if isinstance(upcoming_odds, dict):
        # A dict is not the expected list of games; show its keys and continue with no games
        print(f"Response keys: {list(upcoming_odds.keys())}")
        upcoming_odds = []
    elif isinstance(upcoming_odds, list):
        # Lazy pipeline: dict entries -> entries with a commence_time -> parsed kickoff
        dict_entries = (entry for entry in upcoming_odds if isinstance(entry, dict))
        with_start = ((entry, entry.get('commence_time')) for entry in dict_entries)
        with_kickoff = (
            (entry, parse_commence_time_to_est(start))
            for entry, start in with_start
            if start
        )
        upcoming_odds = [
            entry
            for entry, kickoff in with_kickoff
            if kickoff and kickoff.date() == today_est
        ]
        print(f"Found {len(upcoming_odds)} games scheduled for today (EST)")

except Exception as e:
    # Report the failure with a traceback, then fall back to an empty slate
    print(f"Error: {e}")
    import traceback
    traceback.print_exc()
    upcoming_odds = []

Today's date (EST): 2025-12-12
Looking for games scheduled for today...
‚úì Fetched data from API
Type: <class 'list'>
Found 0 games scheduled for today (EST)


In [3]:
# Process and display today's games in a clean DataFrame

def _extract_home_spread(game, bookmaker_key='draftkings'):
    """Return the home team's spread line from a single bookmaker.

    Args:
        game: One game dict from the odds response; its 'home_team' and
            'bookmakers' entries are read.
        bookmaker_key: Substring matched case-insensitively against each
            bookmaker's 'key'. Only the first matching bookmaker is used.

    Returns:
        ``(point, price)`` of the home team's outcome in that bookmaker's
        first 'spreads' market, or ``(None, None)`` when no outcome can be
        attributed to the home team.
    """
    home_name = (game.get('home_team') or '').strip().lower()
    if not home_name:
        # Without a home team name no outcome can be attributed to it.
        return None, None

    for bookmaker in game.get('bookmakers') or []:
        if bookmaker_key not in (bookmaker.get('key') or '').lower():
            continue

        for market in bookmaker.get('markets') or []:
            if market.get('key') != 'spreads':
                continue

            outcomes = market.get('outcomes') or []
            if len(outcomes) < 2:
                return None, None

            named = [
                ((outcome.get('name') or '').strip().lower(), outcome)
                for outcome in outcomes
            ]

            # Prefer an exact (case-insensitive) name match.
            # NOTE(review): presumably outcome names equal the team names — confirm.
            for name, outcome in named:
                if name == home_name:
                    return outcome.get('point'), outcome.get('price')

            # Then accept a partial match, but never on an empty name:
            # '' is a substring of every string and would match any outcome.
            for name, outcome in named:
                if name and (home_name in name or name in home_name):
                    return outcome.get('point'), outcome.get('price')

            # No fallback to outcomes[0]: that outcome may belong to the away
            # team, whose spread has the opposite sign.
            return None, None

        # Matching bookmaker had no 'spreads' market; do not try other books.
        return None, None

    return None, None


if upcoming_odds:
    games_data = []

    for game in upcoming_odds:
        if not isinstance(game, dict):
            continue

        # Kickoff time rendered as text, or None when it cannot be parsed
        event_time_est = parse_commence_time_to_est(game.get('commence_time', ''))
        game_time_str = event_time_est.strftime('%Y-%m-%d %H:%M:%S %Z') if event_time_est else None

        # Current home-team spread and price from DraftKings
        current_spread, spread_odds = _extract_home_spread(game)

        games_data.append({
            'event_id': game.get('id', ''),
            'game_time_est': game_time_str,
            'home_team': game.get('home_team', ''),
            'away_team': game.get('away_team', ''),
            'current_spread': current_spread,
            'spread_odds': spread_odds
        })

    # Create DataFrame
    df_today = pd.DataFrame(games_data)

    if len(df_today) > 0:
        print(f"\nToday's NFL Games ({len(df_today)} games):")
        print("="*80)
        print(df_today.to_string(index=False))
    else:
        print("No games data to display")
else:
    print("No games scheduled for today")
    df_today = pd.DataFrame()

No games scheduled for today


## Step 2: Load Historical Team Performance Data

Load the historical NFL data to see how each team has performed against the spread this season.

In [4]:
# Read the season results workbook, or fall back to an empty frame if it is missing
historical_file = project_root / 'data' / 'results' / 'nfl_season_results.xlsx'

if not historical_file.exists():
    print(f"‚úó Historical data file not found at {historical_file}")
    df_historical = pd.DataFrame()
else:
    df_historical = pd.read_excel(historical_file)
    print(f"‚úì Loaded {len(df_historical)} historical games")

    # Earliest and latest game dates present in the file
    first_game_date = df_historical['game_date'].min()
    last_game_date = df_historical['game_date'].max()
    print(f"Date range: {first_game_date} to {last_game_date}")

    print(f"\nFirst few rows:")
    print(df_historical.head())

‚úì Loaded 209 historical games
Date range: 2025-09-04 00:00:00 to 2025-12-11 00:00:00

First few rows:
   game_date             home_team             away_team  closing_spread  \
0 2025-09-04   Philadelphia Eagles        Dallas Cowboys            -8.5   
1 2025-09-06  Los Angeles Chargers    Kansas City Chiefs             3.0   
2 2025-09-07  Jacksonville Jaguars     Carolina Panthers            -4.5   
3 2025-09-07       Atlanta Falcons  Tampa Bay Buccaneers             1.5   
4 2025-09-07    Indianapolis Colts        Miami Dolphins            -1.5   

   home_score  away_score  spread_result_difference  
0          24          20                      -4.5  
1          27          21                       9.0  
2          26          10                      11.5  
3          20          23                      -1.5  
4          33           8                      23.5  


## Step 3: Calculate Team Spread Coverage Statistics

For each team, calculate:
- How often they cover the spread (cover %)
- How many games they've played
- Their average spread_result_difference

In [5]:
# Calculate team spread coverage statistics
#
# Produces three names used by later cells:
#   df_completed  - historical games that have scores, a closing spread and a result
#   all_teams     - set of every team appearing in df_completed
#   df_team_stats - one row per team, sorted by cover percentage (best first)
if len(df_historical) > 0:
    # Filter to only completed games with scores and spreads
    required_columns = ['home_score', 'away_score', 'closing_spread', 'spread_result_difference']
    df_completed = df_historical.dropna(subset=required_columns).copy()

    print(f"Completed games with all data: {len(df_completed)}")

    all_teams = set(df_completed['home_team'].unique()) | set(df_completed['away_team'].unique())

    # Iterate in a fixed order so that rows (and ties after sorting) come out the
    # same on every run; plain set iteration order varies between kernel sessions.
    team_stats = []
    for team in sorted(all_teams, key=str):
        home_diff = df_completed.loc[df_completed['home_team'] == team, 'spread_result_difference']
        away_diff = df_completed.loc[df_completed['away_team'] == team, 'spread_result_difference']

        total_games = len(home_diff) + len(away_diff)
        if total_games == 0:
            continue

        # For home games: positive spread_result_difference = cover
        # For away games: negative spread_result_difference = cover
        # A difference of exactly 0 counts as a non-cover for both teams.
        total_covers = int((home_diff > 0).sum()) + int((away_diff < 0).sum())

        # Average margin from the team's own point of view (away results are negated)
        avg_spread_diff = (home_diff.sum() - away_diff.sum()) / total_games

        team_stats.append({
            'team': team,
            'total_games': total_games,
            'covers': total_covers,
            'non_covers': total_games - total_covers,
            'cover_pct': (total_covers / total_games) * 100,
            'avg_spread_diff': avg_spread_diff,
            'home_games': len(home_diff),
            'away_games': len(away_diff)
        })

    # Explicit columns keep the sort below valid even when no team rows exist
    stats_columns = ['team', 'total_games', 'covers', 'non_covers',
                     'cover_pct', 'avg_spread_diff', 'home_games', 'away_games']
    df_team_stats = pd.DataFrame(team_stats, columns=stats_columns)
    # mergesort is stable, so teams tied on cover_pct keep their alphabetical order
    df_team_stats = df_team_stats.sort_values('cover_pct', ascending=False, kind='mergesort')

    print(f"\n‚úì Calculated stats for {len(df_team_stats)} teams")
    print(f"\nTop 10 teams by cover percentage:")
    print(df_team_stats.head(10).to_string(index=False))

    print(f"\nBottom 10 teams by cover percentage:")
    print(df_team_stats.tail(10).to_string(index=False))
else:
    print("No historical data available")
    # Define every name later cells read, so they hit their own
    # "no data" branches instead of raising NameError.
    df_completed = pd.DataFrame()
    df_team_stats = pd.DataFrame()
    all_teams = set()

Completed games with all data: 209

‚úì Calculated stats for 32 teams

Top 10 teams by cover percentage:
                team  total_games  covers  non_covers  cover_pct  avg_spread_diff  home_games  away_games
    Seattle Seahawks           13      10           3  76.923077         8.576923           6           7
    Los Angeles Rams           13       9           4  69.230769         5.884615           6           7
New England Patriots           13       9           4  69.230769         6.115385           7           6
   Carolina Panthers           13       8           5  61.538462         1.115385           6           7
 San Francisco 49ers           13       8           5  61.538462         2.000000           5           8
Jacksonville Jaguars           13       8           5  61.538462         4.461538           7           6
       Detroit Lions           13       7           6  53.846154         2.307692           7           6
  Indianapolis Colts           13       7      

## Step 3.5: Calculate Team Coverage with 5-Point Handicap

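Calculate each team's spread coverage when given a 5-point handicap, i.e. with 5 extra points added in the team's favor before checking the result against the spread. A high percentage here identifies teams that rarely miss the spread by 5 points or more.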

In [6]:
# Cover rate per team when each team is given 5 extra points against the spread
if len(df_completed) == 0:
    print("No historical data available")
    df_team_stats_handicap_5 = pd.DataFrame()
else:
    team_stats_handicap_5 = []
    result_diff = df_completed['spread_result_difference']

    for team in all_teams:
        # Spread results of this team's home games and away games
        diff_at_home = result_diff[df_completed['home_team'] == team]
        diff_on_road = result_diff[df_completed['away_team'] == team]

        total_games = len(diff_at_home) + len(diff_on_road)
        if total_games == 0:
            continue

        # Home: shift the result up by 5 and require it to be positive.
        # Away: shift the result down by 5 and require it to be negative.
        covers_at_home = ((diff_at_home + 5) > 0).sum()
        covers_on_road = ((diff_on_road - 5) < 0).sum()
        total_covers_handicap = covers_at_home + covers_on_road

        team_stats_handicap_5.append({
            'team': team,
            'total_games': total_games,
            'covers_handicap_5': total_covers_handicap,
            'cover_pct_handicap_5': (total_covers_handicap / total_games) * 100
        })

    # One row per team, best handicap cover rate first
    df_team_stats_handicap_5 = pd.DataFrame(team_stats_handicap_5).sort_values(
        'cover_pct_handicap_5', ascending=False
    )

    print(f"‚úì Calculated 5-point handicap stats for {len(df_team_stats_handicap_5)} teams")
    print(f"\nTop 10 teams by 5-point handicap cover percentage:")
    print(df_team_stats_handicap_5.head(10).to_string(index=False))

‚úì Calculated 5-point handicap stats for 32 teams

Top 10 teams by 5-point handicap cover percentage:
                team  total_games  covers_handicap_5  cover_pct_handicap_5
    Los Angeles Rams           13                 11             84.615385
      Houston Texans           13                 11             84.615385
    Seattle Seahawks           13                 11             84.615385
New England Patriots           13                 11             84.615385
       Chicago Bears           13                 11             84.615385
  Indianapolis Colts           13                 10             76.923077
Jacksonville Jaguars           13                 10             76.923077
   Green Bay Packers           13                  9             69.230769
   Arizona Cardinals           13                  9             69.230769
      Miami Dolphins           13                  9             69.230769


## Step 4: Connect Today's Games with Historical Performance

Merge today's games with each team's historical spread coverage statistics (both standard and 5-point handicap).

In [7]:
# Attach each side's historical cover stats (standard and 5-point handicap) to today's games
if len(df_today) > 0 and len(df_team_stats) > 0:
    standard_cols = ['cover_pct', 'total_games', 'avg_spread_diff']
    games_with_stats = df_today.copy()

    # Same two lookups for each side: standard stats first, then handicap stats.
    # Stat columns are prefixed with the side before merging so home and away never collide.
    for side in ('home', 'away'):
        side_team_col = f'{side}_team'

        side_standard = df_team_stats[['team'] + standard_cols].rename(
            columns={col: f'{side}_{col}' for col in standard_cols}
        )
        games_with_stats = games_with_stats.merge(
            side_standard,
            left_on=side_team_col,
            right_on='team',
            how='left'
        ).drop(columns=['team'])

        if len(df_team_stats_handicap_5) > 0:
            side_handicap = df_team_stats_handicap_5[['team', 'cover_pct_handicap_5']].rename(
                columns={'cover_pct_handicap_5': f'{side}_cover_pct_handicap_5'}
            )
            games_with_stats = games_with_stats.merge(
                side_handicap,
                left_on=side_team_col,
                right_on='team',
                how='left'
            ).drop(columns=['team'])

    # Advantage metrics: home cover rate minus away cover rate
    games_with_stats['home_advantage'] = (
        games_with_stats['home_cover_pct'] - games_with_stats['away_cover_pct']
    )
    handicap_pct_cols = {'home_cover_pct_handicap_5', 'away_cover_pct_handicap_5'}
    if handicap_pct_cols.issubset(games_with_stats.columns):
        games_with_stats['home_handicap_advantage'] = (
            games_with_stats['home_cover_pct_handicap_5']
            - games_with_stats['away_cover_pct_handicap_5']
        )

    print(f"\n‚úì Merged today's {len(games_with_stats)} games with team statistics")
    print(f"\nToday's Games with Team Stats:")
    print("="*100)

    # Key columns, plus the handicap columns when they were merged in
    display_cols = ['game_time_est', 'away_team', 'home_team', 'current_spread',
                    'away_cover_pct', 'home_cover_pct', 'home_advantage']
    if 'home_cover_pct_handicap_5' in games_with_stats.columns:
        display_cols += ['away_cover_pct_handicap_5', 'home_cover_pct_handicap_5', 'home_handicap_advantage']
    print(games_with_stats[display_cols].to_string(index=False))
else:
    print("Cannot merge: Missing today's games or team statistics")
    games_with_stats = pd.DataFrame()

Cannot merge: Missing today's games or team statistics


## Step 5: Identify Potential Betting Opportunities

**Focus: Games where home team has better 5-point handicap coverage than away team**

This analysis identifies games where the home team's spread coverage with a 5-point handicap
is higher than the away team's. For each game, we display:
- The matchup and current spread
- Home team's 5-point handicap cover percentage
- Away team's 5-point handicap cover percentage
- The difference between them (home team advantage)

**Note:** This is just one factor to consider. Always do your own research before betting!

In [8]:
# Report today's games where the home team's 5-point handicap cover rate
# is higher than the away team's
if len(games_with_stats) == 0:
    print("No games data available for analysis")
else:
    # Keep only matchups where both teams have at least 5 historical games
    enough_history = (
        (games_with_stats['home_total_games'] >= 5)
        & (games_with_stats['away_total_games'] >= 5)
    )
    games_with_enough_data = games_with_stats[enough_history].copy()

    print(f"Games with sufficient historical data (5+ games per team): {len(games_with_enough_data)}")

    rule = '=' * 100
    required_pct_cols = ('home_cover_pct_handicap_5', 'away_cover_pct_handicap_5')

    if not all(col in games_with_enough_data.columns for col in required_pct_cols):
        print(f"\n{rule}")
        print("5-point handicap stats not available")
        print("Make sure Step 3.5 ran successfully to calculate handicap statistics")
        print(f"{rule}")
    else:
        home_pct = games_with_enough_data['home_cover_pct_handicap_5']
        away_pct = games_with_enough_data['away_cover_pct_handicap_5']

        # Both rates must be present and the home rate strictly higher
        home_handicap_better = games_with_enough_data[
            home_pct.notna() & away_pct.notna() & (home_pct > away_pct)
        ].copy()

        if len(home_handicap_better) == 0:
            print(f"\n{rule}")
            print("No games found where home team has better 5-point handicap coverage")
            print(f"{rule}")
        else:
            # Add the home-minus-away gap and rank by it, largest first
            home_handicap_better = home_handicap_better.assign(
                handicap_pct_difference=lambda frame: (
                    frame['home_cover_pct_handicap_5'] - frame['away_cover_pct_handicap_5']
                )
            ).sort_values('handicap_pct_difference', ascending=False)

            print(f"\n{rule}")
            print(f"üè† HOME TEAM HAS BETTER 5-POINT HANDICAP COVERAGE ({len(home_handicap_better)} games)")
            print(f"{rule}")
            print("These games show where the home team's spread coverage with a 5-point handicap")
            print("is better than the away team's. This suggests the home team consistently")
            print("beats spreads by larger margins.")
            print()

            # One detail block per game, numbered from 1 in ranked order
            for position, game_row in home_handicap_better.reset_index(drop=True).iterrows():
                print(f"\nGame {position + 1}:")
                print(f"  Time: {game_row['game_time_est']}")
                print(f"  Matchup: {game_row['away_team']} @ {game_row['home_team']}")
                print(f"  Current Spread: {game_row['current_spread']}")
                print(f"  Home Team 5-Point Handicap Cover %: {game_row['home_cover_pct_handicap_5']:.2f}%")
                print(f"  Away Team 5-Point Handicap Cover %: {game_row['away_cover_pct_handicap_5']:.2f}%")
                print(f"  Difference: {game_row['handicap_pct_difference']:.2f}% (Home team advantage)")
                print("  " + "-"*96)

            # The same games as a compact table
            print(f"\n\nSummary Table:")
            print("="*100)

            summary_df = (
                home_handicap_better[[
                    'game_time_est', 'away_team', 'home_team', 'current_spread',
                    'away_cover_pct_handicap_5', 'home_cover_pct_handicap_5', 'handicap_pct_difference'
                ]]
                .round({
                    'away_cover_pct_handicap_5': 2,
                    'home_cover_pct_handicap_5': 2,
                    'handicap_pct_difference': 2
                })
                .rename(columns={
                    'game_time_est': 'Game Time (EST)',
                    'away_team': 'Away Team',
                    'home_team': 'Home Team',
                    'current_spread': 'Spread',
                    'away_cover_pct_handicap_5': 'Away 5pt Handicap %',
                    'home_cover_pct_handicap_5': 'Home 5pt Handicap %',
                    'handicap_pct_difference': 'Difference (%)'
                })
            )
            print(summary_df.to_string(index=False))

            # Aggregate figures over the selected games
            gap = home_handicap_better['handicap_pct_difference']
            print(f"\n\nTotal games found: {len(home_handicap_better)}")
            print(f"Average difference: {gap.mean():.2f}%")
            print(f"Largest difference: {gap.max():.2f}%")
            print(f"Smallest difference: {gap.min():.2f}%")

No games data available for analysis
