In [1]:
import sqlite3
from IPython.display import display
import pandas as pd
from tabulate import tabulate

In [2]:
!rm -rf data
!cp -r ../Scrapers/data .
!cp ../Scrapers/nfl.db data/

# BETTING TRENDS

## Performance as Favorite

## Performance as Underdog

In [4]:
# Loop all teams and saving to files ^

# Function to get underdog games
def get_underdog_games(team_name, seasons, db_path):
    # Connect to the database
    conn = sqlite3.connect(db_path)
    
    # Prepare a query to get all games for the given team where they were the underdog
    query = f"""
    SELECT season, week, away_team, home_team, spread_line, team_favorite, result, home_score, away_score 
    FROM Games
    WHERE season IN ({','.join(map(str, seasons))})
    AND (home_team = '{team_name}' OR away_team = '{team_name}')
    AND team_favorite != '{team_name}';
    """
    
    # Execute query
    games = conn.execute(query).fetchall()
    conn.close()
    
    # Create a DataFrame
    columns = ['season', 'week', 'away_team', 'home_team', 'spread_line', 'team_favorite', 'result', 'home_score', 'away_score']
    games_df = pd.DataFrame(games, columns=columns)
    
    # Calculate the point differential and whether the team covered the spread
    games_df['point_differential'] = games_df.apply(lambda row: row['away_score'] - row['home_score'] if row['away_team'] == team_name else row['home_score'] - row['away_score'], axis=1)
    games_df['ats_differential'] = games_df.apply(lambda row: (row['point_differential'] + float(row['spread_line'])) if row['team_favorite'] != team_name else (row['point_differential'] - float(row['spread_line'])), axis=1)
    games_df['spread_covered'] = games_df['ats_differential'] > 0
    
    return games_df

# Function to summarize ATS record and return data for CSV
def summarize_ats_record_for_csv(team_name, seasons, db_path):
    underdog_games_df = get_underdog_games(team_name, seasons, db_path)
    
    results = []
    
    for _, game in underdog_games_df.iterrows():
        # Construct the result as a dictionary for later CSV storage
        result = {
            'season': game['season'],
            'week': game['week'],
            'team': team_name,
            'opponent': game['home_team'] if game['away_team'] == team_name else game['away_team'],
            'team_score': game['away_score'] if game['away_team'] == team_name else game['home_score'],
            'opponent_score': game['home_score'] if game['away_team'] == team_name else game['away_score'],
            'spread': game['spread_line'],
            'team_favorite': game['team_favorite'],
            'ats_result': 'WIN' if game['spread_covered'] else 'LOSE',
            'ats_differential': game['ats_differential'],
            'straight_up_result': 'WIN' if game['point_differential'] > 0 else 'LOSE'
        }
        results.append(result)
    
    return results

# Function to save the results for all teams into a CSV
def save_all_teams_to_csv(teams, seasons, db_path, csv_file_path):
    all_results = []
    for team in teams:
        team_results = summarize_ats_record_for_csv(team, seasons, db_path)
        all_results.extend(team_results)  # Accumulate the results for all teams
    
    # Convert the list of results to a DataFrame
    df = pd.DataFrame(all_results)
    
    # Save DataFrame to CSV
    df.to_csv(csv_file_path, index=False)
    print(f"All teams seasons data saved to {csv_file_path}\n\n")

# Example usage
db_path = 'data/nfl.db'  # Update this with the correct path to your database file
teams = [
    'ARI', 'ATL', 'BAL', 'BUF', 'CAR', 'CHI', 'CIN', 'CLE',
    'DAL', 'DEN', 'DET', 'GB', 'HOU', 'IND', 'JAX', 'KC',
    'LV', 'LAC', 'LAR', 'MIA', 'MIN', 'NE', 'NO', 'NYG',
    'NYJ', 'PHI', 'PIT', 'SF', 'SEA', 'TB', 'TEN', 'WAS'
]  # List of all NFL teams
seasons = [2021, 2022, 2023]  # Specify the seasons you want to analyze
csv_file_path = 'nfl_game_summaries.csv'  # Path to save the CSV file

# Save the results to a CSV
save_all_teams_to_csv(teams, seasons, db_path, csv_file_path)

# Load the CSV file
df = pd.read_csv('nfl_game_summaries.csv')

# Set the team name directly
# team_name = "DAL"  # Replace with the desired team name
team_name = "BAL"  # Replace with the desired team name
print(f'------------------------------------------ {team_name} Underdog Results 2021-2023 ------------------------------------------')

# Filter the data for the given team
team_data = df[df['team'] == team_name].sort_values(by=['season', 'week'])

# Variables to store win/loss counts
ats_wins, ats_losses = 0, 0
straight_up_wins, straight_up_losses = 0, 0

current_season = None
for _, row in team_data.iterrows():
    # Check if the season has changed to print it
    if row['season'] != current_season:
        if current_season is not None:
            print()  # Print a newline before starting a new season
        current_season = row['season']
        print(f"{row['season']}:")

    # Determine if the team is home or away and format the spread
    home_or_away = "vs" if row['team_favorite'] == team_name else "@"
    spread_info = f"{abs(row['spread'])} ({row['team_favorite']} favored)" if row['spread'] > 0 else f"{abs(row['spread'])} ({row['team_favorite']} favored)"
    
    # Format the game result
    result = f"Week {str(row['week']).zfill(2)}: {row['team']} {home_or_away} {row['opponent']}, spread: {spread_info}, "
    score_result = f"result: {row['team']} {row['straight_up_result'].upper()} {row['team_score']}-{row['opponent_score']}, "
    straight_up = f"STRAIGHT-UP: {row['straight_up_result'].upper()}"
    ats = f"ATS: {row['ats_result'].upper()}, ATS Diff: {row['ats_differential']:+.1f}"

    # Print the formatted result
    print(f"{result}{score_result}{straight_up}, {ats}")

    # Count wins and losses
    if row['ats_result'].lower() == 'win':
        ats_wins += 1
    else:
        ats_losses += 1

    if row['straight_up_result'].lower() == 'win':
        straight_up_wins += 1
    else:
        straight_up_losses += 1

# Print final ATS and straight-up records
print()
print(f"\033[1mATS Record: {ats_wins}-{ats_losses}\033[0m")
print(f"\033[1mStraight-up Record: {straight_up_wins}-{straight_up_losses}\033[0m")

All teams seasons data saved to nfl_game_summaries.csv


------------------------------------------ BAL Underdog Results 2021-2023 ------------------------------------------
2021:
Week 02: BAL @ KC, spread: 3.5 (KC favored), result: BAL WIN 36.0-35.0, STRAIGHT-UP: WIN, ATS: LOSE, ATS Diff: -2.5
Week 04: BAL @ DEN, spread: 1.0 (DEN favored), result: BAL WIN 23.0-7.0, STRAIGHT-UP: WIN, ATS: WIN, ATS Diff: +17.0
Week 14: BAL @ CLE, spread: 3.0 (CLE favored), result: BAL LOSE 22.0-24.0, STRAIGHT-UP: LOSE, ATS: WIN, ATS Diff: +1.0
Week 15: BAL @ GB, spread: 9.5 (GB favored), result: BAL LOSE 30.0-31.0, STRAIGHT-UP: LOSE, ATS: LOSE, ATS Diff: -10.5
Week 16: BAL @ CIN, spread: 7.5 (CIN favored), result: BAL LOSE 21.0-41.0, STRAIGHT-UP: LOSE, ATS: LOSE, ATS Diff: -12.5
Week 17: BAL @ LAR, spread: 7.0 (LAR favored), result: BAL LOSE 19.0-20.0, STRAIGHT-UP: LOSE, ATS: LOSE, ATS Diff: -8.0

2022:
Week 04: BAL @ BUF, spread: 3.0 (BUF favored), result: BAL LOSE 20.0-23.0, STRAIGHT-UP: LOSE, ATS: LO

---

# TEAM TRENDS

# Defense

In [None]:
# Loop and rank all teams 2020-2024 seasons

# Function to calculate total defensive yards allowed for a given team and season
def calculate_team_total_yards_defensive_stats(team_abbreviation, season, data_df):
    # Filter for the specified season by extracting the season from the 'game_id' column
    data_df['season'] = data_df['game_id'].str.slice(0, 4).astype(int)

    # # Filter the data for the specific season
    # if season == 2024:
    #     # For the current season, only consider games played so far
    #     current_week = data_df[data_df['season'] == season]['game_id'].str.slice(5, 7).astype(int).max()  # Get the latest week
    #     season_games = data_df[(data_df['season'] == season) & (data_df['game_id'].str.slice(5, 7).astype(int) <= current_week)]
    # else:
    #     season_games = data_df[data_df['season'] == season]
    # Filter the data for the specific season
    if season == 2024:
        # Filter out unplayed games based on the key stats columns (assuming unplayed games have all zeros)
        season_games = data_df[(data_df['season'] == season) &
                               (data_df[['away_rush_yds', 'away_pass_yds', 'home_rush_yds', 'home_pass_yds']].sum(axis=1) > 0)]
    else:
        season_games = data_df[data_df['season'] == season]


    # Initialize lists for away and home games
    away_games = []
    home_games = []

    # Loop through each game and split game_id to determine home/away status
    for index, row in season_games.iterrows():
        game_id_parts = row['game_id'].split('_')
        game_season, week, away_team, home_team = game_id_parts

        # Convert the week to an integer to filter weeks 01-18
        week_num = int(week)

        if week_num > 18:
            continue  # Skip postseason weeks

        if away_team == team_abbreviation:
            away_games.append(row)  # The team was the away team in this game
        elif home_team == team_abbreviation:
            home_games.append(row)  # The team was the home team in this game

    # Convert lists to DataFrames for easier processing
    away_games_df = pd.DataFrame(away_games)
    home_games_df = pd.DataFrame(home_games)

    # Convert lists to DataFrames for easier processing
    away_games_df = pd.DataFrame(away_games)
    home_games_df = pd.DataFrame(home_games)

    if not away_games_df.empty and all(col in away_games_df for col in ['home_rush_yds', 'home_pass_yds']):
        away_defense_stats = away_games_df[['home_rush_yds', 'home_pass_yds']].sum()
    else:
        away_defense_stats = pd.Series({'home_rush_yds': 0, 'home_pass_yds': 0})
    
    if not home_games_df.empty and all(col in home_games_df for col in ['away_rush_yds', 'away_pass_yds']):
        home_defense_stats = home_games_df[['away_rush_yds', 'away_pass_yds']].sum()
    else:
        home_defense_stats = pd.Series({'away_rush_yds': 0, 'away_pass_yds': 0})

    # # For home games, we want the stats from the "away" columns (opponent is away team)
    # home_defense_stats = home_games_df[['away_rush_yds', 'away_pass_yds']].sum()

    # # For away games, we want the stats from the "home" columns (opponent is home team)
    # away_defense_stats = away_games_df[['home_rush_yds', 'home_pass_yds']].sum()

    # Combine rushing and passing yards for all games
    total_rush_yards_allowed = home_defense_stats['away_rush_yds'] + away_defense_stats['home_rush_yds']
    total_pass_yards_allowed = home_defense_stats['away_pass_yds'] + away_defense_stats['home_pass_yds']

    # Calculate total yards allowed (rushing + passing)
    total_yards_allowed = total_rush_yards_allowed + total_pass_yards_allowed

    # Number of games played by the team
    num_games = len(home_games_df) + len(away_games_df)

    # Calculate averages for total yards allowed
    defensive_summary = {
        "avg_total_yards_allowed": total_yards_allowed / num_games if num_games > 0 else 0,
        "avg_rush_yards_allowed": total_rush_yards_allowed / num_games if num_games > 0 else 0,
        "avg_pass_yards_allowed": total_pass_yards_allowed / num_games if num_games > 0 else 0,
        "season": season
    }

    return defensive_summary

# Load the dataset (assuming it's located in 'data/' folder)
file_path = 'data/all_team_game_logs.csv'
team_game_logs_df = pd.read_csv(file_path)

# List of all team abbreviations
teams = ['ARI', 'ATL', 'BAL', 'BUF', 'CAR', 'CHI', 'CIN', 'CLE', 'DAL', 'DEN', 
         'DET', 'GB', 'HOU', 'IND', 'JAX', 'KC', 'LAC', 'LAR', 'LVR', 'MIA', 
         'MIN', 'NE', 'NO', 'NYG', 'NYJ', 'PHI', 'PIT', 'SEA', 'SF', 'TB', 'TEN', 'WAS']

# List to hold defensive stats for all teams
defensive_stats_list = []

# Loop through the seasons 2020 to 2023, and the played games in 2024
for season in range(2020, 2025):
    for team in teams:
        stats = calculate_team_total_yards_defensive_stats(team, season, team_game_logs_df)
        stats['team'] = team  # Add team abbreviation to the stats
        defensive_stats_list.append(stats)

# # Handle the 2024 season for only played games
# for team in teams:
#     stats = calculate_team_total_yards_defensive_stats(team, 2024, team_game_logs_df)
#     stats['team'] = team  # Add team abbreviation to the stats
#     defensive_stats_list.append(stats)

# Create a DataFrame from the defensive stats list
defensive_stats_df = pd.DataFrame(defensive_stats_list)

# Insert 'team' column at the first position
defensive_stats_df.insert(0, 'team', defensive_stats_df.pop('team'))

# Sort the DataFrame by average total yards allowed in descending order
sorted_defensive_stats_df = defensive_stats_df.sort_values(by='avg_pass_yards_allowed', ascending=False)
# sorted_defensive_stats_df = defensive_stats_df.sort_values(by=['season'], ascending=[True])

# Reset index and drop the old index
sorted_defensive_stats_df.reset_index(drop=True, inplace=True)

# Add a new column for row numbers
# sorted_defensive_stats_df.insert(0, 'No', sorted_defensive_stats_df.index + 1)

# Save to CSV without an index
sorted_defensive_stats_df.to_csv('data/team_defense_analysis_2020_2024.csv', index=False)

# Display the sorted DataFrame using Tabulate
# print(tabulate(sorted_defensive_stats_df, headers='keys', tablefmt='psql', showindex=False))

# Print confirmation message
print("Defensive stats for all teams have been saved to 'team_defense_analysis_2020_2024.csv'.")

# Read the saved CSV file
final_df = pd.read_csv('data/team_defense_analysis_2020_2024.csv')

# Filter the DataFrame to include only the 2024 season
df_2024 = final_df[final_df['season'] == 2024].copy()

# Reset index for the filtered DataFrame (optional, for cleaner display)
df_2024.reset_index(drop=True, inplace=True)

# Add a new 'No' column specific to the 2024 data (optional)
# df_2024.insert(0, 'No', df_2024.index + 1)

# Display only the 2024 DataFrame using Tabulate
print('\n                                               2024 STATS')
print(tabulate(df_2024, headers='keys', tablefmt='psql', showindex=False))

# Print confirmation message
# print("Defensive stats for all teams have been saved to 'team_defense_analysis_2020_2024.csv'.")

# Filter the DataFrame to include only the 2023 season
df_2023 = final_df[final_df['season'] == 2023].copy()

# Reset index for the filtered DataFrame (optional, for cleaner display)
df_2023.reset_index(drop=True, inplace=True)

# Add a new 'No' column specific to the 2024 data (optional)
# df_2024.insert(0, 'No', df_2024.index + 1)

# Display only the 2024 DataFrame using Tabulate
print('\n                                               2023 STATS')
print(tabulate(df_2023, headers='keys', tablefmt='psql', showindex=False))

# Print confirmation message
# print("Defensive stats for all teams have been saved to 'team_defense_analysis_2020_2024.csv'.")

# !open data/team_defense_analysis_2020_2024.csv

## Sacks

In [None]:
# Sacks Given & Taken

years = [2021, 2022, 2023, 2024]

file_path = 'data/all_team_game_logs.csv'
df = pd.read_csv(file_path)

unplayed_games = df[
    df['game_id'].str.contains('2024') &  # Check if 'game_id' contains "2024"
    ((df['home_pts_off'].isnull() | (df['home_pts_off'] == 0)) &
     (df['away_pts_off'].isnull() | (df['away_pts_off'] == 0)))
]
unplayed_game_ids = unplayed_games['game_id'].tolist()
df = df[~df['game_id'].isin(unplayed_game_ids)]
# df.to_csv('data/all_team_game_logs.csv', index=False)
print("Unplayed games removed and updated CSV saved.")

# Extract year and week from 'game_id'
df[['year', 'week', 'away_team', 'home_team']] = df['game_id'].str.split('_', expand=True).iloc[:, :4]
df['year'] = df['year'].astype(int)
df['week'] = df['week'].astype(int)

for year in years:
    # Filter for the 2023 season and weeks <= 18
    df_2023 = df[(df['year'] == year) & (df['week'] <= 18)]
    
    # Initialize a dictionary to track sacks made and sacks taken
    sack_stats = {
        'team': [],
        'sacks_made': [],
        'sacks_taken': []
    }
    
    # List of all 32 NFL teams
    teams = [
        'ARI', 'ATL', 'BAL', 'BUF', 'CAR', 'CHI', 'CIN', 'CLE',
        'DAL', 'DEN', 'DET', 'GB', 'HOU', 'IND', 'JAX', 'KC',
        'LV', 'LAC', 'LAR', 'MIA', 'MIN', 'NE', 'NO', 'NYG',
        'NYJ', 'PHI', 'PIT', 'SF', 'SEA', 'TB', 'TEN', 'WAS'
    ]
    
    # Calculate the sacks made and sacks taken for each team
    for team in teams:
        # Sacks made by the team's defense (home and away games)
        sacks_made = df_2023.loc[(df_2023['home_team'] == team), 'away_pass_sacked'].sum() + \
                     df_2023.loc[(df_2023['away_team'] == team), 'home_pass_sacked'].sum()
        
        # Sacks taken (against the team) - includes home and away games
        sacks_taken = df_2023.loc[(df_2023['home_team'] == team), 'home_pass_sacked'].sum() + \
                      df_2023.loc[(df_2023['away_team'] == team), 'away_pass_sacked'].sum()
        
        # Store results
        sack_stats['team'].append(team)
        sack_stats['sacks_made'].append(sacks_made)
        sack_stats['sacks_taken'].append(sacks_taken)
    
    # Convert the dictionary to a DataFrame
    sack_stats_df = pd.DataFrame(sack_stats)
    
    # # Calculate average sacks made and taken (if needed by game or for total analysis)
    sack_stats_df['average_sacks_made'] = sack_stats_df['sacks_made'] / len(df_2023['week'].unique())
    sack_stats_df['average_sacks_taken'] = sack_stats_df['sacks_taken'] / len(df_2023['week'].unique())
    
    sacks_made_sorted = sack_stats_df[['team', 'sacks_made', 'average_sacks_made']].sort_values(by='sacks_made', ascending=False)
    sacks_taken_sorted = sack_stats_df[['team', 'sacks_taken', 'average_sacks_taken']].sort_values(by='sacks_taken', ascending=False)
    
    # Use tabulate to print both tables
    # print("Teams Sorted by Sacks Made:")
    # print(tabulate(sacks_made_sorted, headers='keys', tablefmt='grid'))
    print(sacks_made_sorted)
    sacks_made_sorted.to_csv(f'data/sacks_made_sorted_{year}.csv', index=False)
    print()
    print()
    
    # print("\nTeams Sorted by Sacks Taken:")
    # print(tabulate(sacks_taken_sorted, headers='keys', tablefmt='grid'))
    print(sacks_taken_sorted)
    sacks_taken_sorted.to_csv(f'data/sacks_taken_sorted_{year}.csv', index=False)
    print()
    print()

In [None]:
!open data/sacks_taken_sorted_2024.csv

## Offense

### Passing

### Rushing

## Defense

### General
- (Passing/Rushing ypg)

### Sacks

# PLAYER TRENDS 

# Quarterbacks

## Running Backs

## Wide Receivers

## Tight Ends