In [4]:
import pandas as pd
import os
from datetime import datetime

# Function to determine if a game is a playoff game based on the date
def is_playoff_game(date_str, season_start_year):
    # Convert date string to datetime object (assuming MM/DD/YYYY format)
    date_parts = date_str.split('/')
    month, day, year = int(date_parts[0]), int(date_parts[1]), int(date_parts[2])
    game_date = datetime(year, month, day)
    
    # Playoff start and end dates for each season
    playoff_dates = {
        '22': {
            'start': datetime(2023, 4, 16),
            'end': datetime(2023, 6, 12)
        },
        '23': {
            'start': datetime(2024, 4, 15),
            'end': datetime(2024, 6, 17)
        }
    }
    
    # Check if the game date is within playoff dates
    season_key = season_start_year[-2:]  # Get the last 2 digits of the year
    if season_key in playoff_dates:
        start_date = playoff_dates[season_key]['start']
        end_date = playoff_dates[season_key]['end']
        return start_date <= game_date <= end_date
    
    return False

# Function to determine playoff round based on date
def determine_playoff_round(date_str, season_start_year):
    date_parts = date_str.split('/')
    month, day, year = int(date_parts[0]), int(date_parts[1]), int(date_parts[2])
    game_date = datetime(year, month, day)
    
    # Playoff start dates
    playoff_dates = {
        '18': {
            'start': datetime(2019, 4, 14),
            'conf_finals_start': datetime(2019, 5, 12),
            'finals_start': datetime(2019, 5, 30)
        },
        '21': {
            'start': datetime(2022, 5, 22),
            'conf_finals_start': datetime(2022, 5, 27),
            'finals_start': datetime(2022, 6, 2)
        },
        '22': {
            'start': datetime(2023, 4, 16),
            'conf_finals_start': datetime(2023, 5, 14),
            'finals_start': datetime(2023, 5, 29)
        },
        '23': {
            'start': datetime(2024, 4, 15),
            'conf_finals_start': datetime(2024, 5, 13),
            'finals_start': datetime(2024, 5, 27)
        }
    }
    
    season_key = season_start_year[-2:]
    if season_key in playoff_dates:
        # Determine round based on date
        if game_date >= playoff_dates[season_key]['finals_start']:
            return "NBA Finals"
        elif game_date >= playoff_dates[season_key]['conf_finals_start']:
            return "Conference Finals"
        elif game_date >= playoff_dates[season_key]['start'] + pd.Timedelta(days=14):
            return "Second Round"
        else:
            return "First Round"
    
    return "Unknown"

# Load the CSV files
def load_and_process_csv(file_path, season_start_year):
    try:
        df = pd.read_csv(file_path)
        
        # Filter playoff games
        playoff_games = df[df['date'].apply(lambda x: is_playoff_game(x, season_start_year))]
        
        # Add season and playoff round information
        if not playoff_games.empty:
            playoff_games['season'] = f"{season_start_year}-{int(season_start_year[-2:]) + 1}"
            playoff_games['playoff_round'] = playoff_games['date'].apply(
                lambda x: determine_playoff_round(x, season_start_year)
            )
        
        return playoff_games
    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return pd.DataFrame()

# Main function to combine playoff games
def combine_playoff_games(file_paths, season_years):
    all_playoff_games = []
    
    for file_path, season_year in zip(file_paths, season_years):
        playoff_games = load_and_process_csv(file_path, season_year)
        if not playoff_games.empty:
            all_playoff_games.append(playoff_games)
    
    # Combine all playoff games
    if all_playoff_games:
        combined_df = pd.concat(all_playoff_games, ignore_index=True)
        
        # Ensure consistent columns
        # You may need to adjust this based on your specific data
        common_columns = [
            'date', 'season', 'playoff_round', 'home', 'road', 'hscore', 'rscore',
            'line', 'lineavg', 'linesag', 'linesage', 'linesagp', 'lineopen',
            'linemoore', 'linesaggm', 'linefox', 'linedok', 'linetalis',
            'linemassey', 'linepugh', 'linedonc', 'linedunk', 'lineespn',
            'linelogic', 'lineround'
        ]
        
        # Select only common columns that exist in the DataFrame
        existing_columns = [col for col in common_columns if col in combined_df.columns]
        combined_df = combined_df[existing_columns]
        
        # Sort games by date
        combined_df['temp_date'] = pd.to_datetime(combined_df['date'], format='%m/%d/%Y')
        combined_df.sort_values('temp_date', inplace=True)
        combined_df.drop('temp_date', axis=1, inplace=True)
        
        return combined_df
    else:
        return pd.DataFrame()

# File paths and corresponding season years
file_paths = [
    'nba22.csv',
    'nba23.csv'
]

season_years = [
    '2022',
    '2023'
]

# Execute the function to combine playoff games
playoff_games_df = combine_playoff_games(file_paths, season_years)

# Save the combined playoff games to a new CSV file
if not playoff_games_df.empty:
    playoff_games_df.to_csv('nba_playoff_games_combined.csv', index=False)
    print(f"Successfully combined {len(playoff_games_df)} playoff games from {len(season_years)} seasons.")
    
    # Print statistics
    print("\nStatistics by Season:")
    season_counts = playoff_games_df['season'].value_counts().sort_index()
    for season, count in season_counts.items():
        print(f"{season}: {count} games")
    
    print("\nStatistics by Playoff Round:")
    round_counts = playoff_games_df['playoff_round'].value_counts()
    for round_name, count in round_counts.items():
        print(f"{round_name}: {count} games")
else:
    print("No playoff games were found or an error occurred during processing.")

Successfully combined 168 playoff games from 2 seasons.

Statistics by Season:
2022-23: 80 games
2023-24: 88 games

Statistics by Playoff Round:
First Round: 76 games
Second Round: 50 games
Conference Finals: 28 games
NBA Finals: 14 games


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  playoff_games['season'] = f"{season_start_year}-{int(season_start_year[-2:]) + 1}"
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  playoff_games['playoff_round'] = playoff_games['date'].apply(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  playoff_games['season'] = f"{season_start_year}-{int(season_

In [5]:
import pandas as pd
import numpy as np

# Load the data
df = pd.read_csv('nba_playoff_games_combined.csv')

# Drop rows with missing ESPN line values
df = df.dropna(subset=['lineespn'])

# Identify game numbers in series
# Create separate DataFrames for each matchup
def identify_game_numbers(dataframe):
    # Sort by date to ensure games are in order
    dataframe = dataframe.sort_values('date')
    
    # Group by season and matchup (regardless of home/away)
    matchups = []
    for season in dataframe['season'].unique():
        season_df = dataframe[dataframe['season'] == season]
        
        # Get all pairs of teams that played against each other
        for i, row in season_df.iterrows():
            team1, team2 = sorted([row['home'], row['road']])
            matchup = (season, team1, team2)
            if matchup not in matchups:
                matchups.append(matchup)
        
    # Assign game numbers to each matchup
    dataframe['game_num'] = 0
    for season, team1, team2 in matchups:
        # Get all games between these teams in this season
        mask = (
            (dataframe['season'] == season) & 
            (
                ((dataframe['home'] == team1) & (dataframe['road'] == team2)) | 
                ((dataframe['home'] == team2) & (dataframe['road'] == team1))
            )
        )
        # Sort these games by date
        matchup_games = dataframe.loc[mask].sort_values('date')
        # Assign game numbers
        game_nums = list(range(1, len(matchup_games) + 1))
        dataframe.loc[matchup_games.index, 'game_num'] = game_nums
    
    return dataframe

# Apply the function to identify game numbers
df = identify_game_numbers(df)

# Calculate the difference between ESPN line and regular line
df['espn_vs_line_diff'] = df['lineespn'] - df['line']

# Home team wins when hscore > rscore
df['home_win'] = df['hscore'] > df['rscore']

# Analyze cases where ESPN favors the home team more than the regular line
df['espn_favors_home_more'] = (df['line'] > 0) & (df['espn_vs_line_diff'] > 0)

# Calculate overall win rate when ESPN favors home team more
espn_favors_home_more_df = df[df['espn_favors_home_more'] == True]
overall_win_rate = espn_favors_home_more_df['home_win'].mean()

print(f"Overall win rate when ESPN favors home team more than regular line: {overall_win_rate:.4f}")

# Focus on games 3 and 4
games_3_4 = df[df['game_num'].isin([3, 4])]

# Win rate for games 3 and 4 when ESPN favors home team more
g3_4_espn_favors_home = games_3_4[games_3_4['espn_favors_home_more'] == True]
g3_4_win_rate = g3_4_espn_favors_home['home_win'].mean()

print(f"Win rate for games 3 & 4 when ESPN favors home team more: {g3_4_win_rate:.4f}")

# Separate analysis for game 3 vs game 4
for game_num in [3, 4]:
    game_df = df[df['game_num'] == game_num]
    game_espn_favors_home = game_df[game_df['espn_favors_home_more'] == True]
    
    if len(game_espn_favors_home) > 0:
        game_win_rate = game_espn_favors_home['home_win'].mean()
        num_games = len(game_espn_favors_home)
        print(f"Game {game_num} win rate when ESPN favors home team more: {game_win_rate:.4f} (based on {num_games} games)")
    else:
        print(f"No game {game_num} instances where ESPN favors home team more than regular line")

# Additional analysis: show a breakdown by playoff round
print("\nWin rate by playoff round when ESPN favors home team more (games 3 & 4):")
for round_name in g3_4_espn_favors_home['playoff_round'].unique():
    round_df = g3_4_espn_favors_home[g3_4_espn_favors_home['playoff_round'] == round_name]
    round_win_rate = round_df['home_win'].mean()
    num_games = len(round_df)
    print(f"{round_name}: {round_win_rate:.4f} (based on {num_games} games)")

Overall win rate when ESPN favors home team more than regular line: 0.6410
Win rate for games 3 & 4 when ESPN favors home team more: 0.4545
Game 3 win rate when ESPN favors home team more: 0.6667 (based on 3 games)
Game 4 win rate when ESPN favors home team more: 0.3750 (based on 8 games)

Win rate by playoff round when ESPN favors home team more (games 3 & 4):
First Round: 0.5000 (based on 4 games)
Second Round: 0.5000 (based on 4 games)
Conference Finals: 0.5000 (based on 2 games)
NBA Finals: 0.0000 (based on 1 games)


In [1]:
import pandas as pd
import numpy as np

# Load the data
df = pd.read_csv('nba_playoff_games_combined.csv')

# Drop rows with missing ESPN line values
df = df.dropna(subset=['lineespn', 'line'])

# Function to identify game numbers in series
def identify_game_numbers(dataframe):
    # Sort by date to ensure games are in order
    dataframe = dataframe.sort_values('date')
    
    # Initialize a dictionary to track matchups
    matchups = {}
    
    # Assign game numbers to each matchup
    dataframe['game_num'] = 0
    
    for idx, row in dataframe.iterrows():
        # Create matchup key independent of home/away
        teams = sorted([row['home'], row['road']])
        season = row['season']
        matchup_key = f"{season}_{teams[0]}_{teams[1]}"
        
        # Initialize matchup counter if needed
        if matchup_key not in matchups:
            matchups[matchup_key] = 0
        
        # Increment game number for this matchup
        matchups[matchup_key] += 1
        
        # Assign game number to this row
        dataframe.loc[idx, 'game_num'] = matchups[matchup_key]
    
    return dataframe

# Apply the function to identify game numbers
df = identify_game_numbers(df)

# Calculate margin and if teams covered
df['margin'] = df['hscore'] - df['rscore']
df['home_covered'] = df['margin'] > df['line']
df['road_covered'] = df['margin'] < df['line']

# Calculate difference between ESPN line and regular line
df['espn_diff'] = df['lineespn'] - df['line']
df['abs_diff'] = df['espn_diff'].abs()

# Identify games with 3+ point differences
df_large_diff = df[df['abs_diff'] >= 3].copy()

# Identify which team ESPN favors more
df_large_diff['espn_favors_home_more'] = df_large_diff['espn_diff'] > 0
df_large_diff['espn_favors_road_more'] = df_large_diff['espn_diff'] < 0

# Overall analysis
total_games = len(df_large_diff)
espn_favors_home = df_large_diff[df_large_diff['espn_favors_home_more']].copy()
espn_favors_road = df_large_diff[df_large_diff['espn_favors_road_more']].copy()

home_covered_count = espn_favors_home['home_covered'].sum()
road_covered_count = espn_favors_road['road_covered'].sum()

print(f"Total playoff games with 3+ point differences: {total_games}")
print(f"\nWhen ESPN favors HOME team more: {home_covered_count}/{len(espn_favors_home)} covered ({home_covered_count/len(espn_favors_home)*100:.1f}%)")
print(f"When ESPN favors ROAD team more: {road_covered_count}/{len(espn_favors_road)} covered ({road_covered_count/len(espn_favors_road)*100:.1f}%)")

# Overall success rate
overall_correct = home_covered_count + road_covered_count
print(f"\nOverall success rate: {overall_correct}/{total_games} ({overall_correct/total_games*100:.1f}%)")

# Analysis by game number
print("\n=== ANALYSIS BY GAME NUMBER ===")
for game_num in range(1, 8):  # Games 1-7 in a playoff series
    game_df = df_large_diff[df_large_diff['game_num'] == game_num]
    
    if len(game_df) == 0:
        continue
    
    # Home more analysis
    home_more_df = game_df[game_df['espn_favors_home_more']]
    home_more_total = len(home_more_df)
    home_more_covered = home_more_df['home_covered'].sum()
    
    # Road more analysis
    road_more_df = game_df[game_df['espn_favors_road_more']]
    road_more_total = len(road_more_df) 
    road_more_covered = road_more_df['road_covered'].sum()
    
    # Print results
    print(f"\nGame {game_num} (total: {len(game_df)})")
    
    if home_more_total > 0:
        home_rate = home_more_covered / home_more_total * 100
        print(f"  ESPN favors HOME more: {home_more_covered}/{home_more_total} ({home_rate:.1f}%)")
    
    if road_more_total > 0:
        road_rate = road_more_covered / road_more_total * 100
        print(f"  ESPN favors ROAD more: {road_more_covered}/{road_more_total} ({road_rate:.1f}%)")

Total playoff games with 3+ point differences: 66

When ESPN favors HOME team more: 8/15 covered (53.3%)
When ESPN favors ROAD team more: 25/51 covered (49.0%)

Overall success rate: 33/66 (50.0%)

=== ANALYSIS BY GAME NUMBER ===

Game 1 (total: 12)
  ESPN favors HOME more: 3/4 (75.0%)
  ESPN favors ROAD more: 5/8 (62.5%)

Game 2 (total: 12)
  ESPN favors ROAD more: 5/12 (41.7%)

Game 3 (total: 15)
  ESPN favors HOME more: 1/4 (25.0%)
  ESPN favors ROAD more: 7/11 (63.6%)

Game 4 (total: 12)
  ESPN favors HOME more: 3/5 (60.0%)
  ESPN favors ROAD more: 1/7 (14.3%)

Game 5 (total: 8)
  ESPN favors HOME more: 1/1 (100.0%)
  ESPN favors ROAD more: 6/7 (85.7%)

Game 6 (total: 6)
  ESPN favors HOME more: 0/1 (0.0%)
  ESPN favors ROAD more: 1/5 (20.0%)

Game 7 (total: 1)
  ESPN favors ROAD more: 0/1 (0.0%)


In [2]:
import pandas as pd
import numpy as np

# Load the data
df = pd.read_csv('nba_playoff_games_combined.csv')

# Drop rows with missing ESPN line values
df = df.dropna(subset=['lineespn', 'line'])

# Function to identify game numbers in series
def identify_game_numbers(dataframe):
    # Sort by date to ensure games are in order
    dataframe = dataframe.sort_values('date')
    
    # Initialize a dictionary to track matchups
    matchups = {}
    
    # Assign game numbers to each matchup
    dataframe['game_num'] = 0
    
    for idx, row in dataframe.iterrows():
        # Create matchup key independent of home/away
        teams = sorted([row['home'], row['road']])
        season = row['season']
        matchup_key = f"{season}_{teams[0]}_{teams[1]}"
        
        # Initialize matchup counter if needed
        if matchup_key not in matchups:
            matchups[matchup_key] = 0
        
        # Increment game number for this matchup
        matchups[matchup_key] += 1
        
        # Assign game number to this row
        dataframe.loc[idx, 'game_num'] = matchups[matchup_key]
    
    return dataframe

# Apply the function to identify game numbers
df = identify_game_numbers(df)

# Calculate margin and if teams covered
df['margin'] = df['hscore'] - df['rscore']
df['home_covered'] = df['margin'] > df['line']
df['road_covered'] = df['margin'] < df['line']

# Calculate difference between ESPN line and regular line
df['espn_diff'] = df['lineespn'] - df['line']
df['abs_diff'] = df['espn_diff'].abs()

# Identify games with 1+ point differences
df_diff = df[df['abs_diff'] >= 1].copy()

# Identify which team ESPN favors more
df_diff['espn_favors_home_more'] = df_diff['espn_diff'] > 0
df_diff['espn_favors_road_more'] = df_diff['espn_diff'] < 0

# Overall analysis
total_games = len(df_diff)
espn_favors_home = df_diff[df_diff['espn_favors_home_more']].copy()
espn_favors_road = df_diff[df_diff['espn_favors_road_more']].copy()

home_covered_count = espn_favors_home['home_covered'].sum()
road_covered_count = espn_favors_road['road_covered'].sum()

print(f"Total playoff games with 1+ point differences: {total_games}")
print(f"\nWhen ESPN favors HOME team more: {home_covered_count}/{len(espn_favors_home)} covered ({home_covered_count/len(espn_favors_home)*100:.1f}%)")
print(f"When ESPN favors ROAD team more: {road_covered_count}/{len(espn_favors_road)} covered ({road_covered_count/len(espn_favors_road)*100:.1f}%)")

# Overall success rate
overall_correct = home_covered_count + road_covered_count
print(f"\nOverall success rate: {overall_correct}/{total_games} ({overall_correct/total_games*100:.1f}%)")

# Analysis by game number
print("\n=== ANALYSIS BY GAME NUMBER ===")
for game_num in range(1, 8):  # Games 1-7 in a playoff series
    game_df = df_diff[df_diff['game_num'] == game_num]
    
    if len(game_df) == 0:
        continue
    
    # Home more analysis
    home_more_df = game_df[game_df['espn_favors_home_more']]
    home_more_total = len(home_more_df)
    home_more_covered = home_more_df['home_covered'].sum()
    
    # Road more analysis
    road_more_df = game_df[game_df['espn_favors_road_more']]
    road_more_total = len(road_more_df) 
    road_more_covered = road_more_df['road_covered'].sum()
    
    # Print results
    print(f"\nGame {game_num} (total: {len(game_df)})")
    
    if home_more_total > 0:
        home_rate = home_more_covered / home_more_total * 100
        print(f"  ESPN favors HOME more: {home_more_covered}/{home_more_total} ({home_rate:.1f}%)")
    
    if road_more_total > 0:
        road_rate = road_more_covered / road_more_total * 100
        print(f"  ESPN favors ROAD more: {road_more_covered}/{road_more_total} ({road_rate:.1f}%)")

# Also analyze different thresholds (1+, 2+, 3+)
print("\n=== ANALYSIS BY DIFFERENCE SIZE ===")
for diff_size in [1, 2, 3]:
    # Filter for games with diff_size+ point differences
    df_threshold = df[df['abs_diff'] >= diff_size].copy()
    
    # Home more analysis
    home_more_df = df_threshold[df_threshold['espn_diff'] > 0]
    home_more_total = len(home_more_df)
    home_more_covered = home_more_df['home_covered'].sum()
    
    # Road more analysis
    road_more_df = df_threshold[df_threshold['espn_diff'] < 0]
    road_more_total = len(road_more_df) 
    road_more_covered = road_more_df['road_covered'].sum()
    
    # Print results
    print(f"\nDifference of {diff_size}+ points (total: {len(df_threshold)})")
    
    if home_more_total > 0:
        home_rate = home_more_covered / home_more_total * 100
        print(f"  ESPN favors HOME more: {home_more_covered}/{home_more_total} ({home_rate:.1f}%)")
    
    if road_more_total > 0:
        road_rate = road_more_covered / road_more_total * 100
        print(f"  ESPN favors ROAD more: {road_more_covered}/{road_more_total} ({road_rate:.1f}%)")

Total playoff games with 1+ point differences: 129

When ESPN favors HOME team more: 23/42 covered (54.8%)
When ESPN favors ROAD team more: 45/87 covered (51.7%)

Overall success rate: 68/129 (52.7%)

=== ANALYSIS BY GAME NUMBER ===

Game 1 (total: 26)
  ESPN favors HOME more: 7/9 (77.8%)
  ESPN favors ROAD more: 10/17 (58.8%)

Game 2 (total: 22)
  ESPN favors HOME more: 4/5 (80.0%)
  ESPN favors ROAD more: 7/17 (41.2%)

Game 3 (total: 27)
  ESPN favors HOME more: 3/8 (37.5%)
  ESPN favors ROAD more: 10/19 (52.6%)

Game 4 (total: 27)
  ESPN favors HOME more: 8/15 (53.3%)
  ESPN favors ROAD more: 5/12 (41.7%)

Game 5 (total: 14)
  ESPN favors HOME more: 1/2 (50.0%)
  ESPN favors ROAD more: 9/12 (75.0%)

Game 6 (total: 9)
  ESPN favors HOME more: 0/2 (0.0%)
  ESPN favors ROAD more: 2/7 (28.6%)

Game 7 (total: 4)
  ESPN favors HOME more: 0/1 (0.0%)
  ESPN favors ROAD more: 2/3 (66.7%)

=== ANALYSIS BY DIFFERENCE SIZE ===

Difference of 1+ points (total: 129)
  ESPN favors HOME more: 23/42

In [3]:
import pandas as pd
import numpy as np

# Load the data
df = pd.read_csv('nba_playoff_games_combined.csv')

# Drop rows with missing ESPN line values
df = df.dropna(subset=['lineespn', 'line'])

# Function to identify game numbers in series
def identify_game_numbers(dataframe):
    # Sort by date to ensure games are in order
    dataframe = dataframe.sort_values('date')
    
    # Initialize a dictionary to track matchups
    matchups = {}
    
    # Assign game numbers to each matchup
    dataframe['game_num'] = 0
    
    for idx, row in dataframe.iterrows():
        # Create matchup key independent of home/away
        teams = sorted([row['home'], row['road']])
        season = row['season']
        matchup_key = f"{season}_{teams[0]}_{teams[1]}"
        
        # Initialize matchup counter if needed
        if matchup_key not in matchups:
            matchups[matchup_key] = 0
        
        # Increment game number for this matchup
        matchups[matchup_key] += 1
        
        # Assign game number to this row
        dataframe.loc[idx, 'game_num'] = matchups[matchup_key]
    
    return dataframe

# Apply the function to identify game numbers
df = identify_game_numbers(df)

# Calculate margin and if teams covered
df['margin'] = df['hscore'] - df['rscore']
df['home_covered'] = df['margin'] > df['line']
df['road_covered'] = df['margin'] < df['line']

# Calculate difference between ESPN line and regular line
df['espn_diff'] = df['lineespn'] - df['line']
df['abs_diff'] = df['espn_diff'].abs()

# Identify favorite/underdog
df['home_favored'] = df['line'] > 0
df['road_favored'] = df['line'] < 0

# Check if ESPN favors favorite or underdog more
# When line is positive, home is favored
# When line is negative, road is favored
df['espn_favors_favorite_more'] = (df['line'] > 0) & (df['espn_diff'] > 0) | (df['line'] < 0) & (df['espn_diff'] < 0)
df['espn_favors_underdog_more'] = (df['line'] > 0) & (df['espn_diff'] < 0) | (df['line'] < 0) & (df['espn_diff'] > 0)

# Function to check if favorite covered
def favorite_covered(row):
    if row['line'] > 0:  # Home is favorite
        return row['home_covered']
    else:  # Road is favorite
        return row['road_covered']

# Function to check if underdog covered
def underdog_covered(row):
    if row['line'] > 0:  # Home is favorite, so road is underdog
        return not row['home_covered']
    else:  # Road is favorite, so home is underdog
        return not row['road_covered']

# Apply functions
df['favorite_covered'] = df.apply(favorite_covered, axis=1)
df['underdog_covered'] = df.apply(underdog_covered, axis=1)

# Filter for games with meaningful differences (1+ point)
df_diff = df[df['abs_diff'] >= 1].copy()

# Overall analysis
espn_favors_favorite = df_diff[df_diff['espn_favors_favorite_more']].copy()
espn_favors_underdog = df_diff[df_diff['espn_favors_underdog_more']].copy()

favorite_covered_count = espn_favors_favorite['favorite_covered'].sum()
underdog_covered_count = espn_favors_underdog['underdog_covered'].sum()

print(f"Total playoff games with 1+ point differences: {len(df_diff)}")
print(f"\nWhen ESPN favors FAVORITE more: {favorite_covered_count}/{len(espn_favors_favorite)} covered ({favorite_covered_count/len(espn_favors_favorite)*100:.1f}%)")
print(f"When ESPN favors UNDERDOG more: {underdog_covered_count}/{len(espn_favors_underdog)} covered ({underdog_covered_count/len(espn_favors_underdog)*100:.1f}%)")

# Analysis by game number
print("\n=== ANALYSIS BY GAME NUMBER ===")
for game_num in range(1, 8):  # Games 1-7 in a playoff series
    game_df = df_diff[df_diff['game_num'] == game_num]
    
    if len(game_df) == 0:
        continue
    
    # Favorite more analysis
    fav_more_df = game_df[game_df['espn_favors_favorite_more']]
    fav_more_total = len(fav_more_df)
    fav_more_covered = fav_more_df['favorite_covered'].sum()
    
    # Underdog more analysis
    dog_more_df = game_df[game_df['espn_favors_underdog_more']]
    dog_more_total = len(dog_more_df) 
    dog_more_covered = dog_more_df['underdog_covered'].sum()
    
    # Print results
    print(f"\nGame {game_num} (total: {len(game_df)})")
    
    if fav_more_total > 0:
        fav_rate = fav_more_covered / fav_more_total * 100
        print(f"  ESPN favors FAVORITE more: {fav_more_covered}/{fav_more_total} ({fav_rate:.1f}%)")
    
    if dog_more_total > 0:
        dog_rate = dog_more_covered / dog_more_total * 100
        print(f"  ESPN favors UNDERDOG more: {dog_more_covered}/{dog_more_total} ({dog_rate:.1f}%)")

# Also analyze different thresholds (1+, 2+, 3+)
print("\n=== ANALYSIS BY DIFFERENCE SIZE ===")
for diff_size in [1, 2, 3]:
    # Filter for games with diff_size+ point differences
    df_threshold = df[df['abs_diff'] >= diff_size].copy()
    
    # Favorite more analysis
    fav_more_df = df_threshold[df_threshold['espn_favors_favorite_more']]
    fav_more_total = len(fav_more_df)
    fav_more_covered = fav_more_df['favorite_covered'].sum()
    
    # Underdog more analysis
    dog_more_df = df_threshold[df_threshold['espn_favors_underdog_more']]
    dog_more_total = len(dog_more_df) 
    dog_more_covered = dog_more_df['underdog_covered'].sum()
    
    # Print results
    print(f"\nDifference of {diff_size}+ points (total: {len(df_threshold)})")
    
    if fav_more_total > 0:
        fav_rate = fav_more_covered / fav_more_total * 100
        print(f"  ESPN favors FAVORITE more: {fav_more_covered}/{fav_more_total} ({fav_rate:.1f}%)")
    
    if dog_more_total > 0:
        dog_rate = dog_more_covered / dog_more_total * 100
        print(f"  ESPN favors UNDERDOG more: {dog_more_covered}/{dog_more_total} ({dog_rate:.1f}%)")

Total playoff games with 1+ point differences: 129

When ESPN favors FAVORITE more: 19/37 covered (51.4%)
When ESPN favors UNDERDOG more: 51/92 covered (55.4%)

=== ANALYSIS BY GAME NUMBER ===

Game 1 (total: 26)
  ESPN favors FAVORITE more: 5/8 (62.5%)
  ESPN favors UNDERDOG more: 13/18 (72.2%)

Game 2 (total: 22)
  ESPN favors FAVORITE more: 5/6 (83.3%)
  ESPN favors UNDERDOG more: 7/16 (43.8%)

Game 3 (total: 27)
  ESPN favors FAVORITE more: 4/8 (50.0%)
  ESPN favors UNDERDOG more: 9/19 (47.4%)

Game 4 (total: 27)
  ESPN favors FAVORITE more: 4/10 (40.0%)
  ESPN favors UNDERDOG more: 9/17 (52.9%)

Game 5 (total: 14)
  ESPN favors FAVORITE more: 1/2 (50.0%)
  ESPN favors UNDERDOG more: 9/12 (75.0%)

Game 6 (total: 9)
  ESPN favors FAVORITE more: 0/2 (0.0%)
  ESPN favors UNDERDOG more: 2/7 (28.6%)

Game 7 (total: 4)
  ESPN favors FAVORITE more: 0/1 (0.0%)
  ESPN favors UNDERDOG more: 2/3 (66.7%)

=== ANALYSIS BY DIFFERENCE SIZE ===

Difference of 1+ points (total: 129)
  ESPN favors F