# Create mean scores DF from enhanced DF


mean_scores_df: Create a new DataFrame holding each team's mean score per season, computed separately for the regular season and the playoffs

In [5]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Load the enhanced scores CSV; the path is relative, so it is resolved
# against the notebook's current working directory.
df = pd.read_csv("enhanced_scores1.csv")

In [6]:
# Build one frame per side of each game, renamed to a shared layout so the two
# can be stacked. Note: 'mean_score' holds the raw per-row score at this stage;
# it only becomes a mean after the groupby below.
home_df = (
    df.loc[:, ['schedule_season', 'schedule_playoff', 'team_home_utd', 'score_home']]
    .rename(columns={'team_home_utd': 'team', 'score_home': 'mean_score'})
)
away_df = (
    df.loc[:, ['schedule_season', 'schedule_playoff', 'team_away_utd', 'score_away']]
    .rename(columns={'team_away_utd': 'team', 'score_away': 'mean_score'})
)

# Stack home and away rows, then label each row with a readable season type.
# Values of 'schedule_playoff' other than True/False map to NaN and therefore
# match neither season type below.
combined_df = (
    pd.concat([home_df, away_df], ignore_index=True)
    .assign(
        season_type=lambda stacked: stacked['schedule_playoff'].map(
            {True: 'Playoffs', False: 'Regular Season'}
        )
    )
)


def _mean_score_by_season_team(scores, season_type, out_col):
    """Return the mean of 'mean_score' per (schedule_season, team) for one season type.

    Parameters
    ----------
    scores : DataFrame with 'season_type', 'schedule_season', 'team' and
        'mean_score' columns.
    season_type : value of 'season_type' to keep (e.g. 'Playoffs').
    out_col : name given to the aggregated score column in the result.

    Returns
    -------
    DataFrame with columns 'schedule_season', 'team' and `out_col`, with
    numeric columns rounded to 2 decimals.
    """
    selected = scores.loc[scores['season_type'] == season_type]
    per_team = selected.groupby(['schedule_season', 'team'])['mean_score'].mean()
    return per_team.reset_index().round(2).rename(columns={'mean_score': out_col})


# Per-season, per-team averages for each season type
regular_season_scores = _mean_score_by_season_team(
    combined_df, 'Regular Season', 'regular_season_mean_score'
)
playoff_scores = _mean_score_by_season_team(
    combined_df, 'Playoffs', 'playoff_mean_score'
)

# Left join on (season, team): every regular-season row is kept, and teams with
# no playoff rows in that season get NaN in 'playoff_mean_score'.
mean_scores_df = regular_season_scores.merge(
    playoff_scores, on=['schedule_season', 'team'], how='left'
)

mean_scores_df

Unnamed: 0,schedule_season,team,regular_season_mean_score,playoff_mean_score
0,1966,Arizona Cardinals,18.86,
1,1966,Atlanta Falcons,14.57,
2,1966,Buffalo Bills,25.57,7.0
3,1966,Chicago Bears,16.71,
4,1966,Cleveland Browns,28.79,
...,...,...,...,...
1733,2024,San Francisco 49ers,22.88,
1734,2024,Seattle Seahawks,22.06,
1735,2024,Tampa Bay Buccaneers,29.53,
1736,2024,Tennessee Titans,18.29,


Calculate regular season and playoff mean points Z-scores

In [9]:
# Per-season Z-score helper
def calculate_z_scores(mean_scores_df, col):
    """Standardise one column within each season.

    For every row, subtracts the mean of `col` over rows sharing the same
    'schedule_season' and divides by that season's standard deviation of
    `col` (pandas' default "std" aggregation).

    Parameters
    ----------
    mean_scores_df : DataFrame containing 'schedule_season' and `col`.
    col : name of the column to standardise.

    Returns
    -------
    Series aligned to `mean_scores_df`'s index; rows where `col` is NaN
    stay NaN.
    """
    by_season = mean_scores_df.groupby("schedule_season")[col]
    season_mean = by_season.transform("mean")
    season_std = by_season.transform("std")
    deviation = mean_scores_df[col] - season_mean
    return deviation / season_std

# Add a per-season Z-score column for each mean-score column. These
# assignments add columns to mean_scores_df in place.
mean_scores_df["regular_season_z_score"] = calculate_z_scores(
    mean_scores_df, col="regular_season_mean_score"
)
mean_scores_df["playoff_z_score"] = calculate_z_scores(
    mean_scores_df, col="playoff_mean_score"
)

# Plain assignment: mean_scores_with_z is a second name for the same
# DataFrame object, not a copy.
mean_scores_with_z = mean_scores_df
mean_scores_with_z

# Save to a new CSV file
# NOTE(review): the disabled line below writes `df`, not `mean_scores_with_z`,
# and uses an absolute path; confirm both before re-enabling.
#df.to_csv("/mnt/data/mean_scores_with_z.csv", index=False)


Unnamed: 0,schedule_season,team,regular_season_mean_score,playoff_mean_score,regular_season_z_score,playoff_z_score
0,1966,Arizona Cardinals,18.86,,-0.646786,
1,1966,Atlanta Falcons,14.57,,-1.524512,
2,1966,Buffalo Bills,25.57,7.0,0.726068,-1.307276
3,1966,Chicago Bears,16.71,,-1.086672,
4,1966,Cleveland Browns,28.79,,1.384873,
...,...,...,...,...,...,...
1733,2024,San Francisco 49ers,22.88,,-0.007017,
1734,2024,Seattle Seahawks,22.06,,-0.187539,
1735,2024,Tampa Bay Buccaneers,29.53,,1.456972,
1736,2024,Tennessee Titans,18.29,,-1.017500,
