In [None]:
import sys
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from dotenv import load_dotenv
sys.path.append("..")

from mlb_2025.utils import describe_endpoint, compile_average_player_values

load_dotenv()

from data_sources import PyBaseball, MLBStatsAPI, Salary

# Data-source clients used by the rest of the notebook.
py_baseball = PyBaseball()
mlb_api = MLBStatsAPI()

# Source locations come from environment variables (load_dotenv() has already
# been called above); a variable that is not set yields None.
payroll_source_paths = dict(
    historical=os.environ.get("MLB_PAYROLLS"),
    recent=os.environ.get("MLB_PAYROLLS_2025"),
)
salary_source_paths = dict(
    historical=os.environ.get("MLB_PLAYER_SALARY_DATA"),
)

salary = Salary(
    payroll_source_paths=payroll_source_paths,
    salary_source_paths=salary_source_paths,
)
payrolls = salary.payroll()

# Contains matching keys between data sources
# chadwick = py_baseball.player_search.chadwick()

### Analyze hitter performance per salary

In [None]:
# Batter stats requested with start_season=2025.
batters_stats_2025 = py_baseball.batter.stats(start_season=2025)

# The bWAR table is pulled only to obtain player salaries; besides "salary",
# keep just the name/id/year/team columns.
# NOTE(review): no year filter is applied here despite the "_2025" name --
# confirm what bref_war() returns.
bwar_keep_cols = ["name_common", "mlb_ID", "year_ID", "team_ID", "salary"]
batters_bwar_2025 = py_baseball.batter.bref_war().loc[:, bwar_keep_cols]

In [None]:
# Attach the bWAR columns (notably "salary") to the batter stats by joining on
# (name, team, season).  It is a left join, so batters without a bWAR match are
# kept and get NaN for the added columns.
bwar_join_keys = ["name_common", "team_ID", "year_ID"]
bwar_added_cols = [col for col in batters_bwar_2025.columns if col not in bwar_join_keys]

batters_stats_2025 = (
    batters_stats_2025
    # Drop columns added by a previous run of this cell so it can be re-run:
    # otherwise the second merge suffixes the overlapping columns
    # ("salary_x"/"salary_y") and the "salary" lookup below raises KeyError.
    .drop(columns=bwar_added_cols, errors="ignore")
    .merge(
        batters_bwar_2025,
        how="left",
        left_on=["Name", "Team", "Season"],
        right_on=bwar_join_keys,
    )
    # The right-hand join keys duplicate Name/Team/Season.
    .drop(columns=bwar_join_keys)
)


# For now, assume that a missing salary means the player earns the league minimum.
league_minimum = salary.league_minimum_salaries(2025)
batters_stats_2025["salary"] = batters_stats_2025["salary"].fillna(np.float64(league_minimum))

In [None]:
# Stats whose dollar value is compared.  Each stat is listed exactly once; a
# repeated entry adds nothing to the per-stat mapping built below.
player_value_stats = [
    "WAR",
    "H",
    "HR",
    "R&RBI",
    "wRC",
]

# Project helper; its result is consumed via .items() as (stat, value) pairs.
# NOTE(review): presumably the value is average dollars per unit of the stat --
# confirm against mlb_2025.utils.
average_batter_values = compile_average_player_values(player_value_stats, batters_stats_2025)

df = pd.DataFrame(list(average_batter_values.items()), columns=["Metric", "Dollar per Stat"])

# Horizontal bars, largest value first.
sns.barplot(
    data=df.sort_values("Dollar per Stat", ascending=False),
    x="Dollar per Stat", y="Metric", palette="viridis"
)

plt.title("Dollar Value per Batting Statistic (2025)", fontsize=14)
plt.xlabel("Dollars per Stat Unit")
plt.ylabel("")
plt.tight_layout()
plt.show()

In [None]:
# Split batters into 12 equal-width WAR bins and average the salary per bin.
batters_stats_2025["WAR_binned"] = pd.cut(batters_stats_2025["WAR"], 12)

# observed=True keeps only the bins that actually contain batters, which is the
# set of bins the chart shows.  Rows whose WAR is NaN belong to no bin and are
# skipped by groupby, so they cannot cause a failed lookup.
salary_by_war_grouped_df = (
    batters_stats_2025
    .groupby("WAR_binned", observed=True)["salary"]
    .mean()
    .fillna(0)
)

# Mapping of WAR bin (Interval) -> mean salary.  Built directly from the
# grouped result so no loop variable shadows the builtin `bin`.
salary_by_war = salary_by_war_grouped_df.to_dict()


df = pd.DataFrame(list(salary_by_war.items()), columns=["Metric", "Salary"])

# "Metric" holds the WAR bins; highest bin first.
sns.barplot(
    data=df.sort_values("Metric", ascending=False),
    x="Salary", y="Metric", palette="viridis"
)

plt.title("Average Salary by WAR (2025)", fontsize=14)
plt.xlabel("Average Salary")
plt.ylabel("WAR")
plt.tight_layout()
plt.show()

In [None]:
# Split batters into 12 equal-width salary bins and average the WAR per bin.
batters_stats_2025["salary_binned"] = pd.cut(batters_stats_2025["salary"], 12)

# observed=True keeps only the bins that actually contain batters, which is the
# set of bins the chart shows.  Rows whose salary is NaN belong to no bin and
# are skipped by groupby, so they cannot cause a failed lookup.
war_by_salary_grouped_df = (
    batters_stats_2025
    .groupby("salary_binned", observed=True)["WAR"]
    .mean()
    .fillna(0)
)

# Mapping of salary bin (Interval) -> mean WAR.  Built directly from the
# grouped result so no loop variable shadows the builtin `bin`.
war_by_salary = war_by_salary_grouped_df.to_dict()


df = pd.DataFrame(list(war_by_salary.items()), columns=["Salary", "WAR"])

# Highest salary bin first.
sns.barplot(
    data=df.sort_values("Salary", ascending=False),
    x="WAR", y="Salary", palette="viridis"
)

plt.title("Average WAR by Salary (2025)", fontsize=14)
plt.xlabel("WAR")
plt.ylabel("Salary")
plt.tight_layout()
plt.show()

### Analyze pitcher performance per salary


In [None]:
# Pitcher stats requested with start_season=2025 and qual=1.
pitchers_stats_2025 = py_baseball.pitcher.stats(start_season=2025, qual=1)

# The bWAR table is pulled only to obtain player salaries; besides "salary",
# keep just the name/id/year/team columns.
# NOTE(review): no year filter is applied here despite the "_2025" name --
# confirm what bref_war() returns.
bwar_keep_cols = ["name_common", "mlb_ID", "year_ID", "team_ID", "salary"]
pitchers_bwar_2025 = py_baseball.pitcher.bref_war().loc[:, bwar_keep_cols]

In [None]:
# Attach the bWAR columns (notably "salary") to the pitcher stats by joining on
# (name, team, season).  It is a left join, so pitchers without a bWAR match
# are kept and get NaN for the added columns.
bwar_join_keys = ["name_common", "team_ID", "year_ID"]
bwar_added_cols = [col for col in pitchers_bwar_2025.columns if col not in bwar_join_keys]

pitchers_stats_2025 = (
    pitchers_stats_2025
    # Drop columns added by a previous run of this cell so it can be re-run:
    # otherwise the second merge suffixes the overlapping columns
    # ("salary_x"/"salary_y") and the "salary" lookup below raises KeyError.
    .drop(columns=bwar_added_cols, errors="ignore")
    .merge(
        pitchers_bwar_2025,
        how="left",
        left_on=["Name", "Team", "Season"],
        right_on=bwar_join_keys,
    )
    # The right-hand join keys duplicate Name/Team/Season.
    .drop(columns=bwar_join_keys)
)


# For now, assume that a missing salary means the player earns the league minimum.
league_minimum = salary.league_minimum_salaries(2025)
pitchers_stats_2025["salary"] = pitchers_stats_2025["salary"].fillna(np.float64(league_minimum))

In [None]:
# Pitching stats whose dollar value is compared.
player_value_stats = ["WAR", "W", "SO", "Stuff+", "Location+", "Pitching+"]

# Project helper; its result is consumed via .items() as (stat, value) pairs.
average_pitcher_values = compile_average_player_values(player_value_stats, pitchers_stats_2025)

df = pd.DataFrame(
    list(average_pitcher_values.items()),
    columns=["Metric", "Dollar per Stat"],
)

# Horizontal bars, largest value first; label the Axes that barplot returns.
ax = sns.barplot(
    data=df.sort_values("Dollar per Stat", ascending=False),
    x="Dollar per Stat",
    y="Metric",
    palette="viridis",
)
ax.set_title("Dollar Value per Pitching Statistic (2025)", fontsize=14)
ax.set_xlabel("Dollars per Stat Unit")
ax.set_ylabel("")

plt.tight_layout()
plt.show()

In [None]:
# Split pitchers into 12 equal-width WAR bins and average the salary per bin.
pitchers_stats_2025["WAR_binned"] = pd.cut(pitchers_stats_2025["WAR"], 12)

# observed=True keeps only the bins that actually contain pitchers, which is
# the set of bins the chart shows.  Rows whose WAR is NaN belong to no bin and
# are skipped by groupby, so they cannot cause a failed lookup.
salary_by_war_grouped_df = (
    pitchers_stats_2025
    .groupby("WAR_binned", observed=True)["salary"]
    .mean()
    .fillna(0)
)

# Mapping of WAR bin (Interval) -> mean salary.  Built directly from the
# grouped result so no loop variable shadows the builtin `bin`.
salary_by_war = salary_by_war_grouped_df.to_dict()


df = pd.DataFrame(list(salary_by_war.items()), columns=["Metric", "Salary"])

# "Metric" holds the WAR bins; highest bin first.
sns.barplot(
    data=df.sort_values("Metric", ascending=False),
    x="Salary", y="Metric", palette="viridis"
)

plt.title("Average Salary by WAR (2025)", fontsize=14)
plt.xlabel("Average Salary")
plt.ylabel("WAR")
plt.tight_layout()
plt.show()

In [None]:
# Split pitchers into 12 equal-width salary bins and average the WAR per bin.
pitchers_stats_2025["salary_binned"] = pd.cut(pitchers_stats_2025["salary"], 12)

# observed=True keeps only the bins that actually contain pitchers, which is
# the set of bins the chart shows.  Rows whose salary is NaN belong to no bin
# and are skipped by groupby, so they cannot cause a failed lookup.
war_by_salary_grouped_df = (
    pitchers_stats_2025
    .groupby("salary_binned", observed=True)["WAR"]
    .mean()
    .fillna(0)
)

# Mapping of salary bin (Interval) -> mean WAR.  Built directly from the
# grouped result so no loop variable shadows the builtin `bin`.
war_by_salary = war_by_salary_grouped_df.to_dict()


df = pd.DataFrame(list(war_by_salary.items()), columns=["Salary", "WAR"])

# Highest salary bin first.
sns.barplot(
    data=df.sort_values("Salary", ascending=False),
    x="WAR", y="Salary", palette="viridis"
)

plt.title("Average WAR by Salary (2025)", fontsize=14)
plt.xlabel("WAR")
plt.ylabel("Salary")
plt.tight_layout()
plt.show()