In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Rank every numeric team metric by the strength of its correlation with
# per-season wins and draw the ranking as a horizontal bar chart.

# Load data
df = pd.read_csv("/mnt/data/fangraphs-leaderboards(21).csv")

# Normalize wins and multi-year totals to per-season (consistent with earlier
# work). NOTE(review): the divisor 3 is presumably the number of seasons the
# export aggregates -- confirm against the data source.
N_SEASONS = 3
df["wins_ps"] = df["wins"] / N_SEASONS

totals_cols = ['G', 'PA', 'HR', 'R', 'RBI', 'SB', 'BsR', 'Off', 'Def', 'WAR',
               '1B', '2B', '3B']
for c in totals_cols:
    if c in df.columns:
        df[c] = df[c] / N_SEASONS

# Keep numeric columns only
numeric_df = df.select_dtypes(include=[np.number])

# Correlation of each metric with per-season wins.
# - "wins" is dropped along with "wins_ps": it is the same quantity rescaled,
#   so it would always show up as a meaningless 1.00 at the top of the chart.
# - NaN coefficients (e.g. constant columns) are dropped so they do not
#   appear as blank bars labelled "nan".
corr = (
    numeric_df.corr()["wins_ps"]
    .drop(labels=["wins_ps", "wins"], errors="ignore")
    .dropna()
)

# Sort by absolute correlation, strongest first
corr_sorted = corr.reindex(corr.abs().sort_values(ascending=False).index)

# Create heatmap-style plot; height grows with the number of metrics
plt.figure(figsize=(8, max(10, len(corr_sorted) * 0.35)))
colors = plt.cm.RdYlGn(np.abs(corr_sorted.to_numpy()))  # red near 0, green near 1

plt.barh(corr_sorted.index, corr_sorted.values, color=colors)
plt.axvline(0, color="black", linewidth=0.8)
# barh draws the first row at the bottom; flip the axis so the strongest
# correlate is at the top, matching the sort order.
plt.gca().invert_yaxis()

plt.title("Correlation of All Team Metrics with Wins (Per Season)")
plt.xlabel("Correlation Coefficient")
plt.xlim(-1, 1)

# Annotate values just past the end of each bar. Anchoring the text on the
# side facing the bar keeps the label from overlapping it for negative values.
for i, v in enumerate(corr_sorted.values):
    if v >= 0:
        plt.text(v + 0.02, i, f"{v:.2f}", va="center", ha="left")
    else:
        plt.text(v - 0.02, i, f"{v:.2f}", va="center", ha="right")

plt.tight_layout()
plt.show()

# Last expression of the cell: displays the ranked correlations in a notebook
corr_sorted
