In [None]:
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import RidgeCV
from sklearn.impute import SimpleImputer

# --- Load files ---
# proj:  projection sheet (Excel) whose rows are scored further down.
# train: FanGraphs leaderboard export (CSV) the model is fitted on.
proj_path = "/mnt/data/input.xlsx"
train_path = "/mnt/data/fangraphs-leaderboards(21).csv"
proj = pd.read_excel(io=proj_path)
train = pd.read_csv(filepath_or_buffer=train_path)

# --- Normalize training data to per-season ---
# Wins and every counting stat present in the training frame are divided by
# the same constant so they are expressed per season. Presumably the export
# aggregates three seasons -- confirm against the data source.
n_seasons = 3
train["wins_ps"] = train["wins"] / n_seasons

totals_cols = ['G','PA','HR','R','RBI','SB','BsR','Off','Def','WAR','1B','2B','3B']
# Only rescale the counting stats that actually exist in this export.
present_totals = [name for name in totals_cols if name in train.columns]
for col in present_totals:
    train[col] = train[col] / n_seasons

# --- Align feature names between train and projections ---
# Rename the training headers so that the training frame and the projection
# sheet can both be indexed with one shared feature list. The targets are
# expected to match the projection sheet's column names.
rename_map = {
    "WAR": "war",
    "ERA": "era",
    "WHIP": "whip",
    "K-BB%": "k-bb",
    "OBP": "obp",
    "R": "runs",
    "SLG": "slg",
}
train_feat = train.rename(columns=rename_map)

# Every projection column except the team label is a model input.
# Fail early with an explicit message instead of the opaque
# "list.remove(x): x not in list" when the label column is absent.
if "team" not in proj.columns:
    raise ValueError(
        "projection sheet has no 'team' column; "
        f"found columns: {list(proj.columns)}"
    )
features = [name for name in proj.columns if name != "team"]

# A projection column without a training counterpart would otherwise only
# surface later as a pandas indexing KeyError; report all of them at once.
missing_features = [name for name in features if name not in train_feat.columns]
if missing_features:
    raise KeyError(
        "projection columns missing from training data after renaming: "
        f"{missing_features}"
    )

# Train
# Target is per-season wins; inputs are the shared feature columns.
y_train = train["wins_ps"]
X_train = train_feat[features]

# Median-impute missing values, standardize, then fit a ridge regression
# whose penalty RidgeCV picks from 100 log-spaced candidates in [1e-3, 1e3].
alpha_grid = np.logspace(-3, 3, 100)
pipeline_steps = [
    ("imputer", SimpleImputer(strategy="median")),
    ("scaler", StandardScaler()),
    ("ridge", RidgeCV(alphas=alpha_grid)),
]
# Pipeline.fit returns the fitted pipeline itself, so `model` is the same
# fitted object as when construction and fitting are separate statements.
model = Pipeline(steps=pipeline_steps).fit(X_train, y_train)

# Predict
# Score the projection rows using the same feature columns, in the same
# order, that the model was fitted on.
proj_features = proj[features]
predicted_wins = model.predict(proj_features)
proj["proj_wins"] = predicted_wins

# Highest projected win totals first.
proj_out = proj.sort_values(by="proj_wins", ascending=False)

from caas_jupyter_tools import display_dataframe_to_user
display_dataframe_to_user("Proj output (your code)", proj_out)

# Last expression of the cell: the ten rows with the most projected wins.
proj_out.head(n=10)


