In [7]:
import pandas as pd
import numpy as np

# Counting stats expected in every league file.
BASE_STATS = ["G", "PO", "A", "E", "DP", "PB"]
# Rate stats computed from the counting stats above.
DERIVED_STATS = ["FP", "RF"]
# Full stat column order used for the output: counting stats, then rates.
STATS = [*BASE_STATS, *DERIVED_STATS]
# League code -> input CSV path.
SOURCE_FILES = {
    "MLB": "mlb_fielding.csv",
    "KBO": "kbo_fielding.csv",
    "NPB": "npb_fielding.csv",
}

def div(n, d):
    """Divide ``n`` by ``d`` element-wise, yielding NaN wherever ``d`` is zero.

    Args:
        n: Numerator (array-like supporting ``/``).
        d: Denominator (array-like supporting ``/`` and ``!= 0``).

    Returns:
        The result of ``np.where``: ``n / d`` where ``d != 0`` and ``np.nan``
        elsewhere.
    """
    # np.where needs both branches materialised, so the quotient is computed
    # for zero denominators too. Silence the divide-by-zero / invalid-value
    # floating-point errors that produces; those slots are replaced by NaN.
    with np.errstate(divide="ignore", invalid="ignore"):
        quotient = n / d
    return np.where(d != 0, quotient, np.nan)

def keep_id_cols(cols):
    """Return the identifier-like column labels from ``cols``, in input order.

    A label is kept when it is one of a fixed set of known key columns, or
    when it is a string ending in "id" (compared case-insensitively).

    Args:
        cols: Iterable of column labels. It is consumed in a single pass, so
            one-shot iterables (generators, iterators) work as well as lists.

    Returns:
        A list of the kept labels. Non-string labels are skipped unless they
        match a known key column, instead of raising ``AttributeError``.
    """
    known_keys = {"playerID", "yearID", "lgID", "teamID", "POS", "stint"}
    kept = []
    for label in cols:
        looks_like_id = isinstance(label, str) and label.lower().endswith("id")
        if looks_like_id or label in known_keys:
            kept.append(label)
    return kept

def harmonize(df):
    """Return a copy of ``df`` reduced to identifier columns plus the stat columns.

    The input frame is not modified. Any column named in ``BASE_STATS`` that is
    absent is created and filled with 0. The derived stats are always
    recomputed from the base stats and rounded to 3 decimals:

    * ``FP = (PO + A) / (PO + A + E)``
    * ``RF = (PO + A) / G``

    Both go through ``div``, so a zero denominator gives NaN.

    Args:
        df: DataFrame holding one league's fielding data.

    Returns:
        A DataFrame whose columns are the ones chosen by ``keep_id_cols``
        followed by ``STATS``, without repeated names.
    """
    df = df.copy()

    # A source-supplied "FPCT" column is deliberately NOT renamed to "FP".
    # "FP" is recomputed below regardless, and renaming would create duplicate
    # "FP" labels when the file already has an "FP" column, which breaks the
    # label-based assignments and selection that follow. "FPCT" is dropped by
    # the final column selection.
    for stat in BASE_STATS:
        if stat not in df.columns:
            df[stat] = 0

    handled = df["PO"] + df["A"]  # putouts + assists, shared by both rates
    df["FP"] = div(handled, handled + df["E"])
    df["RF"] = div(handled, df["G"])
    df[DERIVED_STATS] = df[DERIVED_STATS].round(3)

    # dict.fromkeys removes repeated names while keeping first-seen order.
    final_cols = list(dict.fromkeys(keep_id_cols(df.columns) + STATS))

    return df[final_cols]


In [None]:
# Harmonize each league's source file and write the result next to it as
# "<league code in lower case>_fielding2.csv" (no index column, NaN as "").
for lg in SOURCE_FILES:
    path = SOURCE_FILES[lg]
    raw = pd.read_csv(path)
    tidy = harmonize(raw)
    destination = f"{lg.lower()}_fielding2.csv"
    tidy.to_csv(destination, index=False, na_rep="")