In [None]:
import sys, subprocess

# Show which Python interpreter backs this session, so the install below can
# be checked against the environment you expect.
print("Using interpreter:", sys.executable)

# Invoke pip as a module of that same interpreter (`<python> -m pip install`)
# rather than whatever `pip` is on PATH; check_call raises CalledProcessError
# if the install exits with a non-zero status.
subprocess.check_call([sys.executable, "-m", "pip", "install", "pandas", "numpy"])


In [None]:
import pandas as pd
import numpy as np

def compare_to_official(
    measured_csv,
    park_name,
    official_csv="mlb_wall_theta_distances_2025.csv",
    year=2025,
    # columns in your measured CSV:
    measured_angle_col="Angle_logical_deg",
    measured_dist_col="Distance_ft",
    # columns in the official CSV:
    official_angle_col="wall_theta",
    official_dist_col="wall_distance",
    official_venue_col="venue",
    official_year_col="year",
    # output file:
    output_csv=None
):
    """Compare measured wall distances with the official ones, angle by angle.

    Loads both CSVs, keeps the official rows for ``park_name`` and ``year``,
    inner-joins the two tables on exactly equal angle values and computes the
    signed, absolute and percentage error of each measured distance. A summary
    is printed and the per-angle table is written to ``output_csv``.

    Args:
        measured_csv: Source of the measured data, passed to ``pd.read_csv``.
        park_name: Venue to select; compared for equality against
            ``official_venue_col``.
        official_csv: Source of the official data, passed to ``pd.read_csv``.
        year: Value to select in ``official_year_col``.
        measured_angle_col: Angle column name in the measured CSV.
        measured_dist_col: Distance column name in the measured CSV.
        official_angle_col: Angle column name in the official CSV.
        official_dist_col: Distance column name in the official CSV.
        official_venue_col: Venue column name in the official CSV.
        official_year_col: Year column name in the official CSV.
        output_csv: Where to write the comparison table. When ``None`` the
            file is ``"<park_name with spaces as underscores>_error_comparison.csv"``
            in the current working directory.

    Returns:
        A ``(comp, summary)`` tuple. ``comp`` is a DataFrame sorted by
        ``angle`` with columns ``angle``, ``dist_measured``, ``dist_official``,
        ``error_ft`` (measured minus official), ``abs_error_ft`` and
        ``pct_error`` (NaN where the official distance is zero). ``summary``
        is a dict with ``num_angles_compared``, ``mean_error_ft``,
        ``mean_abs_error_ft``, ``max_abs_error_ft``, ``mean_pct_error`` and
        ``max_pct_error``.

    Raises:
        ValueError: If no official row matches ``park_name`` and ``year``, or
            if the two data sets share no angle value.
        KeyError: Raised by pandas if one of the named columns is missing.
    """
    # ---- Load your measured data ----
    df_meas = pd.read_csv(measured_csv)

    # Keep angle + distance, rename to common names
    df_meas = df_meas[[measured_angle_col, measured_dist_col]].rename(
        columns={measured_angle_col: "angle", measured_dist_col: "dist_measured"}
    )

    # ---- Load official data & filter by park/year ----
    df_off = pd.read_csv(official_csv)

    df_off = df_off[
        (df_off[official_venue_col] == park_name) &
        (df_off[official_year_col] == year)
    ][[official_angle_col, official_dist_col]]

    df_off = df_off.rename(
        columns={official_angle_col: "angle", official_dist_col: "dist_official"}
    )

    if df_off.empty:
        raise ValueError(f"No official rows found for park '{park_name}' and year {year}.")

    # pandas matches null join keys against each other, so rows with a blank
    # angle on both sides would be paired up and counted as a compared angle.
    # Drop them first; this runs after the emptiness check above so that error
    # still means "no rows for this park/year".
    df_meas = df_meas.dropna(subset=["angle"])
    df_off = df_off.dropna(subset=["angle"])

    # ---- Merge on angle ----
    comp = pd.merge(df_meas, df_off, on="angle", how="inner").sort_values("angle")

    if comp.empty:
        raise ValueError("No overlapping angles between measured CSV and official data.")

    # ---- Error calculations ----
    comp["error_ft"] = comp["dist_measured"] - comp["dist_official"]
    comp["abs_error_ft"] = comp["error_ft"].abs()

    # A percentage of a zero reference distance is undefined. Mask zeros to
    # NaN so the row yields NaN instead of inf; the mean/max below skip NaN,
    # whereas a single inf would make both summary figures infinite.
    nonzero_official = comp["dist_official"].where(comp["dist_official"] != 0)
    comp["pct_error"] = comp["abs_error_ft"] / nonzero_official * 100.0

    # ---- Summary stats ----
    summary = {
        "num_angles_compared": int(len(comp)),
        "mean_error_ft": float(comp["error_ft"].mean()),
        "mean_abs_error_ft": float(comp["abs_error_ft"].mean()),
        "max_abs_error_ft": float(comp["abs_error_ft"].max()),
        "mean_pct_error": float(comp["pct_error"].mean()),
        "max_pct_error": float(comp["pct_error"].max()),
    }

    print("Comparison summary")
    print("------------------")
    print(f"Angles compared      : {summary['num_angles_compared']}")
    print(f"Mean error (ft)      : {summary['mean_error_ft']:.2f}")
    print(f"Mean |error| (ft)    : {summary['mean_abs_error_ft']:.2f}")
    print(f"Max |error| (ft)     : {summary['max_abs_error_ft']:.2f}")
    print(f"Mean % error         : {summary['mean_pct_error']:.2f}%")
    print(f"Max % error          : {summary['max_pct_error']:.2f}%")

    # ---- Write comparison CSV ----
    if output_csv is None:
        safe_name = park_name.replace(" ", "_")
        output_csv = f"{safe_name}_error_comparison.csv"

    comp.to_csv(output_csv, index=False)
    print(f"\nFull comparison written to: {output_csv}")

    return comp, summary
