# In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the results CSV into a DataFrame
RESULTS_CSV = (
    "/Users/deniz/Projects/grand_tours/results/segment_cv_results_tdf_2024.csv"
)
df = pd.read_csv(RESULTS_CSV)

# Strip leading/trailing whitespace from every header name so the column
# lookups below ("Model", "cv_rmse", "cv_mape") match exactly
df.columns = df.columns.map(str.strip)

# 1. Mean cv_rmse and cv_mape for each distinct value of "Model",
#    with "Model" restored as an ordinary column
metric_columns = ["cv_rmse", "cv_mape"]
per_model_means = df.groupby("Model")[metric_columns].mean()
summary = per_model_means.reset_index()
print("Average metrics per model:\n", summary)

# 2. Apply matplotlib's "bmh" style sheet (this is a matplotlib style, not a
#    seaborn theme); it changes the global rcParams used by the figures below
plt.style.use("bmh")

def _boxplot_by_model(frame, metric, palette, title):
    """Show a 5x5-inch box plot of one metric column grouped by "Model".

    Args:
        frame: DataFrame containing a "Model" column and the *metric* column.
        metric: Name of the column plotted on the y axis.
        palette: Seaborn palette name used to colour the boxes.
        title: Title drawn above the plot.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    fig, ax = plt.subplots(figsize=(5, 5))
    # Passing `palette` without `hue` is deprecated since seaborn 0.13, so
    # the x variable is assigned to `hue` as well. dodge=False keeps each box
    # centred on its tick at full width instead of being split per hue level.
    sns.boxplot(
        data=frame,
        x="Model",
        y=metric,
        hue="Model",
        palette=palette,
        dodge=False,
        ax=ax,
    )
    # The hue legend would only repeat the x tick labels. It is removed here
    # rather than with `legend=False`, which older seaborn releases reject.
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()
    ax.set_title(title)
    plt.xticks(rotation=20)  # tilt the model names so they do not overlap
    fig.tight_layout()
    plt.show()


# --- Boxplot for RMSE ---
_boxplot_by_model(df, "cv_rmse", "pastel", "CV RMSE by Model")

# --- Boxplot for MAPE ---
_boxplot_by_model(df, "cv_mape", "muted", "CV MAPE by Model")

# --- RMSE vs MAPE scatter plot ---
# One marker per row of df; both the colour and the marker shape encode "Model".
scatter_fig, scatter_ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(
    data=df,
    x="cv_rmse",
    y="cv_mape",
    hue="Model",
    style="Model",
    s=100,
    ax=scatter_ax,
)
scatter_ax.set_title("RMSE vs MAPE by Model")
scatter_ax.set_xlabel("CV RMSE")
scatter_ax.set_ylabel("CV MAPE")
scatter_fig.tight_layout()
plt.show()