In [None]:
import sys
from pathlib import Path
from datetime import datetime
from typing import Dict, List

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Put the directory two levels above the current working directory on the import
# path; the project imports that follow (config, elferspot_listings) presumably
# resolve from there -- confirm against the repository layout.
project_root = Path.cwd().parents[1]
root_entry = str(project_root)
if root_entry not in sys.path:
    # Prepend so this checkout is found before other entries on sys.path.
    sys.path.insert(0, root_entry)

from config import RESULTS_DIR, LISTINGS_GOLD
from elferspot_listings.utils.helpers import load_data

# Folder searched for per-model prediction workbooks, and the models to compare.
PREDICTIONS_DIR = RESULTS_DIR / 'model_predictions'
MODELS = ['catboost', 'ridge', 'elasticnet']

# Show floats with thousands separators and two decimals in rendered frames.
pd.set_option('display.float_format', '{:,.2f}'.format)

def latest_prediction_path(model_name: str) -> Path:
    """Return the prediction export for ``model_name`` that sorts last.

    Candidates are files in ``PREDICTIONS_DIR`` matching
    ``{model_name}_predictions_*.xlsx``. "Latest" means greatest under path
    ordering, not modification time -- this presumably relies on a sortable
    timestamp in the file name (TODO confirm with the exporting notebooks).

    Raises:
        FileNotFoundError: if no file matches the pattern.
    """
    candidates = PREDICTIONS_DIR.glob(f"{model_name}_predictions_*.xlsx")
    newest = max(candidates, default=None)
    if newest is None:
        raise FileNotFoundError(
            f"No prediction exports found for {model_name}. Run notebooks in 04_modeling/ first."
        )
    return newest

def load_predictions(model_name: str) -> pd.DataFrame:
    """Read the ``all_results`` sheet of the latest export for ``model_name``.

    The returned frame carries two extra columns: ``model`` (the model name)
    and ``prediction_file`` (the workbook's file name), so rows stay traceable
    after frames from several models are concatenated.

    Raises:
        FileNotFoundError: propagated from ``latest_prediction_path`` when no
            export exists for the model.
    """
    source = latest_prediction_path(model_name)
    sheet = pd.read_excel(source, sheet_name='all_results')
    return sheet.assign(model=model_name, prediction_file=source.name)

## Load prediction exports
Bring in the latest `model_predictions/*.xlsx` artifacts for CatBoost, Ridge, and ElasticNet.

In [None]:
# Load one frame per model; a missing export is reported and then re-raised so
# the notebook stops instead of comparing an incomplete set of models.
model_dfs: Dict[str, pd.DataFrame] = {}
for model_name in MODELS:
    try:
        predictions = load_predictions(model_name)
    except FileNotFoundError as exc:
        print(exc)
        raise
    model_dfs[model_name] = predictions
    source_file = predictions['prediction_file'].iloc[0]
    print(f"Loaded {len(predictions):,} rows for {model_name} from {source_file}")

# Stack all models into one long frame; the 'model' column tells rows apart.
combined_df = pd.concat(list(model_dfs.values()), ignore_index=True)
combined_df.head()

## Compare model performance
Aggregate MAE/RMSE/R² and prediction-interval coverage per model on the shared hold-out set.

In [None]:
# One row of error metrics per model, computed from that model's own export.
metrics_rows = []
for model_name, df in model_dfs.items():
    actual = df['price_in_eur']
    residuals = actual - df['pred_price']
    squared_error = np.square(residuals)
    # Total sum of squares around the mean actual price (denominator of R²).
    squared_deviation = np.square(actual - actual.mean())
    # Share of rows whose actual price lies within [pred_lower, pred_upper], bounds included.
    inside_interval = actual.between(df['pred_lower'], df['pred_upper'])

    metrics_rows.append({
        'model': model_name,
        'mae_eur': residuals.abs().mean(),
        'rmse_eur': np.sqrt(squared_error.mean()),
        'r2': 1 - squared_error.sum() / squared_deviation.sum(),
        'interval_coverage': inside_interval.mean(),
        'sample_size': len(df),
    })

# Lowest RMSE first.
metrics_df = pd.DataFrame(metrics_rows).sort_values('rmse_eur')
metrics_df

## Residual diagnostics
Visualise actual vs predicted prices and residual spreads per model.

In [None]:
# Residual = actual - predicted, so a positive value means the listing is priced
# above the model's estimate. The column is stored on combined_df because the
# watchlist cell further down reads it.
combined_df['residual_price'] = combined_df['price_in_eur'] - combined_df['pred_price']

# Request only hover fields that the exports actually contain: Plotly Express
# raises when hover_data names a column missing from the frame. This matches
# the guarded hover_name below.
hover_columns = [
    col for col in ('Model', 'Series', 'Car location') if col in combined_df.columns
]

fig_scatter = px.scatter(
    combined_df,
    x='price_in_eur',
    y='pred_price',
    color='model',
    hover_name='Title' if 'Title' in combined_df.columns else None,
    hover_data=hover_columns or None,
    title='Actual vs Predicted Price by Model',
    opacity=0.65,
    trendline='ols',
    labels={'price_in_eur': 'Actual Price (EUR)', 'pred_price': 'Predicted Price (EUR)'}
)

# Dashed y = x reference line across the observed range of actual prices;
# points on it are perfect predictions.
price_min = combined_df['price_in_eur'].min()
price_max = combined_df['price_in_eur'].max()
fig_scatter.add_shape(
    type='line',
    x0=price_min,
    x1=price_max,
    y0=price_min,
    y1=price_max,
    line=dict(color='black', dash='dash'),
)
fig_scatter.show()

# Residual spread per model; only suspected outliers are drawn as points.
fig_box = px.box(
    combined_df,
    x='model',
    y='residual_price',
    title='Residual distribution (EUR)',
    points='suspectedoutliers',
    labels={'residual_price': 'Residual (Actual - Pred, EUR)'},
    color='model'
)
fig_box.show()

## Under/over-valued watchlist
Flag listings whose actual price falls outside a model's prediction interval: below the lower bound counts as underpriced, above the upper bound as overpriced.

In [None]:
# Split out rows whose actual price lies strictly outside the prediction interval.
below_interval = combined_df['price_in_eur'].lt(combined_df['pred_lower'])
above_interval = combined_df['price_in_eur'].gt(combined_df['pred_upper'])
underpriced = combined_df.loc[below_interval].copy()
overpriced = combined_df.loc[above_interval].copy()

def summarize_watchlist(df: pd.DataFrame, label: str, n: int = 10) -> pd.DataFrame:
    """Return the ``n`` most extreme rows of ``df`` as a labelled summary.

    Rows are ranked by ``residual_price``: ascending (most negative first) when
    ``label`` is exactly ``'Underpriced'``, descending for any other label.
    Only the known display columns that exist in ``df`` are kept. Two columns
    are added: ``price_delta_pct`` (``residual_price / pred_price``, a fraction
    rather than a percentage) and ``label``.

    ``df`` must contain ``residual_price`` and ``pred_price``; it is not modified.
    """
    wanted = ['model', 'price_in_eur', 'pred_price', 'pred_lower', 'pred_upper', 'residual_price', 'Title', 'Model', 'Series', 'Car location']
    present = [name for name in wanted if name in df.columns]

    most_negative_first = label == 'Underpriced'
    ranked = df.sort_values('residual_price', ascending=most_negative_first).head(n)

    summary = ranked[present].copy()
    summary['price_delta_pct'] = summary['residual_price'] / summary['pred_price']
    summary['label'] = label
    return summary

# Build both watchlists (default top 10 each) and render them one after the other.
underpriced_summary = summarize_watchlist(underpriced, 'Underpriced')
overpriced_summary = summarize_watchlist(overpriced, 'Overpriced')
display(underpriced_summary, overpriced_summary)

## Notes & next steps
- CatBoost typically achieves the lowest RMSE, but Ridge/ElasticNet provide interpretable baselines—promote whichever satisfies coverage targets.
- Exported watchlists can be pushed to CRM or dashboard (see `app/streamlit_app.py`).
- Re-run notebooks in `04_modeling/` before this analysis whenever Gold data refreshes.