In [None]:
import sys
from pathlib import Path
from datetime import datetime

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Resolve the directory two levels above the current working directory and put
# it first on sys.path so the project-local imports below can be resolved.
# NOTE(review): presumably the notebook is launched from a folder two levels
# below the repository root; this depends on the kernel's cwd, so confirm.
project_root = Path.cwd().parents[1]
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

# Project-local imports; they must stay after the sys.path adjustment above.
from config import LISTINGS_GOLD, RESULTS_DIR
from elferspot_listings.utils.helpers import load_data, ensure_dir

# Folder that latest_prediction_file() searches for prediction workbooks.
PREDICTIONS_DIR = RESULTS_DIR / 'model_predictions'
# ensure_dir is a project helper; presumably it creates the folder if needed
# and returns the path (its result is used as a path below) - TODO confirm.
results_dir = ensure_dir(RESULTS_DIR / 'analysis_outputs')
# Timestamped workbook name. The timestamp is fixed when this cell runs, not
# when the report is actually written.
report_path = results_dir / f"market_snapshot_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"

def latest_prediction_file(model_name: str = 'catboost') -> Path:
    """Return the prediction workbook for ``model_name`` whose path sorts last.

    Searches ``PREDICTIONS_DIR`` for files matching
    ``<model_name>_predictions_*.xlsx``. Candidates are compared as paths, so
    "latest" means last in path ordering (presumably the file names carry a
    sortable timestamp suffix - confirm against the modeling notebooks).

    Args:
        model_name: Prefix of the export files to look for.

    Returns:
        Path of the matching file that orders last.

    Raises:
        FileNotFoundError: If no file matches the pattern.
    """
    pattern = f"{model_name}_predictions_*.xlsx"
    newest = max(PREDICTIONS_DIR.glob(pattern), default=None)
    if newest is None:
        raise FileNotFoundError(f"Run 04_modeling/{model_name} notebook to create prediction exports.")
    return newest

In [None]:
# Load the Gold listings table through the project helper and report its size.
df_gold = load_data(LISTINGS_GOLD)
print(f"Loaded Gold dataset with {len(df_gold):,} rows")

# Take the CatBoost export returned by latest_prediction_file() and read its
# 'all_results' sheet; later cells look for price and prediction-interval
# columns in this frame.
catboost_file = latest_prediction_file('catboost')
df_preds = pd.read_excel(catboost_file, sheet_name='all_results')
print(f"Loaded CatBoost predictions from {catboost_file.name}")
# Last expression of the cell: renders a preview of the first rows.
df_preds.head()

## KPI summary
High-level metrics for executive reporting.

In [None]:
# Headline figures computed over the whole Gold table; the dict keys become
# the row labels of kpi_df.
kpis = {
    'Listings': len(df_gold),
    'Median price (EUR)': df_gold['price_in_eur'].median(),
    'Average price (EUR)': df_gold['price_in_eur'].mean(),
    'Median mileage (km)': df_gold['Mileage_km'].median(),
    # Mean of a boolean mask = fraction of rows where the comparison is True.
    'Classic share (<1998)': (df_gold['Year of construction'] < 1998).mean(),
    # .get() falls back to an empty float Series when the column is absent, so
    # this KPI becomes NaN instead of raising a KeyError.
    # NOTE(review): assumes the PTS column is boolean / 0-1 so that its mean
    # is a share - confirm against the Gold schema.
    'PTS share': df_gold.get('Paint-to-Sample (PTS)', pd.Series(dtype=float)).mean(),
}

# One row per metric with a single 'value' column; shown as the cell output
# and reused by the export cell.
kpi_df = pd.DataFrame.from_dict(kpis, orient='index', columns=['value'])
kpi_df

## Price vs. mileage by model category

In [None]:
# Draw the scatter only when every column it needs exists in the Gold table.
if not {'price_in_eur', 'Mileage_km', 'model_category'} <= set(df_gold.columns):
    print("Required columns missing for scatter plot.")
else:
    # update_layout returns the figure itself, so the legend title is set in
    # the same expression that builds the plot.
    fig_price_mileage = px.scatter(
        data_frame=df_gold,
        x='Mileage_km',
        y='price_in_eur',
        color='model_category',
        # Listing titles as hover labels when the column is present.
        hover_name=('Title' if 'Title' in df_gold else None),
        marginal_x='histogram',
        marginal_y='histogram',
        opacity=0.65,
        labels={'Mileage_km': 'Mileage (km)', 'price_in_eur': 'Price (EUR)'},
        title='Price vs Mileage by Model Category',
    ).update_layout(legend_title_text='Model category')
    fig_price_mileage.show()

## Under/over-valued segments
Use CatBoost prediction intervals to find notable opportunities by model category.

In [None]:
# Flag listings priced outside the CatBoost prediction interval and rank
# segments by how often their listings fall below the lower bound.
required_cols = {'pred_price', 'pred_lower', 'pred_upper', 'price_in_eur'}
summary_cols = ['model_category', 'Series', 'Model']
available_summary = [col for col in summary_cols if col in df_preds.columns]

# Start from an empty table on every run so the export cell never picks up a
# stale result left in the kernel by an earlier execution.
opportunity_df = pd.DataFrame()

if required_cols.issubset(df_preds.columns):
    # These columns are written onto df_preds on purpose: the export cell
    # ships df_preds, so the flags travel with the raw predictions.
    df_preds['residual'] = df_preds['price_in_eur'] - df_preds['pred_price']
    df_preds['underpriced_flag'] = df_preds['price_in_eur'] < df_preds['pred_lower']
    df_preds['overpriced_flag'] = df_preds['price_in_eur'] > df_preds['pred_upper']

    if available_summary:
        # Named aggregation ties each output column to its source and function
        # explicitly instead of relying on positional renaming afterwards.
        agg = df_preds.groupby(available_summary).agg(
            residual_mean=('residual', 'mean'),
            residual_median=('residual', 'median'),
            underpriced_ratio=('underpriced_flag', 'mean'),
            overpriced_ratio=('overpriced_flag', 'mean'),
            listing_count=('price_in_eur', 'count'),
            median_price=('price_in_eur', 'median'),
        )
        # Top 15 segments by share of listings priced below the lower bound.
        opportunity_df = (
            agg.sort_values('underpriced_ratio', ascending=False)
            .head(15)
            .reset_index()
        )
    else:
        # groupby() with an empty key list raises, so report the gap instead.
        print(f"No segment columns available for grouping; expected at least one of {summary_cols}.")
else:
    print("Prediction columns not available; rerun CatBoost notebook to refresh exports.")

# Must be the last top-level expression of the cell: a bare name nested inside
# an if-block is not rendered by Jupyter.
opportunity_df

## Export curated report
Send KPIs, opportunity table, and raw prediction intervals to Excel for sharing.

In [None]:
# Sheets of the exported workbook, keyed by sheet name.
summary_tables = {
    # Move the metric labels out of the index so they are written as a column
    # (the sheets are exported with index=False).
    'kpi_summary': kpi_df.reset_index().rename(columns={'index': 'metric'}),
    # Fall back to an empty sheet when the opportunity cell did not produce a table.
    'opportunities': opportunity_df if 'opportunity_df' in locals() else pd.DataFrame(),
    'catboost_predictions': df_preds
}

# The context manager saves and closes the workbook even if a sheet fails to write.
with pd.ExcelWriter(report_path) as writer:
    for sheet_name, table in summary_tables.items():
        # Excel caps sheet names at 31 characters.
        table.to_excel(writer, sheet_name=sheet_name[:31], index=False)

# The check mark was previously stored as mis-decoded bytes.
print(f"✓ Market report exported to {report_path}")