# 02 — Explore results and what-ifs

This exploratory notebook refits the mood model on the daily mart, summarises average metrics by weekday, reports bootstrap confidence intervals for the predictions of the most recent days, and runs what-if scenario analyses against a baseline day.

In [None]:
import copy
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

## Load marts

In [None]:
# Locations of the mart extracts consumed by this notebook.
DATA_DIR = Path('data/marts')
FACT_DAY_PATH = DATA_DIR.joinpath('sydney_day_samples.csv')
SCENARIO_PATH = DATA_DIR.joinpath('what_if_scenarios.csv')

# Daily table; the `date` column is parsed to datetime while loading.
df = pd.read_csv(FACT_DAY_PATH, parse_dates=['date'])
# What-if scenario definitions, loaded with pandas' default dtype inference.
scenarios = pd.read_csv(SCENARIO_PATH)

# Preview the first rows of the daily table.
df.head()

## Daily trends

In [None]:
# Mean of each tracked metric per weekday.
# `observed=True` is passed explicitly: grouping on a categorical key without it
# emits a FutureWarning on recent pandas releases. Every category here is derived
# from the data itself, so the result is the same either way.
# NOTE(review): rows are sorted by category order, which pandas infers from the
# sorted unique values of `weekday`. For string labels that is alphabetical, not
# calendar order; confirm whether a Monday-first ordering is wanted.
daily_summary = (
    df.assign(weekday=df['weekday'].astype('category'))
      .groupby('weekday', observed=True)
      [['mood_score', 'weather_temp_c', 'beach_time_hours', 'commute_minutes']]
      .mean()
      .sort_index()
)
daily_summary

The averages show how mood aligns with the typical weekday rhythm.

## Refit the pipeline for inference

In [None]:
# Column groups: the single categorical input is one-hot encoded, the numeric
# inputs are standardised. `features` keeps the categorical column first.
target = 'mood_score'
categorical_features = ['weekday']
numeric_features = [
    'weather_temp_c',
    'harbour_visits',
    'beach_time_hours',
    'commute_minutes',
    'cultural_events',
]
features = categorical_features + numeric_features

X = df[features]
y = df[target]

# Categories not seen during fit are ignored at predict time (all-zero encoding).
weekday_encoder = OneHotEncoder(handle_unknown='ignore')
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', weekday_encoder, categorical_features),
        ('num', StandardScaler(), numeric_features),
    ]
)

# Preprocessing followed by an ordinary least-squares regressor, fitted on all rows.
model = Pipeline(steps=[
    ('prep', preprocessor),
    ('regressor', LinearRegression()),
])
model.fit(X, y)

## Confidence intervals for recent days

In [None]:
def bootstrap_interval(pipeline, features, target, samples=300, alpha=0.05, random_state=13):
    """Percentile-bootstrap interval for the pipeline's predictions on ``features``.

    Rows are resampled with replacement ``samples`` times. For each resample a
    private copy of ``pipeline`` is fitted on the resampled rows and used to
    predict every row of ``features``; the interval bounds are the per-row
    percentiles of those predictions.

    Parameters
    ----------
    pipeline : estimator
        Object exposing ``fit(X, y)`` (returning the fitted estimator) and
        ``predict(X)``. It must be deep-copyable.
    features : pandas.DataFrame
        Model inputs; rows are selected positionally via ``.iloc``.
    target : pandas.Series
        Model target with the same number of rows as ``features``.
    samples : int, default 300
        Number of bootstrap resamples.
    alpha : float, default 0.05
        Total tail mass; bounds are the ``alpha / 2`` and ``1 - alpha / 2``
        percentiles.
    random_state : int, default 13
        Seed passed to ``numpy.random.default_rng``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(point, lower, upper)``, one value per row of ``features``. ``point``
        is the prediction of ``pipeline`` fitted on the full data.

    Raises
    ------
    ValueError
        If ``features`` is empty, ``target`` has a different length,
        ``samples`` is less than 1, or ``alpha`` is outside ``[0, 1]``.

    Notes
    -----
    The caller's ``pipeline`` is refitted on the full ``features``/``target``
    before returning. It is not touched during resampling, so an interrupted
    run cannot leave it fitted on a random resample.
    """
    n_rows = len(features)
    if n_rows == 0:
        raise ValueError('features must contain at least one row')
    if len(target) != n_rows:
        raise ValueError('features and target must have the same number of rows')
    if samples < 1:
        raise ValueError('samples must be at least 1')
    if not 0 <= alpha <= 1:
        raise ValueError('alpha must lie in the interval [0, 1]')

    rng = np.random.default_rng(random_state)
    # Work on a private copy so the bootstrap refits never mutate the caller's model.
    resample_model = copy.deepcopy(pipeline)
    preds = []
    for _ in range(samples):
        # Positional row indices drawn with replacement.
        idx = rng.integers(0, n_rows, n_rows)
        resample_model.fit(features.iloc[idx], target.iloc[idx])
        preds.append(resample_model.predict(features))
    preds = np.vstack(preds)

    # Point estimate comes from the caller's pipeline fitted on all rows.
    point = pipeline.fit(features, target).predict(features)
    lower = np.percentile(preds, 100 * alpha / 2, axis=0)
    upper = np.percentile(preds, 100 * (1 - alpha / 2), axis=0)
    return point, lower, upper

# Intervals are bootstrapped over the full dataset; only the trailing 7 rows
# are kept for display alongside the observed mood score.
point, lower, upper = bootstrap_interval(model, X, y)
last_week = slice(-7, None)
recent = df.tail(7).assign(
    prediction=point[last_week],
    lower_ci=lower[last_week],
    upper_ci=upper[last_week],
)
recent.loc[:, ['date', 'weekday', 'mood_score', 'prediction', 'lower_ci', 'upper_ci']]

## Scenario-based sensitivity analysis

In [None]:
# Baseline day: the mean of every numeric feature plus the most frequent weekday.
baseline = df[features].mean(numeric_only=True).to_dict()
baseline.update({'weekday': df['weekday'].mode().iat[0]})
baseline_df = pd.DataFrame([baseline])
baseline_prediction = model.predict(baseline_df)[0]

# Feature name -> scenario column holding the additive shift for that feature.
delta_columns = {
    'weather_temp_c': 'delta_weather_temp_c',
    'beach_time_hours': 'delta_beach_time_hours',
    'commute_minutes': 'delta_commute_minutes',
    'cultural_events': 'delta_cultural_events',
}

# One prediction per scenario: shift the baseline, predict, and record the
# difference from the baseline prediction.
scenario_rows = []
for _, row in scenarios.iterrows():
    scenario_features = baseline_df.copy()
    for feature_name, delta_column in delta_columns.items():
        scenario_features[feature_name] += row[delta_column]
    prediction = model.predict(scenario_features)[0]
    scenario_rows.append(
        dict(
            scenario=row['scenario'],
            description=row['description'],
            predicted_mood=prediction,
            delta_vs_baseline=prediction - baseline_prediction,
        )
    )

scenario_results = pd.DataFrame(scenario_rows)
scenario_results

Scenario deltas provide an actionable way to communicate how changes in weather, leisure, or commute patterns influence predicted mood.