# Beach Crowd Prediction — Darts: LSTM & TFT + Optuna Optimization

This notebook compares models across **five dataset strategies**:
1. **Daytime only** — remove night hours (8PM–6AM; in code, hours ≥ `NIGHT_START` = 20 or ≤ `NIGHT_END` = 6, i.e. 20:00–06:59)
2. **Full 24h** — keep all data including noisy night counts
3. **Night = 0** — keep 24h but replace night counts with 0
4. **NightQ1** — replace night counts with Q1 of daytime counts per beach
5. **NightMin** — replace night counts with min daytime count per beach

Models tested:
- **Sklearn baselines**: Lasso, RandomForest, XGBoost, LightGBM, CatBoost
- **Darts Deep Learning**: LSTM (via BlockRNNModel), TFT (TFTModel)
- **Optuna-optimized**: XGBoost, LightGBM, CatBoost, LSTM

In [None]:
# Root folder of the cached prediction JSON files; load_cache() reads CACHE_DIR/COUNTING_MODEL.
CACHE_DIR = "cache/predictions"
# Sub-folder of CACHE_DIR holding the predictions of the counting model to analyse.
COUNTING_MODEL = "bayesian_vgg19"
# Output folder for this comparison (not referenced in the cells shown here).
SAVE_DIR = "models/darts_comparison"

# Fraction of rows to keep; values below 1.0 trigger random sub-sampling of the raw data.
SAMPLE_FRAC = 1.0
# If set, keep only the N beaches with the most rows; None keeps every beach.
MAX_BEACHES = None

# Darts model parameters
N_EPOCHS = 50
BATCH_SIZE = 64
# Number of past hourly steps fed to the Darts models.
INPUT_CHUNK_LENGTH = 24
# Maximum number of hourly steps forecast per prediction.
OUTPUT_CHUNK_LENGTH = 12

# A row is flagged as night when hour >= NIGHT_START or hour <= NIGHT_END.
NIGHT_START = 20
NIGHT_END = 6

# Per-study Optuna budget: passed as n_trials / timeout to study.optimize().
OPTUNA_TRIALS = 30
OPTUNA_TIMEOUT = 300

# Switches for the three experiment families.
RUN_SKLEARN = True
RUN_DARTS = True
RUN_OPTUNA = True

In [None]:
import subprocess
import sys

# Install every third-party dependency into the interpreter that runs this
# notebook (sys.executable), one pip invocation per package; a failing install
# raises CalledProcessError and stops the cell.
pkgs = ["darts", "xgboost", "lightgbm", "catboost", "optuna", "utilsforecast"]
for pkg in pkgs:
    subprocess.run([sys.executable, "-m", "pip", "install", pkg, "-q"], check=True)
print("Packages installed")

In [None]:
import json, time, warnings
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import optuna
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Lasso
import torch

warnings.filterwarnings('ignore')
optuna.logging.set_verbosity(optuna.logging.WARNING)

# Optional model backends. Each import is attempted independently and a HAS_*
# flag records whether it succeeded so later cells can skip unavailable models.
# `except Exception` (rather than a bare `except`) keeps KeyboardInterrupt /
# SystemExit propagating while still covering native-library load errors that
# are not ImportError; the reason is printed, matching the Darts handler.
try:
    from xgboost import XGBRegressor
    HAS_XGB = True
except Exception as e:
    print(f"XGBoost error: {e}")
    HAS_XGB = False

try:
    from lightgbm import LGBMRegressor
    HAS_LGBM = True
except Exception as e:
    print(f"LightGBM error: {e}")
    HAS_LGBM = False

try:
    from catboost import CatBoostRegressor
    HAS_CATBOOST = True
except Exception as e:
    print(f"CatBoost error: {e}")
    HAS_CATBOOST = False

try:
    from darts import TimeSeries
    from darts.models import TFTModel, BlockRNNModel
    from darts.dataprocessing.transformers import Scaler
    from darts.utils.likelihood_models import QuantileRegression
    HAS_DARTS = True
except Exception as e:
    print(f"Darts error: {e}")
    HAS_DARTS = False

# Choose the training device: CUDA GPU first, then Apple MPS, otherwise CPU.
ACCELERATOR = 'cpu'
if torch.cuda.is_available():
    ACCELERATOR = 'gpu'
elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
    ACCELERATOR = 'mps'

# Trainer settings handed to every Darts model via pl_trainer_kwargs.
PL_TRAINER_KWARGS = dict(accelerator=ACCELERATOR, devices=1)

print(f"Accelerator: {ACCELERATOR}")
print(f"XGB: {HAS_XGB}, LGBM: {HAS_LGBM}, CatBoost: {HAS_CATBOOST}, Darts: {HAS_DARTS}")

In [None]:
def calc_metrics(y_true, y_pred, max_count):
    """Return MAE, RMSE, R2 and MAE relative to ``max_count`` (in percent).

    ``RelMAE`` is 0 when ``max_count`` is not strictly positive.
    """
    abs_err = mean_absolute_error(y_true, y_pred)
    root_sq_err = np.sqrt(mean_squared_error(y_true, y_pred))
    r_squared = r2_score(y_true, y_pred)
    if max_count > 0:
        relative = (abs_err / max_count) * 100
    else:
        relative = 0
    return {'MAE': abs_err, 'RMSE': root_sq_err, 'R2': r_squared, 'RelMAE': relative}

def eval_per_beach(df, y_pred, beach_col='unique_id'):
    """Compute calc_metrics() separately for every beach in ``df``.

    Args:
        df: frame holding the beach identifier column and the true target in
            column ``'y'`` (preferred) or ``'count'``.
        y_pred: predictions positionally aligned with the rows of ``df``.
        beach_col: name of the beach identifier column.

    Returns:
        DataFrame with one row per beach that has at least 3 rows and the
        columns MAE, RMSE, R2, RelMAE, camera, max_count, n. When no beach
        qualifies the frame is empty but still carries these columns, so
        callers can aggregate (e.g. ``['RelMAE'].mean()`` gives NaN) without
        a KeyError.
    """
    columns = ['MAE', 'RMSE', 'R2', 'RelMAE', 'camera', 'max_count', 'n']
    target_col = 'y' if 'y' in df.columns else 'count'
    # Positional indexing below needs a plain array, whatever y_pred came in as.
    y_pred = np.asarray(y_pred)

    results = []
    for b in df[beach_col].unique():
        mask = (df[beach_col] == b).to_numpy()
        n_rows = int(mask.sum())
        if n_rows < 3:
            # Too few points for meaningful per-beach metrics.
            continue
        y_true = df.loc[mask, target_col].values
        max_count = y_true.max()
        m = calc_metrics(y_true, y_pred[mask], max_count)
        m['camera'] = b
        m['max_count'] = max_count
        m['n'] = n_rows
        results.append(m)

    if not results:
        return pd.DataFrame(columns=columns, dtype=float)
    return pd.DataFrame(results, columns=columns)

## Load and Prepare Data

In [None]:
def load_cache(cache_dir, model):
    """Load cached prediction records into a DataFrame sorted by datetime.

    Every ``*.json`` file below ``cache_dir/model`` is read. Files that cannot
    be read or parsed, records that are not JSON objects, and records carrying
    an ``'error'`` key are skipped (best-effort load).

    Args:
        cache_dir: root folder of the prediction cache.
        model: name of the model sub-folder to read.

    Returns:
        DataFrame with columns ``beach`` (falls back to ``beach_folder``),
        ``beach_folder``, ``datetime`` (parsed), ``count`` plus one column per
        key of the record's ``weather`` object. An empty cache yields an empty
        frame that still has the four base columns.
    """
    cache_path = Path(cache_dir) / model
    rows = []
    for jf in cache_path.rglob("*.json"):
        try:
            with open(jf, encoding="utf-8") as f:
                r = json.load(f)
        except (OSError, ValueError):
            # ValueError covers JSONDecodeError and UnicodeDecodeError.
            continue
        if not isinstance(r, dict) or 'error' in r:
            continue

        row = {
            'beach': r.get('beach') or r.get('beach_folder'),
            'beach_folder': r.get('beach_folder'),
            'datetime': r.get('datetime'),
            'count': r.get('count')
        }
        weather = r.get('weather')
        if isinstance(weather, dict):  # tolerate a missing or null weather entry
            row.update(weather)
        rows.append(row)

    if rows:
        df = pd.DataFrame(rows)
    else:
        df = pd.DataFrame(columns=['beach', 'beach_folder', 'datetime', 'count'])
    df['datetime'] = pd.to_datetime(df['datetime'])
    df = df.sort_values('datetime').reset_index(drop=True)
    return df

# Read all cached predictions of the selected counting model.
df_raw = load_cache(CACHE_DIR, COUNTING_MODEL)
print(f"Loaded: {len(df_raw)} rows, {df_raw['beach'].nunique()} beaches")

In [None]:
# Camera folders dropped from the analysis (exact `beach_folder` matches).
EXCLUDE = ['livecampro/001', 'livecampro/011', 'livecampro/018', 'livecampro/021',
    'livecampro/030', 'livecampro/039', 'livecampro/070', 'MultimediaTres/PortAndratx',
    'SeeTheWorld/mallorca_pancam', 'skyline/es-pujols']
# Any `beach_folder` starting with one of these prefixes is dropped as well.
EXCLUDE_PREFIX = ['ibred', 'ClubNauticSoller', 'Guenthoer', 'youtube']

before = len(df_raw)
df_raw = df_raw[~df_raw['beach_folder'].isin(EXCLUDE)]
for p in EXCLUDE_PREFIX:
    # na=False: rows without a folder name are treated as non-matching and kept.
    df_raw = df_raw[~df_raw['beach_folder'].str.startswith(p, na=False)]
print(f"Filtered: {before} -> {len(df_raw)}")

In [None]:
# Optional down-sampling for quick experiments; rows are re-sorted by time
# afterwards because sample() shuffles them.
if SAMPLE_FRAC < 1.0:
    df_raw = df_raw.sample(frac=SAMPLE_FRAC, random_state=42).sort_values('datetime').reset_index(drop=True)

# Optionally keep only the MAX_BEACHES beaches with the most rows.
if MAX_BEACHES:
    top = df_raw['beach'].value_counts().head(MAX_BEACHES).index.tolist()
    df_raw = df_raw[df_raw['beach'].isin(top)].reset_index(drop=True)

print(f"Final: {len(df_raw)} rows, {df_raw['beach'].nunique()} beaches")

In [None]:
df = df_raw.copy()
# Calendar features derived from the observation timestamp.
df['hour'] = df['datetime'].dt.hour
df['day_of_week'] = df['datetime'].dt.dayofweek
df['month'] = df['datetime'].dt.month
df['is_weekend'] = (df['day_of_week'] >= 5).astype(int)
df['is_summer'] = df['month'].isin([6, 7, 8]).astype(int)
# NOTE(review): `<= NIGHT_END` also flags hour NIGHT_END itself (06:00-06:59
# with the default) as night — confirm this is intended rather than `<`.
df['is_night'] = ((df['hour'] >= NIGHT_START) | (df['hour'] <= NIGHT_END)).astype(int)

# Weather columns are recognised by their 'ae_' / 'om_' name prefixes.
WEATHER_COLS = [c for c in df.columns if c.startswith('ae_') or c.startswith('om_')]
TEMPORAL_COLS = ['hour', 'day_of_week', 'month', 'is_weekend', 'is_summer', 'is_night']
ALL_FEATURES = WEATHER_COLS + TEMPORAL_COLS

# Drop rows with any missing feature or target, then keep only beaches whose
# maximum count exceeds 20.
df = df.dropna(subset=ALL_FEATURES + ['count']).reset_index(drop=True)
good = df.groupby('beach')['count'].max()
good = good[good > 20].index.tolist()
df = df[df['beach'].isin(good)].reset_index(drop=True)

print(f"After cleaning: {len(df)} rows, {len(good)} beaches")
print(f"Features: {len(ALL_FEATURES)}")

## Create Dataset Strategies

In [None]:
# Strategy 1: drop night rows. Strategy 2: keep everything unchanged.
ds_daytime = df[df['is_night'] == 0].copy().reset_index(drop=True)
ds_full24h = df.copy()
# Strategy 3: keep night rows but force their count to zero.
ds_night0 = df.copy()
ds_night0.loc[ds_night0['is_night'] == 1, 'count'] = 0.0

# Strategy 4: night counts replaced by the beach's daytime first quartile
# (0 for beaches that have no daytime rows).
ds_nightq1 = df.copy()
q1_per_beach = ds_nightq1[ds_nightq1['is_night'] == 0].groupby('beach')['count'].quantile(0.25)
ds_nightq1.loc[ds_nightq1['is_night'] == 1, 'count'] = ds_nightq1.loc[ds_nightq1['is_night'] == 1, 'beach'].map(q1_per_beach).fillna(0)

# Strategy 5: night counts replaced by the beach's daytime minimum
# (0 for beaches that have no daytime rows).
ds_nightmin = df.copy()
min_per_beach = ds_nightmin[ds_nightmin['is_night'] == 0].groupby('beach')['count'].min()
ds_nightmin.loc[ds_nightmin['is_night'] == 1, 'count'] = ds_nightmin.loc[ds_nightmin['is_night'] == 1, 'beach'].map(min_per_beach).fillna(0)

datasets = {'Daytime': ds_daytime, 'Full24h': ds_full24h, 'Night0': ds_night0, 'NightQ1': ds_nightq1, 'NightMin': ds_nightmin}

print("=" * 80)
print("DATASET COMPARISON")
print("=" * 80)

# Summary per strategy: size, night/day share and target distribution.
for name, d in datasets.items():
    night_rows = d[d['is_night'] == 1] if 'is_night' in d.columns else pd.DataFrame()
    day_rows = d[d['is_night'] == 0] if 'is_night' in d.columns else d
    print(f"\n{name}:")
    print(f"  Total rows: {len(d)}, Beaches: {d['beach'].nunique()}")
    print(f"  Night: {len(night_rows)} ({len(night_rows)/len(d)*100:.1f}%), Day: {len(day_rows)} ({len(day_rows)/len(d)*100:.1f}%)")
    print(f"  Count mean: {d['count'].mean():.1f}, max: {d['count'].max():.1f}, zeros: {(d['count'] == 0).sum()} ({(d['count'] == 0).sum()/len(d)*100:.1f}%)")

In [None]:
def split_data(df, train_frac=0.7, val_frac=0.15):
    """Cut ``df`` by row position into consecutive train / val / test slices.

    The first ``train_frac`` of the rows form the training part, the next
    ``val_frac`` the validation part and the remainder the test part. Rows are
    taken in their existing order; no sorting or shuffling happens here.
    """
    total = len(df)
    train_end = int(total * train_frac)
    val_end = int(total * (train_frac + val_frac))
    train_part = df.iloc[:train_end]
    val_part = df.iloc[train_end:val_end]
    test_part = df.iloc[val_end:]
    return train_part, val_part, test_part

# Positional train/val/test split of every dataset strategy, keyed by strategy name.
splits = {}
for name, d in datasets.items():
    train, val, test = split_data(d)
    splits[name] = {'train': train, 'val': val, 'test': test}
    print(f"{name}: train={len(train)}, val={len(val)}, test={len(test)}")

In [None]:
# One panel per dataset strategy: counts over time, coloured by split.
fig, axes = plt.subplots(len(datasets), 1, figsize=(14, 3 * len(datasets)), sharex=True)

for ax, (ds_name, s) in zip(axes, splits.items()):
    train_df = s['train']
    val_df = s['val']
    test_df = s['test']
    
    ax.scatter(train_df['datetime'], train_df['count'], s=1, alpha=0.3, c='tab:blue', label='Train')
    ax.scatter(val_df['datetime'], val_df['count'], s=1, alpha=0.3, c='tab:orange', label='Validation')
    ax.scatter(test_df['datetime'], test_df['count'], s=1, alpha=0.3, c='tab:red', label='Test')
    
    # Vertical lines at the earliest validation / test timestamp mark the boundaries.
    split1 = val_df['datetime'].min()
    split2 = test_df['datetime'].min()
    ax.axvline(split1, color='black', linestyle='--', linewidth=1.5, label=f'Train/Val split ({split1.date()})')
    ax.axvline(split2, color='black', linestyle='-', linewidth=1.5, label=f'Val/Test split ({split2.date()})')
    
    ax.set_ylabel('Count')
    ax.set_title(f'{ds_name} — Train: {len(train_df)}, Val: {len(val_df)}, Test: {len(test_df)}')
    ax.legend(loc='upper right', markerscale=5, fontsize=8)

axes[-1].set_xlabel('Date')
plt.suptitle('Train / Validation / Test Split per Dataset', fontsize=14, y=1.01)
plt.tight_layout()
plt.show()

## Prepare Datasets with Gap Filling and Interpolation

In [None]:
from utilsforecast.preprocessing import fill_gaps

def to_nf_format(df, id_col='beach_folder'):
    """Return a copy of ``df`` in long forecasting layout.

    Keeps the timestamp, ``id_col``, the target and every ALL_FEATURES column
    that exists in ``df``, renaming ``datetime`` -> ``ds``,
    ``id_col`` -> ``unique_id`` and ``count`` -> ``y``.
    """
    wanted = ['datetime', id_col, 'count', *ALL_FEATURES]
    present = [name for name in wanted if name in df.columns]
    renames = {'datetime': 'ds', id_col: 'unique_id', 'count': 'y'}
    return df[present].copy().rename(columns=renames)

# Columns that are a pure function of the timestamp. They must be rebuilt for
# gap-filled rows instead of interpolated: linear interpolation between e.g.
# hour 19 and hour 7 yields values that contradict the row's own `ds`.
_CALENDAR_COLS = ('hour', 'day_of_week', 'month', 'is_weekend', 'is_summer', 'is_night')


def _recompute_calendar_features(nf_df):
    """Rebuild the calendar columns present in ``nf_df`` from its ``ds`` timestamps."""
    stamps = nf_df['ds'].dt
    hour, dow, month = stamps.hour, stamps.dayofweek, stamps.month
    # Same definitions as the feature-engineering cell that first created them.
    fresh = {
        'hour': hour,
        'day_of_week': dow,
        'month': month,
        'is_weekend': (dow >= 5).astype(int),
        'is_summer': month.isin([6, 7, 8]).astype(int),
        'is_night': ((hour >= NIGHT_START) | (hour <= NIGHT_END)).astype(int),
    }
    for col, values in fresh.items():
        if col in nf_df.columns:
            nf_df[col] = values
    return nf_df


def _interpolate_per_series(nf_df):
    """Fill NaNs of the non-calendar numeric columns within each ``unique_id``.

    Linear interpolation handles interior gaps; ffill/bfill cover the edges.
    """
    numeric_cols = [
        c for c in nf_df.select_dtypes(include=[np.number]).columns
        if c not in _CALENDAR_COLS
    ]
    for col in numeric_cols:
        nf_df[col] = nf_df.groupby('unique_id')[col].transform(
            lambda x: x.interpolate(method='linear').ffill().bfill()
        )
    return nf_df


def prepare_dataset_with_filled_gaps(train_df, test_df, freq='h'):
    """Build gap-free train/test frames in (unique_id, ds, y, features) layout.

    Each input is converted with to_nf_format(), duplicate (unique_id, ds) rows
    are averaged, missing timestamps are inserted with fill_gaps() and the
    resulting NaNs are filled: calendar features are recomputed from ``ds``,
    all other numeric columns are interpolated per series.

    Args:
        train_df: training rows in the notebook's raw column layout.
        test_df: test rows in the same layout.
        freq: pandas frequency string of the target grid.

    Returns:
        (nf_train, nf_test, series_ids) where both frames are restricted to
        the series present in both of them and ``series_ids`` is the sorted
        list of those ids.
    """
    prepared = []
    for raw in (train_df, test_df):
        nf = to_nf_format(raw)
        nf = nf.groupby(['unique_id', 'ds']).mean(numeric_only=True).reset_index()
        nf = fill_gaps(nf, freq=freq)
        nf = _recompute_calendar_features(nf)
        nf = _interpolate_per_series(nf)
        prepared.append(nf)
    nf_train, nf_test = prepared

    common_ids = set(nf_train['unique_id'].unique()) & set(nf_test['unique_id'].unique())
    nf_train = nf_train[nf_train['unique_id'].isin(common_ids)].reset_index(drop=True)
    nf_test = nf_test[nf_test['unique_id'].isin(common_ids)].reset_index(drop=True)

    # Sorted so the id list is reproducible between runs (set order is not).
    return nf_train, nf_test, sorted(common_ids)

# Gap-filled train/test frames per dataset strategy, keyed by strategy name.
prepared_data = {}

print("=" * 70)
print("PREPARING DATASETS WITH FILLED GAPS + INTERPOLATION")
print("=" * 70)

for ds_name in datasets.keys():
    print(f"\nProcessing: {ds_name}")
    s = splits[ds_name]
    # Train and validation rows are merged here; only the test split stays held out.
    train_val = pd.concat([s['train'], s['val']])
    
    nf_train, nf_test, series_ids = prepare_dataset_with_filled_gaps(train_val, s['test'])
    
    prepared_data[ds_name] = {
        'train': nf_train,
        'test': nf_test,
        'series_ids': series_ids,
        'n_series': len(series_ids),
    }
    
    print(f"  Train: {len(nf_train)}, Test: {len(nf_test)}, Series: {len(series_ids)}")
    print(f"  NaN check - train: {nf_train['y'].isna().sum()}, test: {nf_test['y'].isna().sum()}")

## Sklearn Baseline Models

In [None]:
# Result collectors shared by all experiment cells:
#   all_results       - one summary row per (model, dataset)
#   all_beach_results - per-beach metric frames (filled by this cell)
all_results = []
all_beach_results = []

if RUN_SKLEARN:
    for ds_name in datasets.keys():
        data = prepared_data[ds_name]
        train_df = data['train']
        test_df = data['test']
        
        # Every column except the id, timestamp and target is used as a feature.
        feature_cols = [c for c in train_df.columns if c not in ['unique_id', 'ds', 'y']]
        
        X_train = train_df[feature_cols]
        y_train = train_df['y']
        X_test = test_df[feature_cols]
        y_test = test_df['y']
        
        # Fresh estimators per dataset; boosters are added only when importable.
        sklearn_models = {
            'Lasso': Lasso(alpha=0.1),
            'RandomForest': RandomForestRegressor(n_estimators=100, max_depth=10, random_state=42, n_jobs=-1),
        }
        if HAS_XGB:
            sklearn_models['XGBoost'] = XGBRegressor(n_estimators=200, max_depth=6, random_state=42, n_jobs=-1, verbosity=0)
        if HAS_LGBM:
            sklearn_models['LightGBM'] = LGBMRegressor(n_estimators=200, max_depth=6, random_state=42, n_jobs=-1, verbose=-1)
        if HAS_CATBOOST:
            sklearn_models['CatBoost'] = CatBoostRegressor(n_estimators=200, max_depth=6, random_state=42, verbose=0)
        
        print(f"\n{'=' * 60}")
        print(f"SKLEARN - {ds_name}")
        print(f"{'=' * 60}")
        print(f"Train: {len(X_train)}, Test: {len(X_test)}, Features: {len(feature_cols)}")
        
        for name, model in sklearn_models.items():
            # Timed span covers fitting and prediction.
            t0 = time.time()
            model.fit(X_train, y_train)
            # Counts cannot be negative, so predictions are clipped at 0.
            y_pred = np.clip(model.predict(X_test), 0, None)
            elapsed = time.time() - t0
            
            # Pooled metrics over all beaches, normalised by the overall test maximum.
            m = calc_metrics(y_test.values, y_pred, y_test.max())
            
            # Per-beach metrics, each normalised by that beach's own maximum.
            eval_df = test_df[['unique_id', 'y']].copy()
            eval_df['count'] = eval_df['y']
            eval_df['beach'] = eval_df['unique_id']
            beach_df = eval_per_beach(eval_df, y_pred, 'beach')
            beach_df['model'] = name
            beach_df['dataset'] = ds_name
            all_beach_results.append(beach_df)
            
            # Headline score: unweighted mean of the per-beach relative MAE.
            avg_rel = beach_df['RelMAE'].mean()
            all_results.append({
                'Model': name, 'Dataset': ds_name, 'Type': 'Sklearn',
                'MAE': m['MAE'], 'RMSE': m['RMSE'], 'R2': m['R2'],
                'AvgRelMAE': avg_rel, 'Time': elapsed
            })
            print(f"  {name:15s} | {elapsed:5.1f}s | MAE={m['MAE']:.1f} | RelMAE={avg_rel:.1f}% | R2={m['R2']:.3f}")

## Darts Deep Learning Models: LSTM & TFT

| Model | Implementation | Strengths | Best For |
|-------|---------------|-----------|----------|
| **LSTM** | BlockRNNModel | Classic baseline, sequential patterns | Time-dependent features |
| **TFT** | TFTModel | Interpretable attention, covariates support | Feature importance, new beach prediction |

In [None]:
def df_to_darts_series(df, target_col='y', id_col='unique_id', time_col='ds', cov_cols=None):
    """Turn a long-format frame into per-series Darts TimeSeries lists.

    Groups ``df`` by ``id_col`` and builds one hourly target series per group
    and, when ``cov_cols`` is given, one covariate series from those of the
    requested columns that exist. Groups with fewer than
    INPUT_CHUNK_LENGTH + OUTPUT_CHUNK_LENGTH rows are skipped.

    Returns:
        (targets, covariates); ``covariates`` is None when ``cov_cols`` is
        empty or None.
    """
    min_rows = INPUT_CHUNK_LENGTH + OUTPUT_CHUNK_LENGTH
    targets = []
    covariates = []

    for _, frame in df.groupby(id_col):
        ordered = frame.sort_values(time_col).reset_index(drop=True)
        if len(ordered) < min_rows:
            continue

        targets.append(
            TimeSeries.from_dataframe(ordered, time_col=time_col, value_cols=target_col, freq='h')
        )

        if not cov_cols:
            continue
        usable = [name for name in cov_cols if name in ordered.columns]
        covariates.append(
            TimeSeries.from_dataframe(ordered, time_col=time_col, value_cols=usable, freq='h')
        )

    if cov_cols:
        return targets, covariates
    return targets, None

print(f"Darts conversion function ready. Features: {len(ALL_FEATURES)}")

In [None]:
import traceback


def _score_darts_forecasts(preds, actual_series, uids, model_name, ds_name, elapsed):
    """Score per-series Darts forecasts against held-out actuals and log them.

    ``preds``, ``actual_series`` and ``uids`` are parallel sequences with one
    entry per series. A summary row is appended to the global ``all_results``
    list and the same metrics are printed.
    """
    y_true_all, y_pred_all, uids_all = [], [], []
    for uid, pred, actual_ts in zip(uids, preds, actual_series):
        # all_values() keeps the sample axis (time, component, sample).
        # Averaging over it gives a point forecast for probabilistic models
        # and leaves deterministic forecasts (one sample) unchanged.
        pred_vals = np.clip(pred.all_values(copy=False).mean(axis=2).flatten(), 0, None)
        actual = actual_ts.values().flatten()
        n_match = min(len(actual), len(pred_vals))
        y_true_all.extend(actual[:n_match])
        y_pred_all.extend(pred_vals[:n_match])
        uids_all.extend([uid] * n_match)

    if not y_true_all:
        print("    No valid predictions")
        return

    y_true_arr = np.array(y_true_all)
    y_pred_arr = np.array(y_pred_all)
    m = calc_metrics(y_true_arr, y_pred_arr, y_true_arr.max())
    eval_df = pd.DataFrame({'unique_id': uids_all, 'y': y_true_arr, 'beach': uids_all})
    beach_df = eval_per_beach(eval_df, y_pred_arr, 'beach')
    avg_rel = beach_df['RelMAE'].mean() if len(beach_df) > 0 else np.nan

    all_results.append({
        'Model': model_name, 'Dataset': ds_name, 'Type': 'Darts',
        'MAE': m['MAE'], 'RMSE': m['RMSE'], 'R2': m['R2'],
        'AvgRelMAE': avg_rel, 'Time': elapsed
    })
    print(f"    {elapsed:.1f}s | MAE={m['MAE']:.1f} | RelMAE={avg_rel:.1f}% | R2={m['R2']:.3f}")


if RUN_DARTS and HAS_DARTS:
    # Shortest series df_to_darts_series() will convert.
    min_len = INPUT_CHUNK_LENGTH + OUTPUT_CHUNK_LENGTH

    for ds_name in ['Full24h', 'Night0', 'NightQ1', 'NightMin']:
        nf_train = prepared_data[ds_name]['train']
        nf_test = prepared_data[ds_name]['test']

        print(f"\n{'=' * 60}")
        print(f"DARTS - {ds_name}")
        print(f"{'=' * 60}")

        # df_to_darts_series() silently skips series shorter than min_len.
        # Restricting every frame to the ids that survive in the train split
        # keeps the train list, the full list, the per-series scalers and
        # `series_ids` index-aligned.
        train_sizes = nf_train.groupby('unique_id').size()
        series_ids = sorted(train_sizes[train_sizes >= min_len].index)
        if not series_ids:
            print("  No series long enough for training - skipping")
            continue
        nf_train = nf_train[nf_train['unique_id'].isin(series_ids)]
        nf_test = nf_test[nf_test['unique_id'].isin(series_ids)]

        # Train + test stitched per series. The gap filling of the train split
        # may already have created a row at a series' first test timestamp;
        # the test row wins (keep='last') so the time index stays unique.
        # NOTE(review): a hole between a series' last train and first test
        # timestamp is not filled here — confirm the data has none.
        nf_all = (
            pd.concat([nf_train, nf_test])
            .drop_duplicates(subset=['unique_id', 'ds'], keep='last')
            .sort_values(['unique_id', 'ds'])
            .reset_index(drop=True)
        )

        # Convert to Darts TimeSeries
        train_series, train_covs = df_to_darts_series(nf_train, cov_cols=ALL_FEATURES)
        full_series, full_covs = df_to_darts_series(nf_all, cov_cols=ALL_FEATURES)
        if not (len(train_series) == len(full_series) == len(series_ids)):
            print("  Train/full series lists are misaligned - skipping")
            continue

        # Scale (scalers are fitted on the training series only)
        scaler_target = Scaler()
        scaler_cov = Scaler()
        train_series_scaled = scaler_target.fit_transform(train_series)
        train_covs_scaled = scaler_cov.fit_transform(train_covs) if train_covs else None
        full_series_scaled = scaler_target.transform(full_series)
        full_covs_scaled = scaler_cov.transform(full_covs) if full_covs else None

        # Forecast horizon: limited by the shortest test series.
        test_horizon = nf_test.groupby('unique_id').size().min()
        horizon = int(min(OUTPUT_CHUNK_LENGTH, test_horizon))

        # Hold out the last `horizon` steps of every series: the models see
        # everything before them and are scored on exactly those steps.
        # Predicting from the complete series would forecast timestamps beyond
        # the data and compare them with values the model received as input.
        context_scaled = [s[:-horizon] for s in full_series_scaled]
        actual_tails = [s[-horizon:] for s in full_series]

        print(f"  Series: {len(train_series)}, Horizon: {horizon}")

        shared_kwargs = dict(
            input_chunk_length=INPUT_CHUNK_LENGTH,
            output_chunk_length=horizon,
            dropout=0.1,
            batch_size=BATCH_SIZE,
            n_epochs=N_EPOCHS,
            optimizer_kwargs={'lr': 1e-3},
            random_state=42,
            pl_trainer_kwargs=PL_TRAINER_KWARGS,
        )

        # === LSTM (BlockRNNModel) ===
        print(f"\n  LSTM (BlockRNNModel)...")
        try:
            t0 = time.time()
            lstm_model = BlockRNNModel(
                model='LSTM',
                hidden_dim=64,
                n_rnn_layers=2,
                **shared_kwargs,
            )
            lstm_model.fit(
                series=train_series_scaled,
                past_covariates=train_covs_scaled,
                verbose=False,
            )
            preds_scaled = lstm_model.predict(
                n=horizon,
                series=context_scaled,
                past_covariates=full_covs_scaled,
            )
            preds = scaler_target.inverse_transform(preds_scaled)
            elapsed = time.time() - t0

            _score_darts_forecasts(preds, actual_tails, series_ids, 'LSTM', ds_name, elapsed)
        except Exception as e:
            print(f"    ERROR: {e}")
            traceback.print_exc()

        # === TFT ===
        print(f"\n  TFT (TFTModel)...")
        try:
            t0 = time.time()
            tft_model = TFTModel(
                hidden_size=64,
                lstm_layers=2,
                num_attention_heads=4,
                likelihood=QuantileRegression(quantiles=[0.1, 0.25, 0.5, 0.75, 0.9]),
                # Only past covariates are supplied, so the relative index is
                # switched on to give TFT the future input it needs.
                add_relative_index=True,
                **shared_kwargs,
            )
            tft_model.fit(
                series=train_series_scaled,
                past_covariates=train_covs_scaled,
                verbose=False,
            )
            preds_scaled = tft_model.predict(
                n=horizon,
                series=context_scaled,
                past_covariates=full_covs_scaled,
                num_samples=50,
            )
            preds = scaler_target.inverse_transform(preds_scaled)
            elapsed = time.time() - t0

            _score_darts_forecasts(preds, actual_tails, series_ids, 'TFT', ds_name, elapsed)
        except Exception as e:
            print(f"    ERROR: {e}")
            traceback.print_exc()

## TFT Interpretability

One of TFT's key advantages is built-in interpretability via variable selection networks and attention.

In [None]:
# Explain the TFT model left over from the Darts cell, i.e. the one trained on
# the last dataset of that loop. Any failure (including the model variables
# not existing) is reported by the except branch instead of stopping the run.
if RUN_DARTS and HAS_DARTS:
    try:
        from darts.explainability import TFTExplainer
        
        # Use the last trained TFT model
        # Only the first training series (and its covariates) serves as background.
        explainer = TFTExplainer(
            tft_model,
            background_series=train_series_scaled[0],
            background_past_covariates=train_covs_scaled[0] if train_covs_scaled else None,
        )
        result = explainer.explain()
        
        # Encoder importance
        # NOTE(review): `_encoder_importance` is a private attribute; when the
        # installed Darts version does not expose it this chart is silently
        # skipped — confirm against the Darts version in use.
        if hasattr(explainer, '_encoder_importance') and explainer._encoder_importance is not None:
            plt.figure(figsize=(10, 5))
            # Keep the 15 largest importances for a readable bar chart.
            imp = explainer._encoder_importance.melt().sort_values(by='value').tail(15)
            plt.barh(data=imp, y='variable', width='value')
            plt.xlabel('Importance')
            plt.title('TFT Encoder Variable Importance')
            plt.tight_layout()
            plt.show()
        
        # Attention
        explainer.plot_attention(result, plot_type='all')
        plt.show()
        
        print("TFT interpretability plots generated")
    except Exception as e:
        print(f"TFT interpretability not available: {e}")

## Optuna Hyperparameter Optimization

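Optimize on the Night0 dataset only (the tuned gradient-boosting models are then refit and evaluated on every dataset strategy):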
- **XGBoost**: n_estimators, max_depth, learning_rate, subsample, colsample_bytree
- **LightGBM**: n_estimators, num_leaves, learning_rate, feature_fraction, bagging_fraction
- **CatBoost**: iterations, depth, learning_rate, l2_leaf_reg
- **LSTM (Darts)**: hidden_dim, n_rnn_layers, learning_rate, dropout

In [None]:
def calc_avg_rel_mae(y_true, y_pred, groups):
    """Average, over groups, of MAE expressed in percent of the group's maximum.

    Groups with fewer than 3 samples or a non-positive maximum true value are
    left out. When no group qualifies the sentinel 999.0 is returned so the
    result can still be minimised.
    """
    scores = []
    for label in np.unique(groups):
        members = groups == label
        if members.sum() < 3:
            continue
        truth = y_true[members]
        peak = truth.max()
        if not peak > 0:
            continue
        scores.append(mean_absolute_error(truth, y_pred[members]) / peak * 100)
    if not scores:
        return 999.0
    return np.mean(scores)

def create_xgb_objective(X_train, y_train, X_val, y_val, groups_val):
    """Build an Optuna objective that tunes an XGBRegressor.

    The returned callable samples hyperparameters from the trial, fits on
    (X_train, y_train) and returns calc_avg_rel_mae() on the validation data,
    grouped by ``groups_val``; lower is better.
    """
    def objective(trial):
        # Search space; the fixed entries at the end are not tuned.
        params = {
            'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
            'max_depth': trial.suggest_int('max_depth', 3, 10),
            'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
            'subsample': trial.suggest_float('subsample', 0.5, 1.0),
            'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
            'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
            'reg_alpha': trial.suggest_float('reg_alpha', 1e-8, 10.0, log=True),
            'reg_lambda': trial.suggest_float('reg_lambda', 1e-8, 10.0, log=True),
            'random_state': 42, 'n_jobs': -1, 'verbosity': 0,
        }
        model = XGBRegressor(**params)
        model.fit(X_train, y_train)
        # Counts cannot be negative, so predictions are clipped at 0.
        y_pred = np.clip(model.predict(X_val), 0, None)
        return calc_avg_rel_mae(y_val.values, y_pred, groups_val.values)
    return objective

def create_lgbm_objective(X_train, y_train, X_val, y_val, groups_val):
    """Build an Optuna objective that tunes an LGBMRegressor.

    The returned callable samples hyperparameters from the trial, fits on
    (X_train, y_train) and returns calc_avg_rel_mae() on the validation data,
    grouped by ``groups_val``; lower is better.
    """
    def objective(trial):
        # Search space; the fixed entries at the end are not tuned.
        params = {
            'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
            'num_leaves': trial.suggest_int('num_leaves', 20, 150),
            'max_depth': trial.suggest_int('max_depth', 3, 12),
            'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
            'feature_fraction': trial.suggest_float('feature_fraction', 0.5, 1.0),
            'bagging_fraction': trial.suggest_float('bagging_fraction', 0.5, 1.0),
            'bagging_freq': trial.suggest_int('bagging_freq', 1, 7),
            'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
            'random_state': 42, 'n_jobs': -1, 'verbose': -1,
        }
        model = LGBMRegressor(**params)
        model.fit(X_train, y_train)
        # Counts cannot be negative, so predictions are clipped at 0.
        y_pred = np.clip(model.predict(X_val), 0, None)
        return calc_avg_rel_mae(y_val.values, y_pred, groups_val.values)
    return objective

def create_catboost_objective(X_train, y_train, X_val, y_val, groups_val):
    """Build an Optuna objective that tunes a CatBoostRegressor.

    The returned callable samples hyperparameters from the trial, fits on
    (X_train, y_train) and returns calc_avg_rel_mae() on the validation data,
    grouped by ``groups_val``; lower is better.
    """
    def objective(trial):
        # Search space; the fixed entries at the end are not tuned.
        params = {
            'iterations': trial.suggest_int('iterations', 100, 1000),
            'depth': trial.suggest_int('depth', 4, 10),
            'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
            'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1e-8, 10.0, log=True),
            'bagging_temperature': trial.suggest_float('bagging_temperature', 0.0, 1.0),
            'random_strength': trial.suggest_float('random_strength', 1e-8, 10.0, log=True),
            'border_count': trial.suggest_int('border_count', 32, 255),
            'random_state': 42, 'verbose': 0,
        }
        model = CatBoostRegressor(**params)
        model.fit(X_train, y_train)
        # Counts cannot be negative, so predictions are clipped at 0.
        y_pred = np.clip(model.predict(X_val), 0, None)
        return calc_avg_rel_mae(y_val.values, y_pred, groups_val.values)
    return objective

In [None]:
# Best hyperparameters found per dataset: {dataset: {"<Model>_Optuna": params}}.
best_models_per_dataset = {}

if RUN_OPTUNA:
    # (display name, objective factory) for every booster that could be imported.
    tuners = []
    if HAS_XGB:
        tuners.append(('XGBoost', create_xgb_objective))
    if HAS_LGBM:
        tuners.append(('LightGBM', create_lgbm_objective))
    if HAS_CATBOOST:
        tuners.append(('CatBoost', create_catboost_objective))

    for ds_name in ['Night0']:
        data = prepared_data[ds_name]
        feature_cols = [c for c in data['train'].columns if c not in ['unique_id', 'ds', 'y']]

        # The prepared frame is ordered by (unique_id, ds), so a positional
        # cut would hold out whole beaches. Ordering by time first makes the
        # last 18% of the rows a chronological validation window.
        train_nf = data['train'].sort_values('ds', kind='stable').reset_index(drop=True)
        split_idx = int(len(train_nf) * 0.82)
        X_tr = train_nf[feature_cols].iloc[:split_idx]
        y_tr = train_nf['y'].iloc[:split_idx]
        X_va = train_nf[feature_cols].iloc[split_idx:]
        y_va = train_nf['y'].iloc[split_idx:]
        groups_va = train_nf['unique_id'].iloc[split_idx:]

        best_models = {}

        print(f"\n{'=' * 60}")
        print(f"OPTUNA - {ds_name} (trials={OPTUNA_TRIALS})")
        print(f"{'=' * 60}")

        # One independent study per booster, all with the same trial/time budget.
        for label, make_objective in tuners:
            print(f"\n  {label}...")
            study = optuna.create_study(direction='minimize')
            study.optimize(make_objective(X_tr, y_tr, X_va, y_va, groups_va),
                          n_trials=OPTUNA_TRIALS, timeout=OPTUNA_TIMEOUT, show_progress_bar=True)
            best_models[f'{label}_Optuna'] = study.best_params
            print(f"    Best AvgRelMAE: {study.best_value:.2f}%")

        best_models_per_dataset[ds_name] = best_models

In [None]:
# Refit each tuned booster on every dataset's full training frame and score it
# on that dataset's test split.
if RUN_OPTUNA and best_models_per_dataset:
    print("\n" + "=" * 60)
    print("EVALUATING OPTUNA MODELS ACROSS ALL DATASETS")
    print("=" * 60)
    
    for ds_name in datasets.keys():
        data = prepared_data[ds_name]
        feature_cols = [c for c in data['train'].columns if c not in ['unique_id', 'ds', 'y']]
        
        X_train_full = data['train'][feature_cols]
        y_train_full = data['train']['y']
        X_test = data['test'][feature_cols]
        y_test = data['test']['y']
        
        # Hyperparameters always come from the Night0 study, whatever dataset is evaluated.
        best_models = best_models_per_dataset.get('Night0', {})
        
        print(f"\n  --- {ds_name} ---")
        
        for name, params in best_models.items():
            t0 = time.time()
            
            # The model family is recognised from the result key.
            if 'XGBoost' in name:
                model = XGBRegressor(**params, random_state=42, n_jobs=-1, verbosity=0)
            elif 'LightGBM' in name:
                model = LGBMRegressor(**params, random_state=42, n_jobs=-1, verbose=-1)
            elif 'CatBoost' in name:
                model = CatBoostRegressor(**params, random_state=42, verbose=0)
            else:
                continue
            
            model.fit(X_train_full, y_train_full)
            # Counts cannot be negative, so predictions are clipped at 0.
            y_pred = np.clip(model.predict(X_test), 0, None)
            elapsed = time.time() - t0
            
            # Pooled metrics, normalised by the overall test maximum.
            m = calc_metrics(y_test.values, y_pred, y_test.max())
            
            # Per-beach metrics; their mean relative MAE is the headline score.
            eval_df = data['test'][['unique_id', 'y']].copy()
            eval_df['count'] = eval_df['y']
            eval_df['beach'] = eval_df['unique_id']
            beach_df = eval_per_beach(eval_df, y_pred, 'beach')
            avg_rel = beach_df['RelMAE'].mean()
            
            all_results.append({
                'Model': name, 'Dataset': ds_name, 'Type': 'Optuna',
                'MAE': m['MAE'], 'RMSE': m['RMSE'], 'R2': m['R2'],
                'AvgRelMAE': avg_rel, 'Time': elapsed
            })
            print(f"    {name:20s} | {elapsed:5.1f}s | MAE={m['MAE']:.1f} | RelMAE={avg_rel:.1f}% | R2={m['R2']:.3f}")

In [None]:
def create_lstm_darts_objective(train_series, train_covs, val_series, val_covs, scaler_t, horizon):
    """Build an Optuna objective that tunes a Darts ``BlockRNNModel`` LSTM.

    Args:
        train_series: Target series the model is fitted on and forecasts from.
        train_covs: Past covariates passed alongside ``train_series``.
        val_series: Held-out target series compared against the
            inverse-transformed forecasts, paired with them by position.
        val_covs: Currently unused by the objective.
        scaler_t: Object whose ``inverse_transform`` is applied to the raw
            forecasts before scoring.
        horizon: Number of steps to forecast; also used as the model's
            ``output_chunk_length``.

    Returns:
        A callable ``objective(trial)`` that returns the mean of the
        per-series MAEs (0 when no series could be scored).
    """
    def objective(trial):
        # Search space sampled for this trial.
        search = {
            'hidden_dim': trial.suggest_int('hidden_dim', 32, 256),
            'n_rnn_layers': trial.suggest_int('n_rnn_layers', 1, 3),
        }
        step_size = trial.suggest_float('learning_rate', 1e-4, 1e-2, log=True)
        search['dropout'] = trial.suggest_float('dropout', 0.0, 0.5)

        candidate = BlockRNNModel(
            model='LSTM',
            input_chunk_length=INPUT_CHUNK_LENGTH,
            output_chunk_length=horizon,
            batch_size=BATCH_SIZE,
            n_epochs=20,
            optimizer_kwargs={'lr': step_size},
            random_state=42,
            pl_trainer_kwargs=PL_TRAINER_KWARGS,
            **search,
        )
        candidate.fit(series=train_series, past_covariates=train_covs, verbose=False)

        # Forecast `horizon` steps past the end of the training series and
        # bring the forecasts back to the original scale.
        forecasts = scaler_t.inverse_transform(
            candidate.predict(n=horizon, series=train_series, past_covariates=train_covs)
        )

        # One MAE per series, computed over the overlapping prefix of the
        # forecast and the validation values.
        per_series_mae = []
        for idx in range(min(len(forecasts), len(val_series))):
            predicted = np.clip(forecasts[idx].values().flatten(), 0, None)
            observed = val_series[idx].values().flatten()
            overlap = min(len(predicted), len(observed))
            if overlap <= 0:
                continue
            per_series_mae.append(mean_absolute_error(observed[:overlap], predicted[:overlap]))

        return sum(per_series_mae) / max(len(per_series_mae), 1)
    return objective

if RUN_OPTUNA and HAS_DARTS:
    # Tune the Darts LSTM on the Night0 training data and store the best
    # parameters under best_models_per_dataset['Night0']['LSTM_Optuna'].
    print("\n" + "=" * 60)
    print("OPTUNA LSTM (Darts) OPTIMIZATION")
    print("=" * 60)

    ds_name = 'Night0'
    nf_tr = prepared_data[ds_name]['train']

    # Split train into sub-train and sub-val *per series, in time order*.
    # The objective forecasts the steps right after the end of each training
    # series and compares them with the start of the validation series at the
    # same position, so every series needs its own chronological split.
    # A plain row-position split of the long frame does not guarantee that
    # (on a frame ordered by series it would split by beach instead of by time).
    nf_tr_sorted = nf_tr.sort_values(['unique_id', 'ds'])
    by_series = nf_tr_sorted.groupby('unique_id')
    row_pos = by_series.cumcount().to_numpy()
    n_sub_train = (by_series['ds'].transform('size') * 0.82).astype(int).to_numpy()
    in_sub_train = row_pos < n_sub_train
    # Series too short to contribute any sub-train row are left out of the
    # validation part as well, so both parts contain the same set of series.
    nf_tr_sub = nf_tr_sorted[in_sub_train]
    nf_va_sub = nf_tr_sorted[~in_sub_train & (n_sub_train > 0)]

    tr_series, tr_covs = df_to_darts_series(nf_tr_sub, cov_cols=ALL_FEATURES)
    va_series, va_covs = df_to_darts_series(nf_va_sub, cov_cols=ALL_FEATURES)

    # Scalers are fitted on the sub-train part only; validation targets stay
    # unscaled because the objective inverse-transforms its forecasts.
    scaler_t_opt = Scaler()
    scaler_c_opt = Scaler()
    tr_series_s = scaler_t_opt.fit_transform(tr_series)
    tr_covs_s = scaler_c_opt.fit_transform(tr_covs) if tr_covs else None

    horizon_opt = min(OUTPUT_CHUNK_LENGTH, 12)

    print(f"  Optimizing LSTM (horizon={horizon_opt})...")
    study = optuna.create_study(direction='minimize')

    # Best effort: a failed optimisation is reported but does not stop the
    # notebook; the later evaluation cell is skipped when no parameters exist.
    try:
        study.optimize(
            create_lstm_darts_objective(tr_series_s, tr_covs_s, va_series, va_covs, scaler_t_opt, horizon_opt),
            n_trials=min(10, OPTUNA_TRIALS),
            timeout=OPTUNA_TIMEOUT,
            show_progress_bar=True
        )
        print(f"    Best MAE: {study.best_value:.2f}")
        print(f"    Best params: {study.best_params}")
        best_models_per_dataset.setdefault('Night0', {})['LSTM_Optuna'] = study.best_params
    except Exception as e:
        print(f"    ERROR: {e}")

In [None]:
# Evaluate the Optuna-tuned LSTM (parameters found on Night0) on each 24h
# dataset and append one row per dataset to `all_results`.
lstm_params = best_models_per_dataset.get('Night0', {}).get('LSTM_Optuna')

if RUN_OPTUNA and HAS_DARTS and lstm_params:
    for ds_name in ['Full24h', 'Night0', 'NightQ1', 'NightMin']:
        nf_train = prepared_data[ds_name]['train']
        nf_test = prepared_data[ds_name]['test']
        nf_all = pd.concat([nf_train, nf_test]).sort_values(['unique_id', 'ds']).reset_index(drop=True)

        train_s, train_c = df_to_darts_series(nf_train, cov_cols=ALL_FEATURES)
        full_s, full_c = df_to_darts_series(nf_all, cov_cols=ALL_FEATURES)

        # Scalers are fitted on the training part only and then applied to the
        # combined train+test series.
        scaler_t2 = Scaler()
        scaler_c2 = Scaler()
        train_s_sc = scaler_t2.fit_transform(train_s)
        train_c_sc = scaler_c2.fit_transform(train_c) if train_c else None
        full_s_sc = scaler_t2.transform(full_s)
        full_c_sc = scaler_c2.transform(full_c) if full_c else None

        # Never forecast further than the shortest test series.
        horizon = int(min(OUTPUT_CHUNK_LENGTH, nf_test.groupby('unique_id').size().min()))

        print(f"\n  Evaluating LSTM_Optuna on {ds_name} (horizon={horizon})...")

        t0 = time.time()
        model = BlockRNNModel(
            model='LSTM',
            input_chunk_length=INPUT_CHUNK_LENGTH,
            output_chunk_length=horizon,
            hidden_dim=lstm_params.get('hidden_dim', 64),
            n_rnn_layers=lstm_params.get('n_rnn_layers', 2),
            dropout=lstm_params.get('dropout', 0.1),
            batch_size=BATCH_SIZE,
            n_epochs=N_EPOCHS,
            optimizer_kwargs={'lr': lstm_params.get('learning_rate', 1e-3)},
            random_state=42,
            pl_trainer_kwargs=PL_TRAINER_KWARGS,
        )

        model.fit(series=train_s_sc, past_covariates=train_c_sc, verbose=False)

        # The forecast is scored against the last `horizon` test values, so the
        # context handed to predict() must stop right before them. Predicting
        # from the untrimmed train+test series would forecast the steps *after*
        # the test set while feeding the scored values to the model as input.
        # NOTE(review): assumes df_to_darts_series returns a list of series
        # (the scoring loop below indexes the forecasts by position).
        context_s_sc = [s[:-horizon] for s in full_s_sc]
        preds_scaled = model.predict(n=horizon, series=context_s_sc, past_covariates=full_c_sc)
        preds = scaler_t2.inverse_transform(preds_scaled)
        elapsed = time.time() - t0

        # NOTE(review): forecasts are matched to beaches by sorted unique_id;
        # confirm df_to_darts_series emits series in that order.
        series_ids = sorted(nf_all['unique_id'].unique())
        y_true_all, y_pred_all, uids_all = [], [], []

        for i, uid in enumerate(series_ids):
            if i >= len(preds): break
            pred_vals = np.clip(preds[i].values().flatten(), 0, None)
            test_sub = nf_test[nf_test['unique_id'] == uid].sort_values('ds').tail(len(pred_vals))
            actual = test_sub['y'].values[:len(pred_vals)]
            n_match = min(len(actual), len(pred_vals))
            y_true_all.extend(actual[:n_match])
            y_pred_all.extend(pred_vals[:n_match])
            uids_all.extend([uid] * n_match)

        y_true_arr = np.array(y_true_all)
        y_pred_arr = np.array(y_pred_all)

        if len(y_true_arr) > 0:
            m = calc_metrics(y_true_arr, y_pred_arr, y_true_arr.max())
            # Same column layout as the frame built for the tabular models
            # ('count' mirrors 'y', 'beach' mirrors 'unique_id').
            eval_df = pd.DataFrame({
                'unique_id': uids_all, 'y': y_true_arr,
                'count': y_true_arr, 'beach': uids_all,
            })
            beach_df = eval_per_beach(eval_df, y_pred_arr, 'beach')
            avg_rel = beach_df['RelMAE'].mean()

            all_results.append({
                'Model': 'LSTM_Optuna', 'Dataset': ds_name, 'Type': 'Optuna+Darts',
                'MAE': m['MAE'], 'RMSE': m['RMSE'], 'R2': m['R2'],
                'AvgRelMAE': avg_rel, 'Time': elapsed
            })
            print(f"    {elapsed:.1f}s | MAE={m['MAE']:.1f} | RelMAE={avg_rel:.1f}% | R2={m['R2']:.3f}")

## Results

In [None]:
# Collect all metric rows into frames, persist them as CSV under SAVE_DIR and
# print one table per dataset, best (lowest) AvgRelMAE first.
results_df = pd.DataFrame(all_results)
beach_results_df = pd.concat(all_beach_results, ignore_index=True) if all_beach_results else pd.DataFrame()

save_dir = Path(SAVE_DIR)
save_dir.mkdir(parents=True, exist_ok=True)
results_df.to_csv(save_dir / 'results.csv', index=False)
if len(beach_results_df) > 0:
    beach_results_df.to_csv(save_dir / 'beach_results.csv', index=False)

print("\n" + "=" * 70)
print("RESULTS BY DATASET")
print("=" * 70)
# With no results at all the frame has no columns, so filtering on 'Dataset'
# would raise KeyError; skip the per-dataset tables in that case.
dataset_names = datasets.keys() if len(results_df) > 0 else []
for ds in dataset_names:
    sub = results_df[results_df['Dataset'] == ds].sort_values('AvgRelMAE')
    if len(sub) == 0:
        continue
    print(f"\n{ds}:")
    print(sub[['Model', 'Type', 'MAE', 'R2', 'AvgRelMAE', 'Time']].to_string(index=False))

In [None]:
# Model x Dataset table of the average per-beach relative MAE.
# Guarded like the plotting cell: an empty results frame has no columns and
# pivot_table would raise KeyError.
if len(results_df) > 0:
    pivot = results_df.pivot_table(index='Model', columns='Dataset', values='AvgRelMAE')
    print("\nRelMAE (%) by Model x Dataset:")
    print(pivot.round(1).to_string())

In [None]:
if len(results_df) > 0:
    # Two side-by-side grouped bar charts (one bar group per model, one bar
    # per dataset): average relative MAE on the left, R² on the right.
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))
    ax_rel, ax_r2 = axes[0], axes[1]

    # Left panel: models ordered from lowest to highest mean RelMAE.
    pivot = results_df.pivot_table(index='Model', columns='Dataset', values='AvgRelMAE')
    rel_order = pivot.mean(axis=1).sort_values().index
    pivot = pivot.loc[rel_order]
    pivot.plot(kind='bar', ax=ax_rel, width=0.8)

    # Right panel: models ordered from highest to lowest mean R².
    pivot_r2 = results_df.pivot_table(index='Model', columns='Dataset', values='R2')
    r2_order = pivot_r2.mean(axis=1).sort_values(ascending=False).index
    pivot_r2 = pivot_r2.loc[r2_order]
    pivot_r2.plot(kind='bar', ax=ax_r2, width=0.8)

    # Shared decoration for both panels.
    panel_text = (
        (ax_rel, 'Avg RelMAE (%)', 'Model Performance (lower is better)'),
        (ax_r2, 'R²', 'R² Score (higher is better)'),
    )
    for panel, y_label, heading in panel_text:
        panel.set_ylabel(y_label)
        panel.set_title(heading)
        panel.legend(title='Dataset')
        panel.tick_params(axis='x', rotation=45)

    plt.tight_layout()
    plt.savefig(save_dir / 'comparison.png', dpi=150)
    plt.show()

In [None]:
# For every dataset, report the model with the lowest average relative MAE.
print("\n" + "=" * 70)
print("BEST MODEL PER DATASET")
print("=" * 70)

# With no results at all the frame has no columns, so filtering on 'Dataset'
# would raise KeyError; there is nothing to report in that case.
dataset_names = datasets.keys() if len(results_df) > 0 else []
for ds in dataset_names:
    # Rows without a RelMAE cannot be ranked and are left out.
    sub = results_df[results_df['Dataset'] == ds].dropna(subset=['AvgRelMAE'])
    if len(sub) == 0:
        continue
    best = sub.loc[sub['AvgRelMAE'].idxmin()]
    print(f"\n{ds}: Best = {best['Model']} ({best['Type']})")
    print(f"  MAE: {best['MAE']:.2f}")
    print(f"  RelMAE: {best['AvgRelMAE']:.1f}%")
    print(f"  R²: {best['R2']:.3f}")
    print(f"  Time: {best['Time']:.1f}s")