 # Time Series Experiment Runner



 This notebook compares multiple forecasting approaches:

 - **Baselines:** Naive, Moving Average

 - **ML Models:** Random Forest, XGBoost

 - **Deep Learning:** LSTM, GRU

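 - **Strategies:** Direct forecasting (features are rebuilt and models refit for each horizon). Recursive forecasting is planned for comparison but is not implemented in the code below.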

In [5]:
import tensorflow as tf

# Report the TensorFlow build first, then whatever GPU devices it can see.
print(f"TensorFlow version: {tf.__version__}")
gpu_devices = tf.config.list_physical_devices('GPU')
print(f"GPU available: {gpu_devices}")

TensorFlow version: 2.19.0
GPU available: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [6]:
# === IMPORTS ===
import pandas as pd
import numpy as np
import os
from datetime import datetime
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
from xgboost import XGBRegressor
import warnings
warnings.filterwarnings('ignore')

# Deep Learning imports
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, GRU, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
tf.get_logger().setLevel('ERROR')  # Suppress TF warnings

# Progress bar (falls back to simple print if not installed)
try:
    from tqdm.notebook import tqdm
    HAS_TQDM = True
except ImportError:
    HAS_TQDM = False

    def tqdm(iterable, desc="", total=None, **kwargs):
        """Minimal stand-in for ``tqdm`` that prints a ``[i/total]`` counter.

        Args:
            iterable: Items to iterate over.
            desc: Label printed in front of the counter.
            total: Number of items; inferred from ``len(iterable)`` when omitted
                and the iterable is sized, otherwise shown as ``?``.
            **kwargs: Accepted and ignored so calls written for the real tqdm
                (e.g. ``leave=False``) still work when it is not installed.

        Yields:
            Each item of ``iterable`` unchanged.
        """
        if total is None:
            try:
                total = len(iterable)
            except TypeError:
                # Generators and other unsized iterables have no len().
                total = None
        for i, item in enumerate(iterable):
            # '\r' keeps the counter on a single, continuously rewritten line.
            print(f"  {desc} [{i+1}/{total or '?'}]", end='\r')
            yield item
        print()

print("Imports complete")


Imports complete


 ## 1. Configuration

In [7]:
# === CONFIGURATION ===
# Modify these to adjust your experiments

# --- Forecasting grid ---
TARGET = 'log_price'          # column used as the prediction target
HORIZONS = [1, 3, 6, 12]      # steps ahead to forecast
SEQ_LENGTHS = [4, 6, 12]      # number of lagged observations fed to each model
TRAIN_END_YEAR = 2019         # rows with year <= this value form the training set

# --- Deep learning settings ---
DL_EPOCHS = 50
DL_BATCH_SIZE = 32
DL_PATIENCE = 10              # early-stopping patience (epochs)

# --- Checkpointing ---
CHECKPOINT_FILE = 'experiment_checkpoint.csv'

# Echo the grid so the run log records which configuration produced the results.
print("\n".join([
    "Configuration:",
    f"  Target: {TARGET}",
    f"  Horizons: {HORIZONS}",
    f"  Sequence lengths: {SEQ_LENGTHS}",
    f"  Train end year: {TRAIN_END_YEAR}",
]))

# Load both splits, tagging each row with the file it came from.
train_df = pd.read_csv("tsa_train.csv").assign(split='train')
test_df = pd.read_csv("tsa_test.csv").assign(split='test')

# One combined frame, ordered by region and then by period within each region.
full_df = (
    pd.concat([train_df, test_df], ignore_index=True)
    .sort_values(['region', 'period_begin'])
    .reset_index(drop=True)
)

Configuration:
  Target: log_price
  Horizons: [1, 3, 6, 12]
  Sequence lengths: [4, 6, 12]
  Train end year: 2019


 ## 2. Helper Functions

In [8]:
# === CORE HELPERS ===

def log(msg):
    """Print ``msg`` prefixed with the current local time as ``[HH:MM:SS]``."""
    stamp = datetime.now().strftime('%H:%M:%S')
    print(f"[{stamp}] {msg}")

def evaluate(y_true, y_pred):
    """Score predictions against ground truth.

    Returns:
        dict with keys ``'rmse'``, ``'mae'`` and ``'r2'`` (in that order).
    """
    squared_error = mean_squared_error(y_true, y_pred)
    scores = {'rmse': np.sqrt(squared_error)}
    scores['mae'] = mean_absolute_error(y_true, y_pred)
    scores['r2'] = r2_score(y_true, y_pred)
    return scores

def create_features_for_horizon(df, target_col, seq_length, horizon):
    """Create lag/trend features for direct ``horizon``-step forecasting.

    Rows are processed per ``region`` in ``period_begin`` order. Every feature
    of a row is shifted so it only uses target values at least ``horizon`` rows
    older than that row:

    - ``lag_k`` for k in 1..seq_length: target shifted by ``k + horizon - 1``
    - ``trend_n``: ``lag_1 - lag_n`` for log-spaced ``n`` in [2, seq_length]
    - ``momentum``: ``lag_1 - 2*lag_2 + lag_3`` (``lag_1 - lag_2`` when
      ``seq_length == 2``)
    - ``volatility`` / ``rolling_mean``: rolling std / mean over ``seq_length``
      rows, shifted by ``horizon``
    - ``target``: copy of ``target_col``

    Args:
        df: Frame with ``region``, ``period_begin`` and ``target_col`` columns.
        target_col: Name of the column to forecast.
        seq_length: Number of lag features; must be at least 2.
        horizon: Forecast distance in rows; must be at least 1.

    Returns:
        A DataFrame of the original columns plus the engineered ones, with any
        row containing a NaN in *any* column removed, or ``None`` when no
        region has at least ``seq_length + horizon`` rows.

    Raises:
        ValueError: If ``seq_length < 2`` or ``horizon < 1``.
    """
    if seq_length < 2:
        # With a single lag there is no lag_2 for momentum, and a one-row
        # rolling std is always NaN, so no usable row could be produced.
        raise ValueError(f"seq_length must be >= 2, got {seq_length}")
    if horizon < 1:
        # horizon=0 would make lag_1 the target itself (shift by 0 rows).
        raise ValueError(f"horizon must be >= 1, got {horizon}")

    # Trend anchor points depend only on seq_length, so compute them once.
    n_trends = min(int(np.ceil(np.log2(seq_length))), 5)
    trend_points = [
        n for n in np.unique(np.geomspace(2, seq_length, n_trends, dtype=int))
        if n <= seq_length
    ]

    feature_dfs = []
    # sort=False keeps regions in order of first appearance.
    for _, region_rows in df.groupby('region', sort=False):
        if len(region_rows) < seq_length + horizon:
            continue
        zip_df = region_rows.sort_values('period_begin').copy()
        target = zip_df[target_col]

        # Lag features: lag_1 is the most recent value known `horizon` rows ahead.
        for lag in range(1, seq_length + 1):
            zip_df[f'lag_{lag}'] = target.shift(lag + horizon - 1)

        # Trend features (log-spaced)
        for n in trend_points:
            zip_df[f'trend_{n}'] = zip_df['lag_1'] - zip_df[f'lag_{n}']

        # Derived features
        if seq_length >= 3:
            zip_df['momentum'] = zip_df['lag_1'] - 2*zip_df['lag_2'] + zip_df['lag_3']
        else:
            zip_df['momentum'] = zip_df['lag_1'] - zip_df['lag_2']
        rolling = target.rolling(window=seq_length)
        zip_df['volatility'] = rolling.std().shift(horizon)
        zip_df['rolling_mean'] = rolling.mean().shift(horizon)

        zip_df['target'] = target
        feature_dfs.append(zip_df)

    if not feature_dfs:
        return None
    # dropna() has no subset, so a NaN in any raw column also drops the row.
    return pd.concat(feature_dfs, ignore_index=True).dropna().reset_index(drop=True)

def get_feature_columns(df):
    """List the engineered feature columns present in ``df``.

    Order: ``lag_*`` by number, then ``trend_*`` by number, then whichever of
    ``momentum``, ``volatility``, ``rolling_mean`` exist.
    """
    def numbered(prefix):
        # Sort numerically on the suffix so lag_10 follows lag_2, not lag_1.
        matches = [name for name in df.columns if name.startswith(prefix)]
        matches.sort(key=lambda name: int(name.split('_')[1]))
        return matches

    lags = numbered('lag_')
    trends = numbered('trend_')
    extras = []
    for name in ('momentum', 'volatility', 'rolling_mean'):
        if name in df.columns:
            extras.append(name)
    return lags + trends + extras

def get_lag_columns(df, seq_length):
    """Names ``lag_1`` .. ``lag_<seq_length>`` (the LSTM sequence input).

    ``df`` is accepted for signature symmetry but is not inspected.
    """
    names = []
    for offset in range(seq_length):
        names.append(f'lag_{offset + 1}')
    return names

def train_test_split_temporal(df, train_end_year):
    """Temporal split on the ``year`` column.

    Returns:
        ``(train, test)`` copies: rows with ``year <= train_end_year`` and rows
        with ``year > train_end_year`` respectively.
    """
    years = df['year']
    early_rows = df.loc[years.le(train_end_year)].copy()
    late_rows = df.loc[years.gt(train_end_year)].copy()
    return early_rows, late_rows

print("Core helpers defined")


Core helpers defined


 ## 3. Model Definitions

In [9]:
# === BASELINE MODELS ===

def naive_forecast(test_df):
    """Naive baseline: the forecast is simply the ``lag_1`` column."""
    last_known = test_df.loc[:, 'lag_1']
    return last_known.values

def moving_avg_forecast(test_df, seq_length):
    """Moving-average baseline: row-wise mean of ``lag_1`` .. ``lag_<seq_length>``."""
    lag_cols = []
    for k in range(1, seq_length + 1):
        lag_cols.append(f'lag_{k}')
    window = test_df.loc[:, lag_cols]
    return window.mean(axis='columns').values

# === ML MODELS ===

def get_ml_models():
    """Build fresh, unfitted tree-ensemble regressors keyed by experiment name."""
    forest_common = {'random_state': 42, 'n_jobs': -1}
    boost_common = {'random_state': 42, 'verbosity': 0}

    models = {}
    models['RF_default'] = RandomForestRegressor(n_estimators=100, **forest_common)
    models['RF_tuned'] = RandomForestRegressor(n_estimators=200, max_depth=10, **forest_common)
    models['XGB_default'] = XGBRegressor(n_estimators=100, **boost_common)
    models['XGB_tuned'] = XGBRegressor(n_estimators=200, learning_rate=0.05, max_depth=6, **boost_common)
    return models

# === DEEP LEARNING MODELS ===

def _build_recurrent_model(layer_cls, units, seq_length, n_features, dropout=None):
    """Assemble and compile a stacked recurrent regressor.

    Args:
        layer_cls: Recurrent layer class to stack (``LSTM`` or ``GRU``).
        units: Width of each recurrent layer, first to last.
        seq_length: Number of time steps per input sequence.
        n_features: Number of features per time step.
        dropout: If given, a ``Dropout`` layer with this rate follows every
            recurrent layer.

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, MSE loss) ending in a
        single-unit ``Dense`` layer.
    """
    # An explicit Input layer replaces passing `input_shape=` to the first
    # recurrent layer, which Keras 3 warns against for Sequential models.
    layers = [tf.keras.Input(shape=(seq_length, n_features))]
    last = len(units) - 1
    for depth, width in enumerate(units):
        # Every recurrent layer except the last must emit the full sequence
        # so the next recurrent layer receives 3-D input.
        layers.append(layer_cls(width, return_sequences=depth < last))
        if dropout is not None:
            layers.append(Dropout(dropout))
    layers.append(Dense(1))

    model = Sequential(layers)
    model.compile(optimizer='adam', loss='mse')
    return model


def build_lstm_small(seq_length, n_features=1):
    """LSTM: 32 units, 1 layer."""
    return _build_recurrent_model(LSTM, [32], seq_length, n_features)


def build_lstm_medium(seq_length, n_features=1):
    """LSTM: 64 -> 32 units, 2 layers."""
    return _build_recurrent_model(LSTM, [64, 32], seq_length, n_features)


def build_lstm_large(seq_length, n_features=1):
    """LSTM: 128 -> 64 units, 2 layers, dropout 0.2 after each."""
    return _build_recurrent_model(LSTM, [128, 64], seq_length, n_features, dropout=0.2)


def build_gru_small(seq_length, n_features=1):
    """GRU: 32 units, 1 layer."""
    return _build_recurrent_model(GRU, [32], seq_length, n_features)


def build_gru_medium(seq_length, n_features=1):
    """GRU: 64 -> 32 units, 2 layers."""
    return _build_recurrent_model(GRU, [64, 32], seq_length, n_features)

def get_dl_models():
    """Map each deep-learning experiment name to its model-builder function.

    The builders are returned uncalled; callers invoke them with
    ``(seq_length, n_features=...)`` to obtain a fresh compiled model.
    """
    builders = (
        ('LSTM_small', build_lstm_small),
        ('LSTM_medium', build_lstm_medium),
        ('LSTM_large', build_lstm_large),
        ('GRU_small', build_gru_small),
        ('GRU_medium', build_gru_medium),
    )
    return dict(builders)

print("Model definitions complete")


Model definitions complete


## 4. Checkpoint Functions

In [10]:
def load_checkpoint():
    """Load previously completed experiments from ``CHECKPOINT_FILE``.

    Returns:
        A list of result dicts (one per CSV row), or an empty list when the
        checkpoint file does not exist or contains no data.
    """
    if not os.path.exists(CHECKPOINT_FILE):
        return []
    try:
        df = pd.read_csv(CHECKPOINT_FILE)
    except pd.errors.EmptyDataError:
        # A zero-byte file (e.g. left by an interrupted write) holds no
        # results; start over instead of failing before any work is done.
        print(f"Checkpoint {CHECKPOINT_FILE} is empty; starting from scratch")
        return []
    print(f"Loaded checkpoint: {len(df)} experiments complete")
    return df.to_dict('records')

def save_checkpoint(results):
    """Persist all results so far to ``CHECKPOINT_FILE``.

    The CSV is written to a sibling temporary file and then moved over the
    checkpoint with ``os.replace``, so an interruption during the write
    leaves the previous checkpoint intact rather than a truncated file.

    Args:
        results: List of result dicts, one per completed experiment.
    """
    tmp_path = f"{CHECKPOINT_FILE}.tmp"
    pd.DataFrame(results).to_csv(tmp_path, index=False)
    os.replace(tmp_path, CHECKPOINT_FILE)

def experiment_exists(results, model, horizon, seq_length):
    """True if ``results`` already holds a row for this model/horizon/seq_length."""
    wanted = (model, horizon, seq_length)
    return any(
        row['model'] == wanted[0]
        and row['horizon'] == wanted[1]
        and row['seq_length'] == wanted[2]
        for row in results
    )

print("Checkpoint functions defined")

Checkpoint functions defined


 ## 6. Main Experiment Runner

In [11]:
def _record_result(results, name, horizon, seq_length, y_true, y_pred, n_train):
    """Score one experiment, append its row to ``results`` and save the checkpoint.

    Returns:
        The metrics dict produced by ``evaluate``.
    """
    metrics = evaluate(y_true, y_pred)
    results.append({
        'strategy': 'direct', 'model': name, 'horizon': horizon,
        'seq_length': seq_length, **metrics, 'n_train': n_train, 'n_test': len(y_true)
    })
    save_checkpoint(results)
    return metrics


def _print_result(exp_count, total, name, metrics):
    """Print the one-line progress summary for a finished experiment."""
    print(f"  ✓ [{exp_count}/{total}] {name:<15} RMSE={metrics['rmse']:.4f}  R²={metrics['r2']:.4f}")


def run_all_experiments(full_df, run_baselines=True, run_ml=True, run_dl=True):
    """Run every enabled model over the HORIZONS x SEQ_LENGTHS grid, with checkpointing.

    Experiments already present in the checkpoint (matched on model, horizon
    and sequence length) are skipped; the checkpoint is rewritten after every
    newly finished experiment.

    Args:
        full_df: Combined train+test frame passed to ``create_features_for_horizon``.
        run_baselines: Include the Naive and MovingAvg baselines.
        run_ml: Include the models from ``get_ml_models``.
        run_dl: Include the models from ``get_dl_models``.

    Returns:
        DataFrame with one row per experiment, including rows loaded from the
        checkpoint.
    """
    results = load_checkpoint()

    model_names = []
    if run_baselines:
        model_names += ['Naive', 'MovingAvg']
    if run_ml:
        model_names += list(get_ml_models())
    if run_dl:
        model_names += list(get_dl_models())

    # Count only checkpoint rows that belong to the grid requested now, so the
    # progress counter stays within [0, total] even if the checkpoint was
    # produced with different flags, horizons or sequence lengths.
    planned = {(m, h, s) for m in model_names for h in HORIZONS for s in SEQ_LENGTHS}
    already_done = {(r['model'], r['horizon'], r['seq_length']) for r in results}
    total = len(planned)
    starting_count = len(planned & already_done)

    log(f"Starting experiments ({starting_count} already complete, {total - starting_count} remaining)")
    print("=" * 60)

    exp_count = starting_count

    for horizon in HORIZONS:
        for seq_length in SEQ_LENGTHS:
            print(f"\n{'='*60}")
            log(f"HORIZON={horizon}, SEQ_LENGTH={seq_length}")
            print("=" * 60)

            df_features = create_features_for_horizon(full_df, TARGET, seq_length, horizon)
            if df_features is None:
                log("⚠️  SKIP: Not enough data")
                continue

            train_df, test_df = train_test_split_temporal(df_features, TRAIN_END_YEAR)
            if len(train_df) == 0 or len(test_df) == 0:
                # Fitting or scoring on an empty split would raise and abort
                # the remaining configurations.
                log("⚠️  SKIP: Empty train or test split")
                continue

            feature_cols = get_feature_columns(train_df)
            lag_cols = get_lag_columns(train_df, seq_length)

            X_train = train_df[feature_cols].values
            y_train = train_df['target'].values
            X_test = test_df[feature_cols].values
            y_test = test_df['target'].values
            n_train = len(train_df)

            log(f"Train: {len(train_df):,} | Test: {len(test_df):,} | Features: {len(feature_cols)}")

            # --- BASELINES ---
            if run_baselines:
                # Lambdas defer the computation so skipped experiments cost nothing;
                # each is called within this same iteration.
                baselines = [
                    ('Naive', lambda: naive_forecast(test_df)),
                    ('MovingAvg', lambda: moving_avg_forecast(test_df, seq_length)),
                ]
                for name, pred_fn in baselines:
                    if experiment_exists(results, name, horizon, seq_length):
                        continue
                    metrics = _record_result(results, name, horizon, seq_length,
                                             y_test, pred_fn(), n_train)
                    exp_count += 1
                    _print_result(exp_count, total, name, metrics)

            # --- ML MODELS ---
            if run_ml:
                for name, model in get_ml_models().items():
                    if experiment_exists(results, name, horizon, seq_length):
                        continue
                    model.fit(X_train, y_train)
                    metrics = _record_result(results, name, horizon, seq_length,
                                             y_test, model.predict(X_test), n_train)
                    exp_count += 1
                    _print_result(exp_count, total, name, metrics)

            # --- DEEP LEARNING ---
            if run_dl:
                # Standardize each lag column on the training rows only, then
                # reshape to (samples, seq_length, 1) for the recurrent layers.
                scaler = StandardScaler()
                X_train_scaled = scaler.fit_transform(train_df[lag_cols].values).reshape(-1, seq_length, 1)
                X_test_scaled = scaler.transform(test_df[lag_cols].values).reshape(-1, seq_length, 1)

                early_stop = EarlyStopping(monitor='val_loss', patience=DL_PATIENCE, restore_best_weights=True, verbose=0)

                for name, build_fn in get_dl_models().items():
                    if experiment_exists(results, name, horizon, seq_length):
                        continue

                    print(f"  ⏳ [{exp_count+1}/{total}] {name:<15} training...", end='\r')
                    model = build_fn(seq_length, n_features=1)
                    # NOTE(review): Keras takes `validation_split` from the END of
                    # the arrays before shuffling, and train_df is ordered region
                    # by region, so validation is the last regions rather than a
                    # random or temporal holdout. Confirm this is intended.
                    model.fit(X_train_scaled, y_train, validation_split=0.2,
                              epochs=DL_EPOCHS, batch_size=DL_BATCH_SIZE,
                              callbacks=[early_stop], verbose=0)
                    y_pred = model.predict(X_test_scaled, verbose=0).flatten()
                    metrics = _record_result(results, name, horizon, seq_length,
                                             y_test, y_pred, n_train)
                    exp_count += 1
                    _print_result(exp_count, total, name, metrics)

                    # Reset Keras global state between models.
                    tf.keras.backend.clear_session()

    print(f"\n{'='*60}")
    log(f"✅ COMPLETE! {len(results)} experiments")
    print("=" * 60)

    return pd.DataFrame(results)

print("✅ Experiment runner ready")

✅ Experiment runner ready


 ## 7. Run Experiments



 Adjust flags to run subsets:

 - `run_baselines=True` - Naive, Moving Average

 - `run_ml=True` - Random Forest, XGBoost

 - `run_dl=True` - LSTM, GRU (slower)

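 - `run_recursive=True` - Compare direct vs recursive *(not available yet: `run_all_experiments` currently accepts only `run_baselines`, `run_ml` and `run_dl`)*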

In [12]:
# === RUN ===
# Adjust flags to control what runs

# Runs the HORIZONS x SEQ_LENGTHS grid for every enabled model family.
# run_all_experiments starts by loading CHECKPOINT_FILE, so experiments
# already recorded there are skipped rather than re-run.
results_df = run_all_experiments(
    full_df,
    run_baselines=True,
    run_ml=True,
    run_dl=True
)

[03:54:51] Starting experiments (0 already complete, 132 remaining)

[03:54:51] HORIZON=1, SEQ_LENGTH=4
[03:54:52] Train: 8,638 | Test: 2,443 | Features: 9
  ✓ [1/132] Naive           RMSE=0.1694  R²=0.8330
  ✓ [2/132] MovingAvg       RMSE=0.1856  R²=0.7995
  ✓ [3/132] RF_default      RMSE=0.1734  R²=0.8250
  ✓ [4/132] RF_tuned        RMSE=0.1711  R²=0.8295
  ✓ [5/132] XGB_default     RMSE=0.1877  R²=0.7949
  ✓ [6/132] XGB_tuned       RMSE=0.1788  R²=0.8140
  ✓ [7/132] LSTM_small      RMSE=0.1636  R²=0.8442
  ✓ [8/132] LSTM_medium     RMSE=0.1622  R²=0.8469
  ✓ [9/132] LSTM_large      RMSE=0.2048  R²=0.7559
  ✓ [10/132] GRU_small       RMSE=0.1651  R²=0.8414
  ✓ [11/132] GRU_medium      RMSE=0.1736  R²=0.8245

[04:00:07] HORIZON=1, SEQ_LENGTH=6
[04:00:07] Train: 8,412 | Test: 2,443 | Features: 12
  ✓ [12/132] Naive           RMSE=0.1694  R²=0.8330
  ✓ [13/132] MovingAvg       RMSE=0.1925  R²=0.7844
  ✓ [14/132] RF_default      RMSE=0.1738  R²=0

 ## 8. Analyze Results

In [13]:
# === VIEW RESULTS ===

# Header uses the intended chart emoji instead of its mis-decoded byte sequence.
print("\n📊 ALL RESULTS")
# Best (lowest RMSE) experiments first within each horizon.
display(results_df.sort_values(['horizon', 'rmse']))



📊 ALL RESULTS


Unnamed: 0,strategy,model,horizon,seq_length,rmse,mae,r2,n_train,n_test
20,direct,GRU_small,1,6,0.159101,0.082617,0.852656,8412,2443
29,direct,LSTM_medium,1,12,0.159607,0.086902,0.851716,7734,2443
31,direct,GRU_small,1,12,0.159686,0.092313,0.851570,7734,2443
7,direct,LSTM_medium,1,4,0.162191,0.083719,0.846877,8638,2443
27,direct,XGB_tuned,1,12,0.162647,0.087254,0.846014,7734,2443
...,...,...,...,...,...,...,...,...,...
107,direct,LSTM_large,12,4,0.294237,0.238711,0.496055,7395,2443
99,direct,Naive,12,4,0.303918,0.212045,0.462346,7395,2443
110,direct,Naive,12,6,0.303918,0.212045,0.462346,7169,2443
121,direct,Naive,12,12,0.303918,0.212045,0.462346,6491,2443


In [14]:
# === BEST MODEL BY HORIZON ===

# Header and R² label use the intended characters instead of mis-decoded bytes.
print("\n🏆 BEST MODEL BY HORIZON")
print("=" * 60)

for horizon in HORIZONS:
    subset = results_df[results_df['horizon'] == horizon]
    if len(subset) == 0:
        # No experiment was recorded for this horizon (e.g. it was skipped).
        continue
    # Row with the lowest RMSE across all models and sequence lengths.
    best = subset.loc[subset['rmse'].idxmin()]
    print(f"\nHorizon {horizon}:")
    print(f"  Best: {best['model']} (seq={best['seq_length']})")
    print(f"  RMSE: {best['rmse']:.4f} | MAE: {best['mae']:.4f} | R²: {best['r2']:.4f}")


🏆 BEST MODEL BY HORIZON

Horizon 1:
  Best: GRU_small (seq=6)
  RMSE: 0.1591 | MAE: 0.0826 | R²: 0.8527

Horizon 3:
  Best: GRU_small (seq=12)
  RMSE: 0.2034 | MAE: 0.1266 | R²: 0.7591

Horizon 6:
  Best: RF_tuned (seq=12)
  RMSE: 0.2198 | MAE: 0.1463 | R²: 0.7188

Horizon 12:
  Best: RF_tuned (seq=12)
  RMSE: 0.2327 | MAE: 0.1603 | R²: 0.6848


In [15]:
# === MODEL CATEGORY COMPARISON ===
# Header uses the intended chart emoji instead of its mis-decoded byte sequence.
print("\n📈 MODEL CATEGORY COMPARISON")
print("=" * 60)

def categorize_model(name):
    """Map a model name to its family.

    Returns ``'Baseline'``, ``'Statistical'`` or ``'ML'`` for the known names
    and prefixes; every other name is reported as ``'DL'``.
    """
    exact_matches = {
        'Naive': 'Baseline',
        'MovingAvg': 'Baseline',
        'ARIMA': 'Statistical',
        'ExpSmoothing': 'Statistical',
    }
    if name in exact_matches:
        return exact_matches[name]
    return 'ML' if name.startswith(('RF', 'XGB')) else 'DL'

# Tag every experiment with its model family (adds a column to results_df).
results_df['category'] = results_df['model'].map(categorize_model)

for horizon in HORIZONS:
    print(f"\nHorizon {horizon}:")
    subset = results_df[results_df['horizon'] == horizon]

    for cat in ['Baseline', 'Statistical', 'ML', 'DL']:
        cat_subset = subset[subset['category'] == cat]
        if len(cat_subset) > 0:
            # Lowest-RMSE experiment of this family at this horizon.
            best_idx = cat_subset['rmse'].idxmin()
            best = cat_subset.loc[best_idx]
            print(f"  {cat:<12} {best['model']:<15} RMSE={best['rmse']:.4f}")


📈 MODEL CATEGORY COMPARISON

Horizon 1:
  Baseline     Naive           RMSE=0.1694
  ML           XGB_tuned       RMSE=0.1626
  DL           GRU_small       RMSE=0.1591

Horizon 3:
  Baseline     MovingAvg       RMSE=0.2232
  ML           RF_tuned        RMSE=0.2146
  DL           GRU_small       RMSE=0.2034

Horizon 6:
  Baseline     MovingAvg       RMSE=0.2407
  ML           RF_tuned        RMSE=0.2198
  DL           GRU_medium      RMSE=0.2257

Horizon 12:
  Baseline     MovingAvg       RMSE=0.2814
  ML           RF_tuned        RMSE=0.2327
  DL           LSTM_small      RMSE=0.2353


 ## 9. Save Results

In [16]:
# === SAVE ===
results_df.to_csv('experiment_results.csv', index=False)
print("✅ Results saved to experiment_results.csv")

# Summary table: the lowest-RMSE row for each horizon.
# Selecting rows by the per-group idxmin keeps each column's original dtype
# and avoids groupby.apply over the grouping column (deprecated in recent
# pandas), which also turned the numeric columns into object dtype.
best_rows = results_df.groupby('horizon')['rmse'].idxmin()
summary = results_df.loc[
    best_rows, ['horizon', 'model', 'seq_length', 'rmse', 'mae', 'r2']
].reset_index(drop=True)
print("\n📋 SUMMARY: Best model per horizon")
display(summary)

✅ Results saved to experiment_results.csv

📋 SUMMARY: Best model per horizon


Unnamed: 0,horizon,model,seq_length,rmse,mae,r2
0,1,GRU_small,6,0.159101,0.082617,0.852656
1,3,GRU_small,12,0.20344,0.126581,0.759086
2,6,RF_tuned,12,0.219784,0.146263,0.718822
3,12,RF_tuned,12,0.232713,0.160254,0.684768
