In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from smooth.adam_general.core.adam import ADAM
import numpy as np
import pandas as pd

# Simulation-Based Prediction Intervals - Python Implementation

This notebook tests simulation-based prediction intervals in Python to compare with R.

Key points:
- Read the same test data generated by R
- Use `interval_method='simulation'` in predict()
- Compare lower/upper bounds with R results

**Note**: For 100% identical results, both R and Python must use:
- Same input data (from CSV)
- Same model parameters (may need to fix parameters to match)
- Same random seed for simulation

In [3]:
np.random.seed(33)
n_points = 100
time_series = np.random.normal(100, 10, n_points)
ts_df = pd.DataFrame({'value': time_series}, index=pd.date_range(start='2023-01-01', periods=n_points, freq='ME'))

### Test 1: Simple ETS(A,N,N) with simulation intervals

In [4]:
# Load test data from R
print(f'Loaded {len(ts_df)} data points')
print(ts_df.head())

Loaded 100 data points
                value
2023-01-31  96.811465
2023-02-28  83.970194
2023-03-31  84.647821
2023-04-30  94.295991
2023-05-31  97.832717


In [None]:
# Fit model
model = ADAM(model='ANN', lags=[12], distribution='dnorm',initial = 'optimal')
model.fit(ts_df)

<smooth.adam_general.core.adam.ADAM at 0x7fb6b814b490>

In [18]:
# Forecast with simulation intervals
# Fit model
model = ADAM(model='ANN', lags=[12], distribution='dnorm',initial = 'optimal')
model.fit(ts_df)
result = model.predict(h=12, interval_method='simulation', nsim=1000, level=0.95)

print('\nSimulation-based intervals (nsim=1000):')
print(model.forecast_results)


Simulation-based intervals (nsim=1000):
                  mean  lower_0.025  upper_0.975
2031-05-31  101.133962    81.700083   122.115989
2031-06-30  101.133962    79.901544   121.192665
2031-07-31  101.133962    78.965320   122.152483
2031-08-31  101.133962    81.324012   121.821652
2031-09-30  101.133962    80.791862   122.074475
2031-10-31  101.133962    80.826992   120.908738
2031-11-30  101.133962    81.339751   120.449026
2031-12-31  101.133962    81.890668   123.260999
2032-01-31  101.133962    79.425226   122.628063
2032-02-29  101.133962    80.693998   121.237474
2032-03-31  101.133962    79.433496   122.272683
2032-04-30  101.133962    80.074173   121.787503


In [7]:
# Display intervals more clearly
print('\nForecast mean:')
print(model.forecast_results['mean'].values)
print('\nLower bound (2.5%):')
print(model.forecast_results.filter(like='lower').values.flatten())
print('\nUpper bound (97.5%):')
print(model.forecast_results.filter(like='upper').values.flatten())


Forecast mean:
[101.13396235 101.13396235 101.13396235 101.13396235 101.13396235
 101.13396235 101.13396235 101.13396235 101.13396235 101.13396235
 101.13396235 101.13396235]

Lower bound (2.5%):
[80.54364312 81.24451166 80.0583313  79.81495681 79.24122507 80.87737694
 79.75791187 80.08659174 81.16583103 78.53624936 79.44947269 79.68876081]

Upper bound (97.5%):
[120.25488094 120.80267511 123.41248825 122.99071597 122.77666094
 121.93767108 122.37962218 122.46062654 123.53748445 121.73181497
 121.20814579 122.03669221]


### Test 2: ETS(A,A,N) with trend

In [8]:
# Forecast with simulation intervals
np.random.seed(123)
result = model.predict(h=12, interval_method='simulation', nsim=1000, level=0.95)

print('\nSimulation-based intervals:')
print('Mean:', model.forecast_results['mean'].values[:3])
print('Lower:', model.forecast_results.filter(like='lower').values.flatten()[:3])
print('Upper:', model.forecast_results.filter(like='upper').values.flatten()[:3])


Simulation-based intervals:
Mean: [101.13396235 101.13396235 101.13396235]
Lower: [81.30035971 79.81609321 79.84154047]
Upper: [122.36801058 123.64301707 121.38747254]


### Test 3: Seasonal ETS(A,N,A)

In [9]:
# Fit model
model = ADAM(model='ANA', lags=[12], distribution='dnorm', frequency='ME')
model.fit(ts_df)

print('ETS(A,N,A) Model fitted')
print('Alpha:', model.adam_estimated['B'][0])
print('Gamma:', model.adam_estimated['B'][1])

ETS(A,N,A) Model fitted
Alpha: 0.05539950418213001
Gamma: 0.001123442536020658


In [10]:
# Forecast with simulation intervals
np.random.seed(123)
result = model.predict(h=12, interval_method='simulation', nsim=1000, level=0.95)

print('\nSimulation-based intervals:')
print('Mean:', model.forecast_results['mean'].values[:3])
print('Lower:', model.forecast_results.filter(like='lower').values.flatten()[:3])
print('Upper:', model.forecast_results.filter(like='upper').values.flatten()[:3])


Simulation-based intervals:
Mean: [102.92650143 109.37646958  97.44564223]
Lower: [81.96626894 87.54769609 75.50703035]
Upper: [124.75467098 130.94484463 120.29987763]


### Test 4: Multiplicative ETS(M,A,M)

In [11]:
# Forecast with simulation intervals
np.random.seed(123)
result = model.predict(h=12, interval_method='simulation', nsim=1000, level=0.95)

print('\nSimulation-based intervals:')
print('Mean:', model.forecast_results['mean'].values[:3])
print('Lower:', model.forecast_results.filter(like='lower').values.flatten()[:3])
print('Upper:', model.forecast_results.filter(like='upper').values.flatten()[:3])


Simulation-based intervals:
Mean: [102.92650143 109.37646958  97.44564223]
Lower: [80.32421041 86.41120903 75.94873407]
Upper: [124.20149378 131.60395618 119.33725996]


### Test 5: Compare parametric vs simulation intervals

In [12]:
# Use the ANN data

model = ADAM(model='ANN', lags=[1], distribution='dnorm', frequency='ME')
model.fit(ts_df)

<smooth.adam_general.core.adam.ADAM at 0x7fb6b8149ff0>

In [13]:
# Parametric intervals
result_param = model.predict(h=12, interval_method='parametric', level=0.95)
print('Parametric intervals:')
print('Lower:', model.forecast_results.filter(like='lower').values.flatten()[:3])
print('Upper:', model.forecast_results.filter(like='upper').values.flatten()[:3])

Parametric intervals:
Lower: [80.55160478 80.54516092 80.53871908]
Upper: [121.71631992 121.72276378 121.72920562]


In [14]:
# Simulation intervals
np.random.seed(123)
result_sim = model.predict(h=12, interval_method='simulation', nsim=10000, level=0.95)
print('\nSimulation intervals (nsim=10000):')
print('Lower:', model.forecast_results.filter(like='lower').values.flatten()[:3])
print('Upper:', model.forecast_results.filter(like='upper').values.flatten()[:3])


Simulation intervals (nsim=10000):
Lower: [79.97205152 80.47694899 80.8528514 ]
Upper: [122.06693303 121.89472713 122.18570721]


### Test 6: Different nsim values

In [15]:
# Test with different nsim values
model = ADAM(model='ANN', lags=[1], distribution='dnorm', frequency='ME')
model.fit(ts_df)

for nsim_val in [100, 500, 1000, 5000]:
    np.random.seed(123)
    model.predict(h=5, interval_method='simulation', nsim=nsim_val, level=0.95)
    lower = model.forecast_results.filter(like='lower').values.flatten()[0]
    upper = model.forecast_results.filter(like='upper').values.flatten()[0]
    print(f'nsim={nsim_val}: Lower={lower:.2f}, Upper={upper:.2f}')

nsim=100: Lower=81.96, Upper=120.07
nsim=500: Lower=79.79, Upper=122.48
nsim=1000: Lower=80.39, Upper=122.74
nsim=5000: Lower=79.77, Upper=122.48


## Summary

Python simulation intervals work by:
1. Generating random errors from the fitted distribution
2. Running multiple simulation paths through the state-space model (via C++ adam_simulator)
3. Taking quantiles of the simulated values

Key parameters:
- `interval_method='simulation'` in predict()
- `nsim` controls number of simulations (default 10000)
- `level` sets the confidence level

### Comparison Notes

To get 100% identical results with R:
1. Use the same input data (CSV files)
2. Ensure model parameters match (may need to fix some parameters)
3. Random number generation differs between R and Python - exact matches require using the same error matrix