In [1]:
import numpy as np
import pandas as pd

%load_ext rpy2.ipython

import rpy2.robjects as ro
from rpy2.robjects.packages import importr
import os

def load_smooth_dev():
    """Load the smooth package in development mode"""
    smooth_path = "/home/filtheo/smooth/"
    
    ro.r(f'''
    if (!requireNamespace("devtools", quietly=TRUE)) {{
        install.packages("devtools", repos="https://cran.rstudio.com/")
    }}
    devtools::load_all("{smooth_path}")
    ''')
    
    print("Smooth package loaded in development mode")

load_smooth_dev()

ℹ Loading smooth


R[write to console]: Loading required package: greybox

R[write to console]: Package "greybox", v2.0.3 loaded.


R[write to console]: This is package "smooth", v4.3.2.41001


R[write to console]: In addition: 

R[write to console]: In (function (package, help, pos = 2, lib.loc = NULL, character.only = FALSE,  :
R[write to console]: 
 
R[write to console]:  library ‘/usr/lib/R/site-library’ contains no packages



Smooth package loaded in development mode


# Simulation-Based Prediction Intervals - R Implementation

This notebook tests simulation-based prediction intervals in R to compare with Python.

Key points:
- Generate test data and save to CSV for Python comparison
- Use `interval="simulated"` in forecast()
- Compare lower/upper bounds with Python results

### Test 1: Simple ETS(A,N,N) with simulation intervals

In [2]:
np.random.seed(33)
n_points = 100
time_series = np.random.normal(100, 10, n_points)
ts_df = pd.DataFrame({'value': time_series}, index=pd.date_range(start='2023-01-01', periods=n_points, freq='ME'))

In [3]:
%%R -i ts_df
set.seed(33)  # Set seed for reproducible simulation

model <- adam(ts_df, model = "ANN", lags = c(12), initial = 'optimal')
cat('ETS(A,N,N) with optimal initial:\n')
cat('Parameters:', model$B, '\n')
forecast(model, h = 12)


ETS(A,N,N) with optimal initial:
Parameters: 0.02502502 98.50918 
Time Series:
Start = 101 
End = 112 
Frequency = 1 
 [1] 101.134 101.134 101.134 101.134 101.134 101.134 101.134 101.134 101.134
[10] 101.134 101.134 101.134


In [4]:
%%R -i ts_df

set.seed(33)  # Set seed for reproducible simulation
model <- adam(ts_df, model = "ANN", lags = c(12), initial = 'optimal')
cat('ETS(A,N,N) Model fitted\n')
cat('Alpha:', model$persistence, '\n')
cat('Scale:', model$B, '\n')

# Forecast with simulation intervals
fc <- forecast(model, h = 12, interval = 'simulated', nsim = 1000, level = 0.95)
cat('\nSimulation-based intervals (nsim=1000):\n')
print(fc)

ETS(A,N,N) Model fitted
Alpha: 0.02502502 
Scale: 0.02502502 98.50918 

Simulation-based intervals (nsim=1000):
Time Series:
Start = 101 
End = 112 
Frequency = 1 
    Point forecast Lower bound (2.5%) Upper bound (97.5%)
101        101.134           81.09449            121.9199
102        101.134           79.80241            121.4456
103        101.134           79.50804            121.4794
104        101.134           81.90750            119.9924
105        101.134           79.18010            120.9231
106        101.134           79.64952            122.2891
107        101.134           81.24092            121.0802
108        101.134           81.14436            120.9785
109        101.134           79.07400            120.7668
110        101.134           78.46660            122.0875
111        101.134           80.87832            121.5000
112        101.134           82.68913            122.6287


In [5]:
%%R

# Extract and display the intervals more clearly
cat('\nForecast mean:\n')
print(as.vector(fc$mean))
cat('\nLower bound (2.5%):\n')
print(as.vector(fc$lower))
cat('\nUpper bound (97.5%):\n')
print(as.vector(fc$upper))


Forecast mean:
 [1] 101.134 101.134 101.134 101.134 101.134 101.134 101.134 101.134 101.134
[10] 101.134 101.134 101.134

Lower bound (2.5%):
 [1] 81.09449 79.80241 79.50804 81.90750 79.18010 79.64952 81.24092 81.14436
 [9] 79.07400 78.46660 80.87832 82.68913

Upper bound (97.5%):
 [1] 121.9199 121.4456 121.4794 119.9924 120.9231 122.2891 121.0802 120.9785
 [9] 120.7668 122.0875 121.5000 122.6287


### Test 2: ETS(A,A,N) with trend

In [6]:
# Generate trending data
np.random.seed(42)
n_points = 120
trend = np.linspace(100, 150, n_points)
noise = np.random.normal(0, 5, n_points)
time_series = trend + noise
ts_df = pd.DataFrame({'value': time_series})

# Save for Python
ts_df.to_csv('/home/filtheo/smooth/python/smooth/adam_general/tests/simulation_intervals/test_data_aan.csv', index=False)
print('Test data saved to test_data_aan.csv')

Test data saved to test_data_aan.csv


In [7]:
%%R -i ts_df

set.seed(123)
model <- adam(ts_df, model = "AAN", lags = c(1), distribution = 'dnorm')
cat('ETS(A,A,N) Model fitted\n')
cat('Alpha:', model$persistence[1], '\n')
cat('Beta:', model$persistence[2], '\n')

# Forecast with simulation intervals
fc <- forecast(model, h = 12, interval = 'simulated', nsim = 1000, level = 0.95)
cat('\nSimulation-based intervals:\n')
cat('Mean:', as.vector(fc$mean)[1:3], '\n')
cat('Lower:', as.vector(fc$lower)[1:3], '\n')
cat('Upper:', as.vector(fc$upper)[1:3], '\n')

ETS(A,A,N) Model fitted
Alpha: 0 
Beta: 0 

Simulation-based intervals:
Mean: 150.5764 151.005 151.4336 
Lower: 141.0581 141.82 

142.3377 
Upper: 160.2055 160.2713 160.6993 


### Test 3: Seasonal ETS(A,N,A)

In [8]:
# Generate seasonal data
np.random.seed(42)
n_points = 120
seasonal = 20 * np.sin(2 * np.pi * np.arange(n_points) / 12)
noise = np.random.normal(0, 5, n_points)
time_series = 100 + seasonal + noise
ts_df = pd.DataFrame({'value': time_series})

# Save for Python
ts_df.to_csv('/home/filtheo/smooth/python/smooth/adam_general/tests/simulation_intervals/test_data_ana.csv', index=False)
print('Test data saved to test_data_ana.csv')

Test data saved to test_data_ana.csv


In [9]:
%%R -i ts_df

set.seed(123)
model <- adam(ts_df, model = "ANA", lags = c(12), distribution = 'dnorm')
cat('ETS(A,N,A) Model fitted\n')
cat('Alpha:', model$persistence[1], '\n')
cat('Gamma:', model$persistence[2], '\n')

# Forecast with simulation intervals
fc <- forecast(model, h = 12, interval = 'simulated', nsim = 1000, level = 0.95)
cat('\nSimulation-based intervals:\n')
cat('Mean:', as.vector(fc$mean)[1:3], '\n')
cat('Lower:', as.vector(fc$lower)[1:3], '\n')
cat('Upper:', as.vector(fc$upper)[1:3], '\n')

ETS(A,N,A) Model fitted
Alpha: 0 
Gamma: 0.07005792 



Simulation-based intervals:
Mean: 100.0263 107.7195 113.5396 
Lower: 90.80256 98.81879 104.7252 
Upper: 109.3575 116.6991 122.5186 


### Test 4: Multiplicative ETS(M,A,M)

In [10]:
# Generate multiplicative seasonal data
np.random.seed(42)
n_points = 120
trend = np.linspace(100, 150, n_points)
seasonal = 1 + 0.2 * np.sin(2 * np.pi * np.arange(n_points) / 12)
noise = np.random.normal(1, 0.05, n_points)
time_series = trend * seasonal * noise
ts_df = pd.DataFrame({'value': time_series})

# Save for Python
ts_df.to_csv('/home/filtheo/smooth/python/smooth/adam_general/tests/simulation_intervals/test_data_mam.csv', index=False)
print('Test data saved to test_data_mam.csv')

Test data saved to test_data_mam.csv


In [11]:
%%R -i ts_df

set.seed(123)
model <- adam(ts_df, model = "MAM", lags = c(12), distribution = 'dnorm')
cat('ETS(M,A,M) Model fitted\n')
cat('Alpha:', model$persistence[1], '\n')
cat('Beta:', model$persistence[2], '\n')
cat('Gamma:', model$persistence[3], '\n')

# Forecast with simulation intervals
fc <- forecast(model, h = 12, interval = 'simulated', nsim = 1000, level = 0.95)
cat('\nSimulation-based intervals:\n')
cat('Mean:', as.vector(fc$mean)[1:3], '\n')
cat('Lower:', as.vector(fc$lower)[1:3], '\n')
cat('Upper:', as.vector(fc$upper)[1:3], '\n')

ETS(M,A,M) Model fitted
Alpha: 0.01311173 
Beta: 0.000496196 
Gamma: 0.06976944 

Simulation-based intervals:
Mean: 150.7887 162.5222 171.1362 
Lower: 136.5161 147.6459 155.7597 
Upper: 165.2276 177.4271 187.1197 


### Test 5: Compare parametric vs simulation intervals

In [12]:
%%R -i ts_df

# Use the ANN data
ts_ann <- read.csv('/home/filtheo/smooth/python/smooth/adam_general/tests/simulation_intervals/test_data_ann.csv')

model <- adam(ts_ann, model = "ANN", lags = c(1), distribution = 'dnorm')

# Parametric intervals
fc_param <- forecast(model, h = 12, interval = 'approximate', level = 0.95)
cat('Parametric intervals:\n')
cat('Lower:', as.vector(fc_param$lower)[1:3], '\n')
cat('Upper:', as.vector(fc_param$upper)[1:3], '\n\n')

# Simulation intervals
set.seed(123)
fc_sim <- forecast(model, h = 12, interval = 'simulated', nsim = 10000, level = 0.95)
cat('Simulation intervals (nsim=10000):\n')
cat('Lower:', as.vector(fc_sim$lower)[1:3], '\n')
cat('Upper:', as.vector(fc_sim$upper)[1:3], '\n')

Error in file(file, "rt") : cannot open the connection
In file(file, "rt") :
  cannot open file '/home/filtheo/smooth/python/smooth/adam_general/tests/simulation_intervals/test_data_ann.csv': No such file or directory
Error in file(file, "rt") : cannot open the connection


RInterpreterError: Failed to parse and evaluate line '\n# Use the ANN data\nts_ann <- read.csv(\'/home/filtheo/smooth/python/smooth/adam_general/tests/simulation_intervals/test_data_ann.csv\')\n\nmodel <- adam(ts_ann, model = "ANN", lags = c(1), distribution = \'dnorm\')\n\n# Parametric intervals\nfc_param <- forecast(model, h = 12, interval = \'approximate\', level = 0.95)\ncat(\'Parametric intervals:\\n\')\ncat(\'Lower:\', as.vector(fc_param$lower)[1:3], \'\\n\')\ncat(\'Upper:\', as.vector(fc_param$upper)[1:3], \'\\n\\n\')\n\n# Simulation intervals\nset.seed(123)\nfc_sim <- forecast(model, h = 12, interval = \'simulated\', nsim = 10000, level = 0.95)\ncat(\'Simulation intervals (nsim=10000):\\n\')\ncat(\'Lower:\', as.vector(fc_sim$lower)[1:3], \'\\n\')\ncat(\'Upper:\', as.vector(fc_sim$upper)[1:3], \'\\n\')\n'.
R error message: 'Error in file(file, "rt") : cannot open the connection'
R stdout:
Error in file(file, "rt") : cannot open the connection
In addition: Warning message:
In file(file, "rt") :
  cannot open file '/home/filtheo/smooth/python/smooth/adam_general/tests/simulation_intervals/test_data_ann.csv': No such file or directory

### Test 6: Different nsim values

In [None]:
%%R

# Test with different nsim values
ts_ann <- read.csv('/home/filtheo/smooth/python/smooth/adam_general/tests/simulation_intervals/test_data_ann.csv')
model <- adam(ts_ann, model = "ANN", lags = c(1), distribution = 'dnorm')

for (nsim_val in c(100, 500, 1000, 5000)) {
    set.seed(123)
    fc <- forecast(model, h = 5, interval = 'simulated', nsim = nsim_val, level = 0.95)
    cat(sprintf('nsim=%d: Lower=%.2f, Upper=%.2f\n', 
                nsim_val, as.vector(fc$lower)[1], as.vector(fc$upper)[1]))
}

## Summary

R simulation intervals work by:
1. Generating random errors from the fitted distribution
2. Running multiple simulation paths through the state-space model
3. Taking quantiles of the simulated values

Key parameters:
- `interval = 'simulated'` in forecast()
- `nsim` controls number of simulations (default 10000)
- `level` sets the confidence level

Test data has been saved to CSV files for Python comparison.