In [1]:
# Load IPython's autoreload extension and reload imported modules before each
# cell runs, so edits to the package under test are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from smooth.adam_general.core.adam import ADAM
import numpy as np
import pandas as pd

# Two-Stage Initialization Tests

Two-stage initialization works by:
1. First running a model with `initial="complete"` (full backcasting) to get good starting values
2. Using those values as initial guesses for optimization, allowing parameter refinement

This should produce results that are:
- Different from pure backcasting (since parameters are refined)
- Different from pure optimal (since starting values are better)
- Generally of similar or better quality than either method

### Test 1: Global level ETS(A,N,N) - two-stage vs optimal vs backcasting

In [3]:
np.random.seed(33)
n_points = 100
time_series = np.random.normal(100, 10, n_points)
ts_df = pd.DataFrame({'value': time_series}, index=pd.date_range(start='2023-01-01', periods=n_points, freq='ME'))

In [4]:
# Reference fit for Test 1: ETS(A,N,N) with initial='optimal'.
model_optimal = ADAM(
    model='ANN',
    lags=[12],
    initial='optimal',
)
model_optimal.fit(ts_df)

print('ETS(A,N,N) with optimal initial:')
estimated_b = model_optimal.adam_estimated['B']
print('Parameters:', estimated_b)
# Only the first three of the twelve forecast steps are shown.
forecast_mean = model_optimal.predict(h=12)['mean']
print('Forecast:', forecast_mean.values[:3])

ETS(A,N,N) with optimal initial:
Parameters: [2.50250224e-02 9.85091808e+01]
Forecast: [101.13396235 101.13396235 101.13396235]


In [5]:
# ETS(A,N,N) with initial='backcasting' and n_iterations=2.
model_backcasting = ADAM(
    model='ANN',
    lags=[12],
    initial='backcasting',
    n_iterations=2,
)
model_backcasting.fit(ts_df)

print('ETS(A,N,N) with backcasting (n_iterations=2):')
estimated_b = model_backcasting.adam_estimated['B']
print('Parameters:', estimated_b)
# Only the first three of the twelve forecast steps are shown.
forecast_mean = model_backcasting.predict(h=12)['mean']
print('Forecast:', forecast_mean.values[:3])

ETS(A,N,N) with backcasting (n_iterations=2):
Parameters: [0.]
Forecast: [99.61910829 99.61910829 99.61910829]


In [6]:
# ETS(A,N,N) with initial='complete' and n_iterations=2.
# The model is stored as `model_complete` (not `model_two_stage`) so that the
# variable name matches the initialisation being tested and is not silently
# replaced by the two-stage fit in the following cell.
model_complete = ADAM(model='ANN', lags=[12], initial='complete', n_iterations=2)
model_complete.fit(ts_df)
print('ETS(A,N,N) with complete initialization:')
print('Parameters:', model_complete.adam_estimated['B'])
# Only the first three of the twelve forecast steps are shown.
print('Forecast:', model_complete.predict(h=12)['mean'].values[:3])

ETS(A,N,N) with complete initialization:
Parameters: [0.]
Forecast: [99.61910829 99.61910829 99.61910829]


In [7]:
# ETS(A,N,N) with initial='two-stage' and n_iterations=2 -- the method under test.
model_two_stage = ADAM(
    model='ANN',
    lags=[12],
    initial='two-stage',
    n_iterations=2,
)
model_two_stage.fit(ts_df)

print('ETS(A,N,N) with two-stage initialization:')
estimated_b = model_two_stage.adam_estimated['B']
print('Parameters:', estimated_b)
# Only the first three of the twelve forecast steps are shown.
forecast_mean = model_two_stage.predict(h=12)['mean']
print('Forecast:', forecast_mean.values[:3])

ETS(A,N,N) with two-stage initialization:
Parameters: [ 0.         99.61910829]
Forecast: [99.61910829 99.61910829 99.61910829]


### Test 2: Local trend ETS(A,A,N)

In [8]:
np.random.seed(42)
n_points = 120
errors = np.random.normal(0, 10, n_points)
trend = np.random.normal(0.5, 2, n_points)
time_series = np.zeros(n_points)
time_series[0] = 100
for i in range(n_points-1):
    time_series[i+1] = time_series[i] + (0.1-1) * errors[i] + trend[i] + errors[i+1]
ts_df = pd.DataFrame({'value': time_series}, index=pd.date_range(start='2023-01-01', periods=n_points, freq='ME'))

In [9]:
# Reference fit for Test 2: ETS(A,A,N) with initial='optimal'.
model_optimal = ADAM(
    model='AAN',
    lags=[12],
    initial='optimal',
)
model_optimal.fit(ts_df)

print('ETS(A,A,N) with optimal initial:')
estimated_b = model_optimal.adam_estimated['B']
print('Parameters:', estimated_b)
# Only the first three of the twelve forecast steps are shown.
forecast_mean = model_optimal.predict(h=12)['mean']
print('Forecast:', forecast_mean.values[:3])

ETS(A,A,N) with optimal initial:
Parameters: [ 0.19788966  0.         99.80698153  0.5372809 ]
Forecast: [164.79687983 165.33416073 165.87144163]


In [10]:
# ETS(A,A,N) with initial='complete'.
# Stored as `model_complete` so that the `initial='optimal'` reference fit held
# in `model_optimal` is not overwritten by a model of a different kind.
model_complete = ADAM(model='AAN', lags=[12], initial='complete')
model_complete.fit(ts_df)
print('ETS(A,A,N) with complete initial:')
print('Parameters:', model_complete.adam_estimated['B'])
# Only the first three of the twelve forecast steps are shown.
print('Forecast:', model_complete.predict(h=12)['mean'].values[:3])

ETS(A,A,N) with complete initial:
Parameters: [0.19818009 0.        ]
Forecast: [165.24829022 165.87452829 166.50076635]


In [11]:
# ETS(A,A,N) with initial='complete'.
# Stored as `model_complete` so that the `initial='optimal'` reference fit held
# in `model_optimal` is not overwritten by a model of a different kind.
# NOTE(review): this cell repeats the preceding initial='complete' fit, and
# Test 2 has no initial='backcasting' run although the other tests do --
# confirm whether backcasting was intended here.
model_complete = ADAM(model='AAN', lags=[12], initial='complete')
model_complete.fit(ts_df)
print('ETS(A,A,N) with complete initial:')
print('Parameters:', model_complete.adam_estimated['B'])
# Only the first three of the twelve forecast steps are shown.
print('Forecast:', model_complete.predict(h=12)['mean'].values[:3])

ETS(A,A,N) with complete initial:
Parameters: [0.19818009 0.        ]
Forecast: [165.24829022 165.87452829 166.50076635]


In [12]:
import os
# Sets the DEBUG_TWOSTAGE environment variable for the whole kernel process;
# it stays set for every later cell until it is removed from os.environ.
# NOTE(review): the next two cells set the same flag again, so this cell looks
# like a leftover -- confirm before removing it.
os.environ['DEBUG_TWOSTAGE'] = '1'

In [15]:
import os

# Debugging run of the ETS(A,A,N) two-stage fit. The recorded output shows
# `DEBUG Two-Stage ...` lines when DEBUG_TWOSTAGE is set, so the flag is
# enabled for this cell only.
os.environ['DEBUG_TWOSTAGE'] = '1'
try:
    model = ADAM(model='AAN', lags=[12], initial='two-stage', n_iterations=2)
    model.fit(ts_df)

    print('Python two-stage B:', model.adam_estimated['B'])
    # Same forecast accessor as the other cells: first three values of 'mean'.
    print('Python two-stage forecast:', model.predict(h=3)['mean'].values[:3])
finally:
    # Always clear the flag (even if the fit raises) so that later cells do not
    # inherit debug output through hidden process state.
    os.environ.pop('DEBUG_TWOSTAGE', None)

DEBUG Two-Stage S1: Running Stage 1 with initial='complete', return_matrices=True
DEBUG Two-Stage S1: self.lags_dict = {'lags': [1, 12], 'lags_model': [1, 12], 'lags_model_seasonal': [12], 'lags_model_arima': [], 'lags_length': 2, 'lags_model_max': 12, 'lags_model_all': [1, 12]}
DEBUG Two-Stage S1: self.lags = [12]
DEBUG Two-Stage S1: stage1_initials = {'initial': None, 'initial_type': 'complete', 'initial_estimate': True, 'initial_level': None, 'initial_level_estimate': True, 'initial_trend': None, 'initial_trend_estimate': True, 'initial_seasonal': None, 'initial_seasonal_estimate': True, 'initial_arima': None, 'initial_arima_estimate': True, 'initial_arima_number': 0, 'initial_xreg_estimate': True, 'initial_xreg_provided': False, 'n_iterations': 2}
DEBUG Two-Stage S1: self.initials_results (original) = {'initial': None, 'initial_type': 'two-stage', 'initial_estimate': True, 'initial_level': None, 'initial_level_estimate': True, 'initial_trend': None, 'initial_trend_estimate': True, 

In [18]:
import os

# Debugging run of the ETS(A,A,N) two-stage fit that also reports the cost
# function value. The recorded output shows `DEBUG Two-Stage ...` lines when
# DEBUG_TWOSTAGE is set, so the flag is enabled for this cell only.
os.environ['DEBUG_TWOSTAGE'] = '1'
try:
    model = ADAM(model='AAN', lags=[12], initial='two-stage', n_iterations=2)
    model.fit(ts_df)
    print('Python two-stage B:', model.adam_estimated['B'])
    print('Python two-stage CF:', model.adam_estimated['CF_value'])
finally:
    # Always clear the flag (even if the fit raises) so that later cells do not
    # inherit debug output through hidden process state.
    os.environ.pop('DEBUG_TWOSTAGE', None)

DEBUG Two-Stage S1: Running Stage 1 with initial='complete', return_matrices=True
DEBUG Two-Stage S1: self.lags_dict = {'lags': [1, 12], 'lags_model': [1, 12], 'lags_model_seasonal': [12], 'lags_model_arima': [], 'lags_length': 2, 'lags_model_max': 12, 'lags_model_all': [1, 12]}
DEBUG Two-Stage S1: self.lags = [12]
DEBUG Two-Stage S1: stage1_initials = {'initial': None, 'initial_type': 'complete', 'initial_estimate': True, 'initial_level': None, 'initial_level_estimate': True, 'initial_trend': None, 'initial_trend_estimate': True, 'initial_seasonal': None, 'initial_seasonal_estimate': True, 'initial_arima': None, 'initial_arima_estimate': True, 'initial_arima_number': 0, 'initial_xreg_estimate': True, 'initial_xreg_provided': False, 'n_iterations': 2}
DEBUG Two-Stage S1: self.initials_results (original) = {'initial': None, 'initial_type': 'two-stage', 'initial_estimate': True, 'initial_level': None, 'initial_level_estimate': True, 'initial_trend': None, 'initial_trend_estimate': True, 

### Test 3: Seasonal data ETS(A,A,A)

In [13]:
np.random.seed(42)
n_points = 120
errors = (1+np.random.normal(0, 0.1, n_points))
trend = np.random.normal(0.5, 2, n_points)
seasonal_sd = 0.2
seasonal_pattern = np.exp(np.random.normal(0, seasonal_sd, 12))
seasonal_pattern = seasonal_pattern / np.mean(seasonal_pattern)
time_series = np.zeros(n_points)
time_series[0] = 200 * seasonal_pattern[0] * errors[0]
for i in range(n_points-1):
    time_series[i+1] = ((time_series[i] / seasonal_pattern[(i) % 12]-trend[i]) * errors[i] ** (0.1-1) + trend[i+1]) * seasonal_pattern[(i+1) % 12] * errors[i+1]
ts_df = pd.DataFrame({'value': time_series}, index=pd.date_range(start='2023-01-01', periods=n_points, freq='ME'))

In [14]:
# Reference fit for Test 3: ETS(A,A,A) with initial='optimal'.
model_optimal = ADAM(
    model='AAA',
    lags=[12],
    distribution='dnorm',
    initial='optimal',
)
model_optimal.fit(ts_df)

print('ETS(A,A,A) with optimal initial:')
# Only the first three entries of B are reported here.
leading_b = model_optimal.adam_estimated['B'][:3]
print('Alpha, Beta, Gamma:', leading_b)
# Only the first three of the twelve forecast steps are shown.
forecast_mean = model_optimal.predict(h=12)['mean']
print('Forecast:', forecast_mean.values[:3])

ETS(A,A,A) with optimal initial:
Alpha, Beta, Gamma: [0.12624267 0.00535913 0.00389523]
Forecast: [146.11185118 158.71964843 172.56916841]


In [15]:
# ETS(A,A,A) with initial='backcasting'.
model_backcasting = ADAM(
    model='AAA',
    lags=[12],
    distribution='dnorm',
    initial='backcasting',
)
model_backcasting.fit(ts_df)

print('ETS(A,A,A) with backcasting initial:')
# Only the first three entries of B are reported here.
leading_b = model_backcasting.adam_estimated['B'][:3]
print('Alpha, Beta, Gamma:', leading_b)
# Only the first three of the twelve forecast steps are shown.
forecast_mean = model_backcasting.predict(h=12)['mean']
print('Forecast:', forecast_mean.values[:3])

ETS(A,A,A) with backcasting initial:
Alpha, Beta, Gamma: [0.0136677  0.0136335  0.04507698]
Forecast: [138.43378929 152.15703105 172.58882034]


In [16]:
# ETS(A,A,A) with initial='complete'.
model_complete = ADAM(
    model='AAA',
    lags=[12],
    distribution='dnorm',
    initial='complete',
)
model_complete.fit(ts_df)

print('ETS(A,A,A) with complete initial:')
# Only the first three entries of B are reported here.
leading_b = model_complete.adam_estimated['B'][:3]
print('Alpha, Beta, Gamma:', leading_b)
# Only the first three of the twelve forecast steps are shown.
forecast_mean = model_complete.predict(h=12)['mean']
print('Forecast:', forecast_mean.values[:3])

ETS(A,A,A) with complete initial:
Alpha, Beta, Gamma: [0.0136677  0.0136335  0.04507698]
Forecast: [138.43378929 152.15703105 172.58882034]


In [17]:
# ETS(A,A,A) with initial='two-stage' and n_iterations=2 -- the method under test.
model_two_stage = ADAM(
    model='AAA',
    lags=[12],
    distribution='dnorm',
    initial='two-stage',
    n_iterations=2,
)
model_two_stage.fit(ts_df)

print('ETS(A,A,A) with two-stage initialization:')
# Only the first three entries of B are reported here.
leading_b = model_two_stage.adam_estimated['B'][:3]
print('Alpha, Beta, Gamma:', leading_b)
# Only the first three of the twelve forecast steps are shown.
forecast_mean = model_two_stage.predict(h=12)['mean']
print('Forecast:', forecast_mean.values[:3])

ETS(A,A,A) with two-stage initialization:
Alpha, Beta, Gamma: [0.01732819 0.01730004 0.05359664]
Forecast: [145.4176931  159.24598305 175.28802547]


### Test 4: Damped trend ETS(A,Ad,N)

No new data is generated for this test: `ts_df` is still the seasonal series built in Test 3.

In [18]:
# Reference fit for Test 4: ETS(A,Ad,N) with initial='optimal'.
model_optimal = ADAM(
    model='AAdN',
    lags=[12],
    initial='optimal',
)
model_optimal.fit(ts_df)

print('ETS(A,Ad,N) with optimal initial:')
estimated_b = model_optimal.adam_estimated['B']
print('Parameters:', estimated_b)
# Only the first three of the twelve forecast steps are shown.
forecast_mean = model_optimal.predict(h=12)['mean']
print('Forecast:', forecast_mean.values[:3])

ETS(A,Ad,N) with optimal initial:
Parameters: [  0.           0.           0.94539063 214.799116    -2.42553216]
Forecast: [172.8556045  172.85303774 172.85061115]


In [19]:
# ETS(A,Ad,N) with initial='backcasting'.
model_backcasting = ADAM(
    model='AAdN',
    lags=[12],
    initial='backcasting',
)
model_backcasting.fit(ts_df)

print('ETS(A,Ad,N) with backcasting initial:')
estimated_b = model_backcasting.adam_estimated['B']
print('Parameters:', estimated_b)
# Only the first three of the twelve forecast steps are shown.
forecast_mean = model_backcasting.predict(h=12)['mean']
print('Forecast:', forecast_mean.values[:3])

ETS(A,Ad,N) with backcasting initial:
Parameters: [0.01211439 0.0025459  0.98959392]
Forecast: [172.31092103 172.36896606 172.42640706]


In [20]:
# ETS(A,Ad,N) with initial='two-stage' and n_iterations=2 -- the method under test.
model_two_stage = ADAM(
    model='AAdN',
    lags=[12],
    initial='two-stage',
    n_iterations=2,
)
model_two_stage.fit(ts_df)

print('ETS(A,Ad,N) with two-stage initialization:')
estimated_b = model_two_stage.adam_estimated['B']
print('Parameters:', estimated_b)
# Only the first three of the twelve forecast steps are shown.
forecast_mean = model_two_stage.predict(h=12)['mean']
print('Forecast:', forecast_mean.values[:3])

ETS(A,Ad,N) with two-stage initialization:
Parameters: [ 3.69852752e-03  3.69624570e-03  9.66054335e-01  5.82260465e+02
 -1.78867692e+00]
Forecast: [172.35489018 172.25697933 172.16239214]


### Test 5: Multiplicative error ETS(M,N,N)

In [21]:
# Reference fit for Test 5: ETS(M,N,N) with initial='optimal'.
model_optimal = ADAM(
    model='MNN',
    lags=[12],
    distribution='dnorm',
    initial='optimal',
)
model_optimal.fit(ts_df)

print('ETS(M,N,N) with optimal initial:')
estimated_b = model_optimal.adam_estimated['B']
print('Parameters:', estimated_b)
# Only the first three of the twelve forecast steps are shown.
forecast_mean = model_optimal.predict(h=12)['mean']
print('Forecast:', forecast_mean.values[:3])

ETS(M,N,N) with optimal initial:
Parameters: [3.29329547e-02 1.89756969e+02]
Forecast: [174.24319564 174.24319564 174.24319564]


In [22]:
# ETS(M,N,N) with initial='backcasting'.
model_backcasting = ADAM(
    model='MNN',
    lags=[12],
    distribution='dnorm',
    initial='backcasting',
)
model_backcasting.fit(ts_df)

print('ETS(M,N,N) with backcasting initial:')
estimated_b = model_backcasting.adam_estimated['B']
print('Parameters:', estimated_b)
# Only the first three of the twelve forecast steps are shown.
forecast_mean = model_backcasting.predict(h=12)['mean']
print('Forecast:', forecast_mean.values[:3])

ETS(M,N,N) with backcasting initial:
Parameters: [0.03312988]
Forecast: [174.19715699 174.19715699 174.19715699]


In [23]:
# ETS(M,N,N) with initial='two-stage' and n_iterations=2 -- the method under test.
model_two_stage = ADAM(
    model='MNN',
    lags=[12],
    distribution='dnorm',
    initial='two-stage',
    n_iterations=2,
)
model_two_stage.fit(ts_df)

print('ETS(M,N,N) with two-stage initialization:')
estimated_b = model_two_stage.adam_estimated['B']
print('Parameters:', estimated_b)
# Only the first three of the twelve forecast steps are shown.
forecast_mean = model_two_stage.predict(h=12)['mean']
print('Forecast:', forecast_mean.values[:3])

ETS(M,N,N) with two-stage initialization:
Parameters: [3.29888732e-02 1.05878873e+03]
Forecast: [174.23830382 174.23830382 174.23830382]


### Test 6: Multiplicative seasonal ETS(M,A,M)

In [24]:
# Reference fit for Test 6: ETS(M,A,M) with initial='optimal'.
model_optimal = ADAM(
    model='MAM',
    lags=[12],
    distribution='dnorm',
    initial='optimal',
)
model_optimal.fit(ts_df)

print('ETS(M,A,M) with optimal initial:')
# Only the first three entries of B are reported here.
leading_b = model_optimal.adam_estimated['B'][:3]
print('Alpha, Beta, Gamma:', leading_b)
# Only the first three of the twelve forecast steps are shown.
forecast_mean = model_optimal.predict(h=12)['mean']
print('Forecast:', forecast_mean.values[:3])

ETS(M,A,M) with optimal initial:
Alpha, Beta, Gamma: [0.11440164 0.0028743  0.0009684 ]
Forecast: [141.91794616 159.25570587 176.02186568]


In [25]:
# ETS(M,A,M) with initial='backcasting'.
model_backcasting = ADAM(
    model='MAM',
    lags=[12],
    distribution='dnorm',
    initial='backcasting',
)
model_backcasting.fit(ts_df)

print('ETS(M,A,M) with backcasting initial:')
# Only the first three entries of B are reported here.
leading_b = model_backcasting.adam_estimated['B'][:3]
print('Alpha, Beta, Gamma:', leading_b)
# Only the first three of the twelve forecast steps are shown.
forecast_mean = model_backcasting.predict(h=12)['mean']
print('Forecast:', forecast_mean.values[:3])

ETS(M,A,M) with backcasting initial:
Alpha, Beta, Gamma: [1.23119102e-02 1.20863591e-02 6.26251477e-06]
Forecast: [141.32419838 148.93068856 177.07918742]


In [26]:
# ETS(M,A,M) with initial='complete'.
model_complete = ADAM(
    model='MAM',
    lags=[12],
    distribution='dnorm',
    initial='complete',
)
model_complete.fit(ts_df)

print('ETS(M,A,M) with complete initial:')
# Only the first three entries of B are reported here.
leading_b = model_complete.adam_estimated['B'][:3]
print('Alpha, Beta, Gamma:', leading_b)
# Only the first three of the twelve forecast steps are shown.
forecast_mean = model_complete.predict(h=12)['mean']
print('Forecast:', forecast_mean.values[:3])

ETS(M,A,M) with complete initial:
Alpha, Beta, Gamma: [1.23119102e-02 1.20863591e-02 6.26251477e-06]
Forecast: [141.32419838 148.93068856 177.07918742]


In [27]:
# ETS(M,A,M) with initial='two-stage' and n_iterations=2 -- the method under test.
model_two_stage = ADAM(
    model='MAM',
    lags=[12],
    distribution='dnorm',
    initial='two-stage',
    n_iterations=2,
)
model_two_stage.fit(ts_df)

print('ETS(M,A,M) with two-stage initialization:')
# Only the first three entries of B are reported here.
leading_b = model_two_stage.adam_estimated['B'][:3]
print('Alpha, Beta, Gamma:', leading_b)
# Only the first three of the twelve forecast steps are shown.
forecast_mean = model_two_stage.predict(h=12)['mean']
print('Forecast:', forecast_mean.values[:3])

ETS(M,A,M) with two-stage initialization:
Alpha, Beta, Gamma: [1.92406864e-02 1.81405640e-02 8.60036816e-06]
Forecast: [143.36193817 158.93173416 174.55387681]


## Summary

Two-stage initialization successfully:
- Runs a complete backcasting model first
- Extracts parameters (persistence, damping, ARMA)
- Extracts and normalizes initial states
- Uses these as starting values for optimization
- Produces results that combine benefits of both approaches