# M-Competition Evaluation

This notebook evaluates ADAM and ES models on M1 and M3 competition datasets.

In [1]:
import numpy as np
import pandas as pd
import time
from concurrent.futures import ProcessPoolExecutor, as_completed
import multiprocessing
import warnings
warnings.filterwarnings('ignore')

from mcomp import M1, M3, load_m1, load_m3
from smooth import ADAM, ES

## Error Metrics

In [2]:
def RMSSE(holdout, forecast, actuals):
    """
    Root Mean Squared Scaled Error.
    
    Parameters
    ----------
    holdout : array-like
        Actual holdout values
    forecast : array-like
        Forecasted values
    actuals : array-like
        In-sample actual values (for scaling)
    
    Returns
    -------
    float
        RMSSE value
    """
    holdout = np.asarray(holdout)
    forecast = np.asarray(forecast)
    actuals = np.asarray(actuals)
    
    mse = np.mean((holdout - forecast) ** 2)
    scale = np.mean(np.diff(actuals) ** 2)
    
    if scale == 0:
        return np.nan
    
    return np.sqrt(mse / scale)

def SAME(holdout, forecast, actuals):
    """
    Scaled Absolute Mean Error.
    
    Parameters
    ----------
    holdout : array-like
        Actual holdout values
    forecast : array-like
        Forecasted values
    actuals : array-like
        In-sample actual values (for scaling)
    
    Returns
    -------
    float
        RMSSE value
    """
    holdout = np.asarray(holdout)
    forecast = np.asarray(forecast)
    actuals = np.asarray(actuals)
    
    ame = np.abs(np.mean(holdout - forecast))
    scale = np.mean(np.abs(np.diff(actuals)))
    
    if scale == 0:
        return np.nan
    
    return ame / scale

## Load Datasets

In [3]:
# Load M1 and M3 datasets
m1 = load_m1()
m3 = load_m3()

# Combine datasets into a list
datasets = []
for idx in m1.keys():
    datasets.append(m1[idx])
for idx in m3.keys():
    datasets.append(m3[idx])

print(f"Total series: {len(datasets)}")
print(f"M1: {len(m1)} series")
print(f"M3: {len(m3)} series")

Loaded M1 dataset: 1001 series
Loaded M3 dataset: 3003 series
Total series: 4004
M1: 1001 series
M3: 3003 series


## Define Methods

In [4]:
# Method names
methods_names = [
    "ADAM ETS Back",
    "ADAM ETS Opt", 
    "ADAM ETS Two",
    "ES Back",
    "ES Opt",
    "ES Two"
]

methods_number = len(methods_names)
dataset_length = len(datasets)

print(f"Methods: {methods_number}")
print(f"Datasets: {dataset_length}")

Methods: 6
Datasets: 4004


In [None]:
from smooth.adam_general.core.utils.utils import msdecompose

series = datasets[349]

print(series.x)

test = msdecompose(series.x, lags=[1])

# Fit model
model = ES(model="AAdN", lags=[1, series.period], initial="optimal")
model.fit(series.x)

print(model)

{'y': array([5.13, 5.16, 5.17, 5.2 , 5.22, 5.2 , 5.24, 5.26, 5.27, 5.3 , 5.32,
       5.34, 5.39, 5.43, 5.45, 5.49, 5.54, 5.57, 5.59, 5.59, 5.58, 5.54,
       5.53, 5.54, 5.54, 5.56, 5.6 , 5.6 , 5.6 , 5.59, 5.57, 5.53, 5.5 ,
       5.48, 5.44, 5.45, 5.49, 5.54, 5.58, 5.63, 5.68, 5.69, 5.71, 5.72,
       5.73, 5.74, 5.77, 5.79, 5.78, 5.78, 5.81, 5.83, 5.86, 5.9 , 5.91,
       5.94]), 'states': array([[5.14237701,        nan],
       [5.16041345, 0.01803643],
       [5.17838693, 0.01797348],
       [5.19629263, 0.0179057 ],
       [5.21412598, 0.01783334],
       [5.23188201, 0.01775603],
       [5.24955565, 0.01767364],
       [5.26714384, 0.01758819],
       [5.28464532, 0.01750148],
       [5.30206087, 0.01741555],
       [5.31939437, 0.0173335 ],
       [5.3366533 , 0.01725893],
       [5.3538501 , 0.0171968 ],
       [5.37100413, 0.01715404],
       [5.38814298, 0.01713885],
       [5.40530655, 0.01716356],
       [5.42255315, 0.0172466 ],
       [5.43995309, 0.01739994],
       [5.

## Evaluation Functions

In [5]:
def evaluate_single_series(series, method_name):
    """
    Evaluate a single method on a single series.
    
    Parameters
    ----------
    series : MCompSeries
        Series to evaluate
    method_name : str
        Name of the method to use
    
    Returns
    -------
    tuple
        (RMSSE, SAME, time_elapsed)
    """
    try:
        start_time = time.time()
        
        # Determine lags and model based on period
        period = series.period
        if period > 1:
            lags = [1, period]
            model_str = "ZXZ"  # Auto-select including seasonality
        else:
            lags = [1]
            model_str = "ZXN"  # Auto-select without seasonality for non-seasonal data
        
        # Select model class based on method
        if "ADAM" in method_name:
            model_class = ADAM
        else:
            model_class = ES
        
        if "Back" in method_name:
            initial = "backcasting"
        elif "Opt" in method_name:
            initial = "optimal"
        elif "Two" in method_name:
            initial = "two-stage"
        else:
            initial = "backcasting"
        
        # Create and fit model
        model = model_class(model=model_str, lags=lags, initial=initial)
        model.fit(series.x)
        
        # Generate forecasts
        forecasts = model.predict(h=series.h)
        forecast_values = forecasts['mean'].values
        
        time_elapsed = time.time() - start_time
        
        # Calculate RMSSE
        rmsse = RMSSE(series.xx, forecast_values, series.x)
        same = SAME(series.xx, forecast_values, series.x)
        
        return (rmsse, same, time_elapsed)
    
    except Exception as e:
        return (np.nan, np.nan, np.nan)


def evaluate_method_sequential(datasets, method_name, verbose=True):
    """
    Evaluate a method on all datasets sequentially.
    
    Parameters
    ----------
    datasets : list
        List of MCompSeries
    method_name : str
        Name of the method
    verbose : bool
        Whether to print progress
    
    Returns
    -------
    tuple
        Arrays of (RMSSE values, time values)
    """
    n = len(datasets)
    rmsse_values = np.full(n, np.nan)
    same_values = np.full(n, np.nan)
    time_values = np.full(n, np.nan)
    
    for i, series in enumerate(datasets):
        if verbose and (i + 1) % 100 == 0:
            print(f"  {method_name}: {i + 1}/{n}")
        
        rmsse, same, elapsed = evaluate_single_series(series, method_name)
        rmsse_values[i] = rmsse
        same_values[i] = same
        time_values[i] = elapsed
    
    return rmsse_values, same_values, time_values

## Run Evaluation

This may take a while depending on the number of series.

In [10]:
# First, test on a small subset to make sure everything works
test_datasets = datasets[:10]

print("Testing on first 10 series...")
for method in methods_names[:2]:  # Test first 2 methods
    rmsse_vals, same_vals, time_vals = evaluate_method_sequential(test_datasets, method, verbose=False)
    print(f"{method}: Mean RMSSE = {np.nanmean(rmsse_vals):.4f}, SAME = {np.nanmean(same_vals):.4f}, Time = {np.nanmean(time_vals):.3f}s")

Testing on first 10 series...
ADAM ETS Back: Mean RMSSE = 5.9556, SAME = 6.4621, Time = 0.059s
ADAM ETS Opt: Mean RMSSE = 6.4098, SAME = 6.9332, Time = 0.126s


In [11]:
# Initialize results array
# Shape: (methods, datasets, metrics) where metrics = [RMSSE, SAME, Time]
test_results = np.full((methods_number, dataset_length, 3), np.nan)

print(f"Results array shape: {test_results.shape}")
print(f"Methods: {methods_names}")

Results array shape: (6, 4004, 3)
Methods: ['ADAM ETS Back', 'ADAM ETS Opt', 'ADAM ETS Two', 'ES Back', 'ES Opt', 'ES Two']


In [12]:
# Run full evaluation sequentially (alternative to parallel)
# Skip this cell if using parallel evaluation above

# for j, method_name in enumerate(methods_names):
#     print(f"\nEvaluating {method_name} ({j+1}/{methods_number})...")
#     start = time.time()
#     
#     rmsse_values, same_values, time_values = evaluate_method_sequential(datasets, method_name)
#     
#     test_results[j, :, 0] = rmsse_values
#     test_results[j, :, 1] = same_values
#     test_results[j, :, 2] = time_values
#     
#     total_time = time.time() - start
#     print(f"  Completed in {total_time:.1f}s")
#     print(f"  Mean RMSSE: {np.nanmean(rmsse_values):.4f}")
#     print(f"  Mean SAME: {np.nanmean(same_values):.4f}")
#     print(f"  Mean Time per series: {np.nanmean(time_values):.3f}s")

## Parallel Evaluation

Run evaluation using all CPU cores for faster processing.

In [6]:
def _evaluate_task(args):
    """
    Worker function for parallel evaluation.
    Must be defined at module level for pickling.
    
    Parameters
    ----------
    args : tuple
        (series_idx, series_data, method_name) where series_data is a dict
        containing the series attributes needed for evaluation
    
    Returns
    -------
    tuple
        (series_idx, method_name, rmsse, same, time_elapsed)
    """
    import numpy as np
    import time
    from smooth import ADAM, ES
    
    series_idx, series_data, method_name = args
    
    try:
        start_time = time.time()
        
        # Reconstruct series data
        x = series_data['x']
        xx = series_data['xx']
        h = series_data['h']
        period = series_data['period']
        
        # Determine lags and model based on period
        if period > 1:
            lags = [1, period]
            model_str = "ZXZ"
        else:
            lags = [1]
            model_str = "ZXN"
        
        # Select model class based on method
        if "ADAM" in method_name:
            model_class = ADAM
        else:
            model_class = ES
        
        if "Back" in method_name:
            initial = "backcasting"
        elif "Opt" in method_name:
            initial = "optimal"
        elif "Two" in method_name:
            initial = "two-stage"
        else:
            initial = "backcasting"
        
        # Create and fit model
        model = model_class(model=model_str, lags=lags, initial=initial)
        model.fit(x)
        
        # Generate forecasts
        forecasts = model.predict(h=h)
        forecast_values = forecasts['mean'].values
        
        time_elapsed = time.time() - start_time
        
        # Calculate metrics
        holdout = np.asarray(xx)
        actuals = np.asarray(x)
        
        # RMSSE
        mse = np.mean((holdout - forecast_values) ** 2)
        scale = np.mean(np.diff(actuals) ** 2)
        rmsse = np.sqrt(mse / scale) if scale != 0 else np.nan
        
        # SAME
        ame = np.abs(np.mean(holdout - forecast_values))
        scale_same = np.mean(np.abs(np.diff(actuals)))
        same = ame / scale_same if scale_same != 0 else np.nan
        
        return (series_idx, method_name, rmsse, same, time_elapsed)
    
    except Exception as e:
        return (series_idx, method_name, np.nan, np.nan, np.nan)


def evaluate_parallel(datasets, methods_names, n_workers=None):
    """
    Evaluate all methods on all datasets in parallel.
    
    Parameters
    ----------
    datasets : list
        List of MCompSeries objects
    methods_names : list
        List of method names to evaluate
    n_workers : int, optional
        Number of parallel workers. Defaults to all CPU cores.
    
    Returns
    -------
    np.ndarray
        Results array of shape (n_methods, n_datasets, 3) containing
        [RMSSE, SAME, time] for each method-dataset combination
    """
    if n_workers is None:
        n_workers = multiprocessing.cpu_count()
    
    n_methods = len(methods_names)
    n_datasets = len(datasets)
    
    # Initialize results array
    results = np.full((n_methods, n_datasets, 3), np.nan)
    
    # Prepare tasks: convert series to picklable dicts
    tasks = []
    for j, method_name in enumerate(methods_names):
        for i, series in enumerate(datasets):
            series_data = {
                'x': np.asarray(series.x),
                'xx': np.asarray(series.xx),
                'h': series.h,
                'period': series.period
            }
            tasks.append((i, series_data, method_name))
    
    print(f"Starting parallel evaluation with {n_workers} workers...")
    print(f"Total tasks: {len(tasks)} ({n_methods} methods × {n_datasets} series)")
    
    start_time = time.time()
    completed = 0
    
    with ProcessPoolExecutor(max_workers=n_workers) as executor:
        futures = {executor.submit(_evaluate_task, task): task for task in tasks}
        
        for future in as_completed(futures):
            result = future.result()
            series_idx, method_name, rmsse, same, elapsed = result
            
            # Find method index
            method_idx = methods_names.index(method_name)
            
            # Store results
            results[method_idx, series_idx, 0] = rmsse
            results[method_idx, series_idx, 1] = same
            results[method_idx, series_idx, 2] = elapsed
            
            completed += 1
            if completed % 1000 == 0:
                elapsed_total = time.time() - start_time
                rate = completed / elapsed_total
                remaining = (len(tasks) - completed) / rate
                print(f"  Progress: {completed}/{len(tasks)} ({100*completed/len(tasks):.1f}%) - "
                      f"ETA: {remaining/60:.1f} min")
    
    total_time = time.time() - start_time
    print(f"\nCompleted in {total_time/60:.1f} minutes ({total_time:.1f}s)")
    print(f"Average time per task: {total_time/len(tasks)*1000:.1f}ms")
    
    return results

In [14]:
# Run parallel evaluation using all CPU cores
# This is much faster than sequential evaluation

print(f"Available CPU cores: {multiprocessing.cpu_count()}")

# Run parallel evaluation
test_results = evaluate_parallel(datasets, methods_names)

# Print summary
print("\nPer-method summary:")
for j, method in enumerate(methods_names):
    rmsse_mean = np.nanmean(test_results[j, :, 0])
    same_mean = np.nanmean(test_results[j, :, 1])
    time_mean = np.nanmean(test_results[j, :, 2])
    failed = np.sum(np.isnan(test_results[j, :, 0]))
    print(f"  {method}: RMSSE={rmsse_mean:.4f}, SAME={same_mean:.4f}, "
          f"Time={time_mean:.3f}s, Failed={failed}")

np.save('2026-01-18-Mcomp-test.npy', test_results)

Available CPU cores: 32
Starting parallel evaluation with 32 workers...
Total tasks: 24024 (6 methods × 4004 series)
  Progress: 1000/24024 (4.2%) - ETA: 9.8 min
  Progress: 2000/24024 (8.3%) - ETA: 6.6 min
  Progress: 3000/24024 (12.5%) - ETA: 7.7 min
  Progress: 4000/24024 (16.7%) - ETA: 8.3 min
  Progress: 5000/24024 (20.8%) - ETA: 10.9 min
  Progress: 6000/24024 (25.0%) - ETA: 9.8 min
  Progress: 7000/24024 (29.1%) - ETA: 11.3 min
  Progress: 8000/24024 (33.3%) - ETA: 12.5 min
  Progress: 9000/24024 (37.5%) - ETA: 12.4 min
  Progress: 10000/24024 (41.6%) - ETA: 11.0 min
  Progress: 11000/24024 (45.8%) - ETA: 10.9 min
  Progress: 12000/24024 (50.0%) - ETA: 10.8 min
  Progress: 13000/24024 (54.1%) - ETA: 9.5 min
  Progress: 14000/24024 (58.3%) - ETA: 8.2 min
  Progress: 15000/24024 (62.4%) - ETA: 7.2 min
  Progress: 16000/24024 (66.6%) - ETA: 6.3 min
  Progress: 17000/24024 (70.8%) - ETA: 5.7 min
  Progress: 18000/24024 (74.9%) - ETA: 4.7 min
  Progress: 19000/24024 (79.1%) - ETA: 4.

## Results Summary

In [8]:
test_results = np.load('2026-01-18-Mcomp-test.npy')

# Create summary DataFrame
summary = pd.DataFrame({
    'Method': methods_names,
    'Min': [np.nanmin(test_results[j, :, 0]) for j in range(methods_number)],
    'Q1': [np.nanquantile(test_results[j, :, 0], 0.25) for j in range(methods_number)],
    'Mean': [np.nanmean(test_results[j, :, 0]) for j in range(methods_number)],
    'Med': [np.nanmedian(test_results[j, :, 0]) for j in range(methods_number)],
    'Q3': [np.nanquantile(test_results[j, :, 0], 0.75) for j in range(methods_number)],
    'Max': [np.nanmax(test_results[j, :, 0]) for j in range(methods_number)],
    'Mean SAME': [np.nanmean(test_results[j, :, 1]) for j in range(methods_number)],
    'Med SAME': [np.nanmedian(test_results[j, :, 1]) for j in range(methods_number)],
    'Mean Time (s)': [np.nanmean(test_results[j, :, 2]) for j in range(methods_number)],
    'Failed': [np.sum(np.isnan(test_results[j, :, 0])) for j in range(methods_number)]
})


print("\n" + "="*60)
print("EVALUATION RESULTS")
print("="*60)
print(summary.to_string(index=False))


EVALUATION RESULTS
       Method      Min       Q1     Mean      Med       Q3       Max  Mean SAME  Med SAME  Mean Time (s)  Failed
ADAM ETS Back 0.018252 0.707183 2.085643 1.241253 2.547780 50.258736   2.103068  1.084022       0.803426       0
 ADAM ETS Opt 0.024155 0.695568 2.078898 1.268761 2.558321 51.616184   2.092810  1.105280       2.241774       0
 ADAM ETS Two 0.024925 0.700068 2.093054 1.265177 2.532407 51.616184   2.110408  1.103959       2.409651       0
      ES Back 0.018252 0.705564 2.084956 1.244262 2.540733 50.258736   2.104058  1.079399       0.813481       0
       ES Opt 0.024155 0.706472 2.081472 1.266547 2.556744 51.616184   2.096443  1.101234       2.163924       0
       ES Two 0.024925 0.711859 2.103972 1.273581 2.576052 51.616184   2.126635  1.105244       2.315632       0


In [None]:
# Results by series type
series_types = [s.type for s in datasets]
unique_types = list(set(series_types))

print("\n" + "="*60)
print("RESULTS BY SERIES TYPE")
print("="*60)

for stype in unique_types:
    mask = np.array([s.type == stype for s in datasets])
    print(f"\n{stype.upper()} ({np.sum(mask)} series):")
    
    for j, method in enumerate(methods_names):
        rmsse_type = test_results[j, mask, 0]
        print(f"  {method}: Mean RMSSE = {np.nanmean(rmsse_type):.4f}")

In [None]:
# Save results
import datetime
import joblib

date_str = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

# Save as numpy array
np.save(f'test_results_{date_str}.npy', test_results)

# Save summary as CSV
summary.to_csv(f'test_summary_{date_str}.csv', index=False)

# Save complete results with metadata using joblib
results_dict = {
    'test_results': test_results,
    'methods_names': methods_names,
    'dataset_info': [(s.sn, s.type, s.period, len(s.x), s.h) for s in datasets],
    'summary': summary
}
joblib.dump(results_dict, f'test_results_full_{date_str}.joblib')

print(f"Results saved:")
print(f"  - test_results_{date_str}.npy (raw array)")
print(f"  - test_summary_{date_str}.csv (summary table)")
print(f"  - test_results_full_{date_str}.joblib (complete with metadata)")

## Single Series Example

In [5]:
series = datasets[349]
print(series.x)

# Fit model
model = ES(model="AAdN", lags=[1, series.period], initial="backcasting")
model.fit(series.x)

print(model)

[5.13 5.16 5.17 5.2  5.22 5.2  5.24 5.26 5.27 5.3  5.32 5.34 5.39 5.43
 5.45 5.49 5.54 5.57 5.59 5.59 5.58 5.54 5.53 5.54 5.54 5.56 5.6  5.6
 5.6  5.59 5.57 5.53 5.5  5.48 5.44 5.45 5.49 5.54 5.58 5.63 5.68 5.69
 5.71 5.72 5.73 5.74 5.77 5.79 5.78 5.78 5.81 5.83 5.86 5.9  5.91 5.94]
Time elapsed: 0.02 seconds
Model estimated using ES() function: ETS(AAdN)
With backcasting initialisation
Distribution assumed in the model: Normal
Loss function type: likelihood; Loss function value: -138.2533
Persistence vector g:
 alpha   beta
1.0000 0.6923
Damping parameter: 1.0000
Sample size: 56
Number of estimated parameters: 4
Number of degrees of freedom: 52
Information criteria:
      AIC      AICc       BIC      BICc
-268.5066 -267.7222 -260.4052 -258.8266


In [None]:
# Test on a single series to see detailed output
series = M3[203]
print(f"Series: {series}")
print(f"Training length: {len(series.x)}")
print(f"Test length: {len(series.xx)}")
print(f"Period: {series.period}")

# Fit model
model = ES(model="ZXZ", lags=[1, series.period], initial="optimal")
model.fit(series.x)

print("\n" + str(model))

# Forecast
forecasts = model.predict(h=series.h)
print("\nForecasts vs Actuals:")
comparison = pd.DataFrame({
    'Forecast': forecasts['mean'].values,
    'Actual': series.xx,
    'Error': forecasts['mean'].values - series.xx
})
print(comparison)

# Calculate error metrics
rmsse = RMSSE(series.xx, forecasts['mean'].values, series.x)
print(f"\nRMSSE: {rmsse:.4f}")

same = SAME(series.xx, forecasts['mean'].values, series.x)
print(f"\nSAME: {same:.4f}")

## Parameter Distribution Analysis

Record model types, loss values, smoothing and dampening parameters for distribution analysis.

In [6]:
def _extract_params_task(args):
    """
    Worker function for parallel parameter extraction.
    
    Parameters
    ----------
    args : tuple
        (series_idx, series_data, method_name)
    
    Returns
    -------
    tuple
        (series_idx, method_name, params_dict)
    """
    import numpy as np
    import time
    from smooth import ADAM, ES
    
    series_idx, series_data, method_name = args
    
    # Initialize result dict with NaN/None defaults
    params = {
        'model_type': None,
        'loss_value': np.nan,
        'alpha': np.nan,
        'beta': np.nan,
        'gamma': np.nan,
        'phi': np.nan,
        'error': None
    }
    
    try:
        # Reconstruct series data
        x = series_data['x']
        period = series_data['period']
        
        # Determine lags and model based on period
        if period > 1:
            lags = [1, period]
            model_str = "ZXZ"
        else:
            lags = [1]
            model_str = "ZXN"
        
        # Select model class based on method
        if "ADAM" in method_name:
            model_class = ADAM
        else:
            model_class = ES
        
        if "Back" in method_name:
            initial = "backcasting"
        elif "Opt" in method_name:
            initial = "optimal"
        elif "Two" in method_name:
            initial = "two-stage"
        else:
            initial = "backcasting"
        
        # Create and fit model
        model = model_class(model=model_str, lags=lags, initial=initial)
        model.fit(x)
        
        # Extract the SELECTED model type (not the input model string)
        # Construct from error_type, trend_type, season_type, and damped flag
        if hasattr(model, 'model_type_dict') and model.model_type_dict:
            error_type = model.model_type_dict.get('error_type', '')
            trend_type = model.model_type_dict.get('trend_type', '')
            season_type = model.model_type_dict.get('season_type', '')
            damped = model.model_type_dict.get('damped', False)
            
            # Construct model string: e.g., "MAN", "AAdA", "ANN"
            if error_type and trend_type is not None and season_type is not None:
                selected_model = error_type + trend_type
                if damped and trend_type != 'N':
                    selected_model += 'd'
                selected_model += season_type
                params['model_type'] = selected_model
        
        # Extract loss value
        if hasattr(model, 'adam_estimated') and model.adam_estimated:
            params['loss_value'] = model.adam_estimated.get('CF_value', np.nan)
        
        # Extract smoothing parameters
        if hasattr(model, 'persistence_level_') and model.persistence_level_ is not None:
            params['alpha'] = float(model.persistence_level_)
        
        if hasattr(model, 'persistence_trend_') and model.persistence_trend_ is not None:
            params['beta'] = float(model.persistence_trend_)
        
        if hasattr(model, 'persistence_seasonal_') and model.persistence_seasonal_ is not None:
            gamma = model.persistence_seasonal_
            if isinstance(gamma, (list, np.ndarray)) and len(gamma) > 0:
                params['gamma'] = float(gamma[0])
            elif isinstance(gamma, (int, float)):
                params['gamma'] = float(gamma)
        
        # Extract dampening parameter
        if hasattr(model, 'phi_') and model.phi_ is not None:
            params['phi'] = float(model.phi_)
        
        return (series_idx, method_name, params)
    
    except Exception as e:
        params['error'] = str(e)
        return (series_idx, method_name, params)


def extract_params_parallel(datasets, methods_names, n_workers=None):
    """
    Extract model parameters for all methods on all datasets in parallel.
    
    Parameters
    ----------
    datasets : list
        List of MCompSeries objects
    methods_names : list
        List of method names to evaluate
    n_workers : int, optional
        Number of parallel workers. Defaults to all CPU cores.
    
    Returns
    -------
    dict
        Nested dictionary with structure:
        {
            method_name: {
                'model_types': [...],      # list of selected model types (e.g., 'AAN', 'MAdM')
                'loss_values': [...],      # list of loss values
                'alpha': [...],            # list of alpha (level smoothing) values
                'beta': [...],             # list of beta (trend smoothing) values
                'gamma': [...],            # list of gamma (seasonal smoothing) values
                'phi': [...],              # list of phi (dampening) values
                'errors': [...],           # list of error messages (None if success)
            }
        }
    """
    if n_workers is None:
        n_workers = multiprocessing.cpu_count()
    
    n_methods = len(methods_names)
    n_datasets = len(datasets)
    
    # Initialize results dictionary
    results = {
        method: {
            'model_types': [None] * n_datasets,
            'loss_values': [np.nan] * n_datasets,
            'alpha': [np.nan] * n_datasets,
            'beta': [np.nan] * n_datasets,
            'gamma': [np.nan] * n_datasets,
            'phi': [np.nan] * n_datasets,
            'errors': [None] * n_datasets,
        }
        for method in methods_names
    }
    
    # Prepare tasks
    tasks = []
    for method_name in methods_names:
        for i, series in enumerate(datasets):
            series_data = {
                'x': np.asarray(series.x),
                'period': series.period
            }
            tasks.append((i, series_data, method_name))
    
    print(f"Starting parallel parameter extraction with {n_workers} workers...")
    print(f"Total tasks: {len(tasks)} ({n_methods} methods × {n_datasets} series)")
    
    start_time = time.time()
    completed = 0
    
    with ProcessPoolExecutor(max_workers=n_workers) as executor:
        futures = {executor.submit(_extract_params_task, task): task for task in tasks}
        
        for future in as_completed(futures):
            series_idx, method_name, params = future.result()
            
            # Store results
            results[method_name]['model_types'][series_idx] = params['model_type']
            results[method_name]['loss_values'][series_idx] = params['loss_value']
            results[method_name]['alpha'][series_idx] = params['alpha']
            results[method_name]['beta'][series_idx] = params['beta']
            results[method_name]['gamma'][series_idx] = params['gamma']
            results[method_name]['phi'][series_idx] = params['phi']
            results[method_name]['errors'][series_idx] = params['error']
            
            completed += 1
            if completed % 1000 == 0:
                elapsed_total = time.time() - start_time
                rate = completed / elapsed_total
                remaining = (len(tasks) - completed) / rate
                print(f"  Progress: {completed}/{len(tasks)} ({100*completed/len(tasks):.1f}%) - "
                      f"ETA: {remaining/60:.1f} min")
    
    total_time = time.time() - start_time
    print(f"\nCompleted in {total_time/60:.1f} minutes ({total_time:.1f}s)")
    
    # Convert lists to numpy arrays for easier analysis
    for method in methods_names:
        results[method]['loss_values'] = np.array(results[method]['loss_values'])
        results[method]['alpha'] = np.array(results[method]['alpha'])
        results[method]['beta'] = np.array(results[method]['beta'])
        results[method]['gamma'] = np.array(results[method]['gamma'])
        results[method]['phi'] = np.array(results[method]['phi'])
    
    return results

In [7]:
# Run parallel parameter extraction
param_results = extract_params_parallel(datasets, methods_names)

np.save('2026-01-19-Mcomp-param_results.npy', param_results)

# Save results
joblib.dump(param_results, '2026-01-19-Mcomp-params.joblib')
print("Parameter results saved to 2026-01-19-Mcomp-params.joblib")

Starting parallel parameter extraction with 32 workers...
Total tasks: 24024 (6 methods × 4004 series)
  Progress: 1000/24024 (4.2%) - ETA: 9.8 min
  Progress: 2000/24024 (8.3%) - ETA: 6.7 min
  Progress: 3000/24024 (12.5%) - ETA: 7.8 min
  Progress: 4000/24024 (16.7%) - ETA: 8.4 min
  Progress: 5000/24024 (20.8%) - ETA: 11.1 min
  Progress: 6000/24024 (25.0%) - ETA: 9.9 min
  Progress: 7000/24024 (29.1%) - ETA: 11.4 min
  Progress: 8000/24024 (33.3%) - ETA: 12.6 min




  Progress: 9000/24024 (37.5%) - ETA: 12.7 min


  Progress: 10000/24024 (41.6%) - ETA: 11.2 min










  Progress: 11000/24024 (45.8%) - ETA: 11.2 min






  Progress: 12000/24024 (50.0%) - ETA: 11.2 min

  Progress: 13000/24024 (54.1%) - ETA: 9.8 min
  Progress: 14000/24024 (58.3%) - ETA: 8.4 min
  Progress: 15000/24024 (62.4%) - ETA: 7.4 min
  Progress: 16000/24024 (66.6%) - ETA: 6.4 min
  Progress: 17000/24024 (70.8%) - ETA: 5.8 min
  Progress: 18000/24024 (74.9%) - ETA: 4.8 min
  Progress: 19000/24024 (79.1%) 

NameError: name 'joblib' is not defined

In [8]:
# param_results = np.load('2026-01-19-Mcomp-param_results.npy')

# Analyze model type distributions
print("=" * 70)
print("MODEL TYPE DISTRIBUTION BY METHOD")
print("=" * 70)

for method in methods_names:
    model_types = param_results[method]['model_types']
    # Count occurrences of each model type
    type_counts = pd.Series(model_types).value_counts()
    print(f"\n{method}:")
    print(type_counts.head(10).to_string())

MODEL TYPE DISTRIBUTION BY METHOD

ADAM ETS Back:
MAN     754
MAA     434
AAN     421
MAM     375
MNM     370
MNN     243
ANN     226
ANA     219
AAA     188
MAdN    168

ADAM ETS Opt:
MAN     775
MNN     605
ANN     490
AAN     392
MAM     331
MAdN    237
MNA     226
AAdN    213
MNM     169
ANM     148

ADAM ETS Two:
MAN     775
MNN     605
ANN     490
AAN     392
MAM     331
MAdN    237
MNA     226
AAdN    213
MNM     169
ANM     148

ES Back:
MAN     754
MAA     435
AAN     417
MAM     388
MNM     375
ANN     239
MNN     229
ANA     225
AAA     190
MAdN    166

ES Opt:
MAN     771
MNN     621
ANN     475
AAN     380
MAM     312
MNA     232
MAdN    227
AAdN    225
MNM     209
ANA     126

ES Two:
MAN     771
MNN     621
ANN     475
AAN     380
MAM     312
MNA     232
MAdN    227
AAdN    225
MNM     209
ANA     126


In [9]:
# Parameter distribution summary
print("=" * 70)
print("PARAMETER DISTRIBUTION SUMMARY")
print("=" * 70)

param_names = ['alpha', 'beta', 'gamma', 'phi', 'loss_values']
param_labels = ['Alpha (level)', 'Beta (trend)', 'Gamma (seasonal)', 'Phi (damping)', 'Loss value']

for method in methods_names:
    print(f"\n{method}:")
    print("-" * 60)
    
    summary_data = []
    for param, label in zip(param_names, param_labels):
        values = param_results[method][param]
        valid = values[~np.isnan(values)]
        if len(valid) > 0:
            summary_data.append({
                'Parameter': label,
                'Count': len(valid),
                'Mean': np.mean(valid),
                'Std': np.std(valid),
                'Min': np.min(valid),
                'Q25': np.percentile(valid, 25),
                'Median': np.median(valid),
                'Q75': np.percentile(valid, 75),
                'Max': np.max(valid),
                'Where': np.argmax(values)
            })
    
    if summary_data:
        df = pd.DataFrame(summary_data)
        print(df.to_string(index=False, float_format=lambda x: f'{x:.4f}'))

PARAMETER DISTRIBUTION SUMMARY

ADAM ETS Back:
------------------------------------------------------------
    Parameter  Count     Mean      Std       Min      Q25   Median      Q75       Max  Where
Phi (damping)     69   0.8913   0.1611    0.2581   0.8554   0.9470   1.0000    1.0000      0
   Loss value   4004 399.4577 279.3122 -349.9066 169.2847 321.5593 600.6941 1401.8902    474

ADAM ETS Opt:
------------------------------------------------------------
    Parameter  Count     Mean      Std       Min      Q25   Median      Q75       Max  Where
Phi (damping)     70   0.8420   0.1383    0.4607   0.8180   0.8875   0.9447    0.9976      0
   Loss value   4004 399.7592 278.6689 -349.3583 169.2995 320.7640 601.3232 1398.6604    474

ADAM ETS Two:
------------------------------------------------------------
    Parameter  Count                                                                                                                                                                  

In [None]:
# Example: Easy access to individual values from the dictionary
print("=" * 70)
print("EXAMPLE: ACCESSING INDIVIDUAL VALUES")
print("=" * 70)

# Get all alpha values for ADAM ETS Back method
method = "ADAM ETS Back"
print(f"\n1. All alpha values for {method}:")
print(f"   param_results['{method}']['alpha'][:10] = {param_results[method]['alpha'][:10]}")

# Get model types for specific series indices
print(f"\n2. Model types for first 5 series ({method}):")
for i in range(5):
    print(f"   Series {i}: {param_results[method]['model_types'][i]}")

# Filter by model type
print(f"\n3. Alpha values for models with trend (containing 'A' or 'M' in position 2):")
model_types = param_results[method]['model_types']
alpha_vals = param_results[method]['alpha']
trend_mask = [m is not None and len(m) >= 2 and m[1] in ['A', 'M'] for m in model_types]
alpha_with_trend = alpha_vals[trend_mask]
print(f"   Count: {len(alpha_with_trend)}, Mean: {np.nanmean(alpha_with_trend):.4f}")

# Compare parameters across methods
print(f"\n4. Mean alpha across all methods:")
for method in methods_names:
    mean_alpha = np.nanmean(param_results[method]['alpha'])
    print(f"   {method}: {mean_alpha:.4f}")

In [None]:
# Dictionary structure reference
print("=" * 70)
print("PARAM_RESULTS DICTIONARY STRUCTURE")
print("=" * 70)
print("""
param_results = {
    'ADAM ETS Back': {
        'model_types': [...],      # list[str]: Model types (e.g., 'AAN', 'MAdM', 'ANN')
        'loss_values': np.array,   # float: Loss function values (CF_value)
        'alpha': np.array,         # float: Level smoothing parameter
        'beta': np.array,          # float: Trend smoothing parameter (NaN if no trend)
        'gamma': np.array,         # float: Seasonal smoothing parameter (NaN if no season)
        'phi': np.array,           # float: Dampening parameter (1.0 if not damped)
        'errors': [...],           # list[str|None]: Error messages (None if success)
    },
    'ADAM ETS Opt': { ... },
    'ADAM ETS Two': { ... },
    'ES Back': { ... },
    'ES Opt': { ... },
    'ES Two': { ... },
}

Each array has length = number of datasets (4004 for M1+M3).
Access patterns:
  - Single value:  param_results['ADAM ETS Back']['alpha'][0]
  - All values:    param_results['ADAM ETS Back']['alpha']
  - By condition:  param_results['ADAM ETS Back']['alpha'][mask]
""")

# Show actual structure
print("\nActual keys in param_results:")
print(f"  Methods: {list(param_results.keys())}")
print(f"  Per-method keys: {list(param_results[methods_names[0]].keys())}")
print(f"  Array lengths: {len(param_results[methods_names[0]]['alpha'])}")