In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import statsmodels.api as sm
from statsmodels.tsa.arima_process import ArmaProcess


In [2]:
# Initialize a DataFrame to store MAPE values
mape_df = pd.DataFrame(columns=['MAPE_BTF', 'MAPE_AR1', 'MAPE_AR2'])
mse_df = pd.DataFrame(columns=['MSE_BTF', 'MSE_AR1', 'MSE_AR2'])

# Loop through time series
for seed in range(0, 1000):

    # Generate synthetic AR(p) series
    np.random.seed(seed)
    ar_params = np.array([1, np.random.uniform(-0.5, 0.5)])
    ma_params = np.array([1])
    ar_process = ArmaProcess(ar_params, ma_params)
    synthetic_ar_data = ar_process.generate_sample(nsample=100)
    synthetic_ar_series = pd.Series(synthetic_ar_data)
    synthetic_ar_series = synthetic_ar_series + 10

    # Create shifts
    y = synthetic_ar_series
    y_plus_1 = y.shift(-1)
    y_minus_1 = y.shift(1)
    y_minus_2 = y.shift(2)

    # Prepare data for linear regression (drop NaN values due to shifting)
    df = pd.DataFrame({
        'y_plus_1': y_plus_1,
        'y': y,
        'y_minus_1': y_minus_1,
        'y_minus_2': y_minus_2
    }).dropna()

    # Estimation: BTF
    btf = sm.add_constant(df[['y_minus_1', 'y_plus_1']])
    model_btf = sm.OLS(df['y'], btf)
    results_btf = model_btf.fit()
    y_btf = results_btf.predict(btf)
    mape_btf = np.mean(np.abs((df['y'] - y_btf) / df['y'])) * 100

    # Estimation: AR(1)
    ar_1 = sm.add_constant(df[['y_minus_1']])
    model_ar_1 = sm.OLS(df['y'], ar_1)
    results_ar_1 = model_ar_1.fit()
    y_ar_1 = results_ar_1.predict(ar_1)
    mape_ar_1 = np.mean(np.abs((df['y'] - y_ar_1) / df['y'])) * 100

    # Estimation: AR(2)
    ar_2 = sm.add_constant(df[['y_minus_1', 'y_minus_2']])
    model_ar_2 = sm.OLS(df['y'], ar_2)
    results_ar_2 = model_ar_2.fit()
    y_ar_2 = results_ar_2.predict(ar_2)
    mape_ar_2 = np.mean(np.abs((df['y'] - y_ar_2) / df['y'])) * 100

    # Store MAPE
    mape_df.loc[seed] = [mape_btf, mape_ar_1, mape_ar_2]

    # Store MSE
    mse_df.loc[seed] = [results_btf.mse_model, results_ar_1.mse_model, results_ar_2.mse_model]
    
# # Display the MAPE DataFrame
# display(mape_df)
# display(mse_df)


In [3]:
# Find the column with the minimum MAPE value for each row
min_mape_columns = mape_df.idxmin(axis=1)

# Count the occurrences of each column being the minimum
min_mape_counts = min_mape_columns.value_counts()

# Display the result
print(min_mape_counts)


MAPE_BTF    808
MAPE_AR2    168
MAPE_AR1     24
Name: count, dtype: int64


In [4]:
# Find the column with the minimum MASE value for each row
min_mse_columns = mse_df.idxmin(axis=1)

# Count the occurrences of each column being the minimum
min_mse_counts = min_mse_columns.value_counts()

# Display the result
print(min_mse_counts)

MSE_AR2    828
MSE_AR1    104
MSE_BTF     68
Name: count, dtype: int64
