In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('../data/processed/federal_df.csv', parse_dates=['EventDate'], low_memory=False)

In [3]:
y = pd.Series(
    df.set_index("EventDate")
    .resample("MS")["Hospitalized"]
    .sum()
)

y.head()


EventDate
2015-01-01    673
2015-02-01    623
2015-03-01    664
2015-04-01    612
2015-05-01    611
Freq: MS, Name: Hospitalized, dtype: int64

In [4]:
y.index = y.index.to_period("M")

In [5]:
y.index.name = "Date"

In [6]:
y.index

PeriodIndex(['2015-01', '2015-02', '2015-03', '2015-04', '2015-05', '2015-06',
             '2015-07', '2015-08', '2015-09', '2015-10',
             ...
             '2024-05', '2024-06', '2024-07', '2024-08', '2024-09', '2024-10',
             '2024-11', '2024-12', '2025-01', '2025-02'],
            dtype='period[M]', name='Date', length=122)

In [19]:
from sktime.split import temporal_train_test_split

y_train, y_test = temporal_train_test_split(y, train_size=108, test_size=12)
print(y_train.shape, y_test.shape)

(108,) (12,)


In [20]:
y_test

Date
2024-01    611
2024-02    544
2024-03    548
2024-04    535
2024-05    624
2024-06    634
2024-07    669
2024-08    673
2024-09    613
2024-10    604
2024-11    534
2024-12    544
Freq: M, Name: Hospitalized, dtype: int64

In [9]:
import numpy as np

In [21]:
from sktime.forecasting.base import ForecastingHorizon

fh = ForecastingHorizon(np.arange(1, 13), is_relative=True)

In [11]:
fh

ForecastingHorizon([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], dtype='int64', is_relative=True)

In [12]:
from sktime.forecasting.theta import ThetaForecaster

forecaster = ThetaForecaster(sp=12)

forecaster.fit(y_train)

In [13]:
y_pred = forecaster.predict(fh)

In [14]:
print(y_test, y_pred)

Date
2024-03    548
2024-04    535
2024-05    624
2024-06    634
2024-07    669
2024-08    673
2024-09    613
2024-10    604
2024-11    534
2024-12    544
2025-01    654
2025-02    588
Freq: M, Name: Hospitalized, dtype: int64 2024-03    567.002250
2024-04    534.113214
2024-05    545.815332
2024-06    622.533356
2024-07    653.412778
2024-08    647.385405
2024-09    579.083835
2024-10    581.606789
2024-11    529.876677
2024-12    513.899357
2025-01    571.385262
2025-02    557.781149
Freq: M, Name: Hospitalized, dtype: float64


In [16]:
from sktime.performance_metrics.forecasting import (
    mean_absolute_error,
    mean_squared_error,
    mean_absolute_percentage_error,
)

# Make sure indices align (just in case)
#y_test_eval = y_test[y_pred.index]

mae = mean_absolute_error(y_test, y_pred)
rmse = mean_squared_error(y_test, y_pred, square_root=True)
mape = mean_absolute_percentage_error(y_test, y_pred)

print("MAE :", mae)
print("RMSE:", rmse)
print("MAPE:", mape)


MAE : 29.509091235660275
RMSE: 38.53402565453665
MAPE: 0.047853867551807365


In [15]:
print(y_test.index)
print(y_pred.index)


PeriodIndex(['2024-03', '2024-04', '2024-05', '2024-06', '2024-07', '2024-08',
             '2024-09', '2024-10', '2024-11', '2024-12', '2025-01', '2025-02'],
            dtype='period[M]', name='Date')
PeriodIndex(['2024-03', '2024-04', '2024-05', '2024-06', '2024-07', '2024-08',
             '2024-09', '2024-10', '2024-11', '2024-12', '2025-01', '2025-02'],
            dtype='period[M]')


In [26]:
from sktime.forecasting.exp_smoothing import ExponentialSmoothing

# y_train: last-12-months-left-out training set (with PeriodIndex)
# fh: ForecastingHorizon(range(1, 13), is_relative=True)

ets_forecaster = ExponentialSmoothing(
    trend="add",        # or "mul" or None
    seasonal="add",     # or "mul" or None
    sp=12              # monthly data with yearly seasonality,
)

ets_forecaster.fit(y_train)

y_pred_ets = ets_forecaster.predict(fh)


In [25]:
y_test

Date
2024-01    611
2024-02    544
2024-03    548
2024-04    535
2024-05    624
2024-06    634
2024-07    669
2024-08    673
2024-09    613
2024-10    604
2024-11    534
2024-12    544
Freq: M, Name: Hospitalized, dtype: int64

In [27]:
mae_ets = mean_absolute_error(y_test, y_pred_ets)
rmse_ets = mean_squared_error(y_test, y_pred_ets, square_root=True)
mape_ets = mean_absolute_percentage_error(y_test, y_pred_ets)

print("MAE :", mae_ets)
print("RMSE:", rmse_ets)


MAE : 22.92543654056216
RMSE: 30.50585677179016


In [51]:
from sklearn.linear_model import Ridge
from sktime.forecasting.compose import make_reduction

ridge_reg = Ridge(alpha=1.0)

ridge_forecaster = make_reduction(
    ridge_reg,
    strategy='recursive',
    window_length=12
)

In [52]:
ridge_fit = ridge_forecaster.fit(y_train,fh=fh)
y_pred_ridge = ridge_forecaster.predict(fh)



In [53]:
transformer = ridge_forecaster.get_fitted_params()

In [54]:
transformer

{'estimator': Ridge(),
 'transformers': None,
 'window_length': 12,
 'estimator__coef': array([ 0.24963479,  0.11208931, -0.02332862,  0.08980293, -0.15367937,
         0.07560553, -0.11315511,  0.2022133 , -0.03021723, -0.08776801,
         0.12004193,  0.49229714]),
 'estimator__intercept': np.float64(41.10677399764768),
 'estimator__n_features_in': 12,
 'estimator__n_iter': None,
 'estimator__solver': 'cholesky'}

In [55]:
mae_ridge = mean_absolute_error(y_test, y_pred_ridge)
rmse_ridge = mean_squared_error(y_test, y_pred_ridge, square_root=True)

print(mae_ridge)
print(rmse_ridge)

32.025068862889334
37.28211882890261
