# 0. Analyze simple time series models

> What are simple baselines you could compare your model to? Can any univariate / vector AR models be used to achieve comparable MAE? Are there simple time series methods that you could use to quantify whether infection counts are dependent on the lagged covariates?

In [None]:
import sys
sys.path.append('../')
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.api as sm
from src.d01_data.dengue_data_api import DengueDataApi

In [None]:
from scipy import signal
# Build the data API object and derive the train/test inputs used by the models below.
dda = DengueDataApi()
# x1/y1 are used as the training set and x2/y2 as the evaluation set further down.
# random=False presumably yields a chronological (non-shuffled) split —
# TODO confirm against DengueDataApi.split_data.
x1, x2, y1, y2 = dda.split_data(random=False)
# Project the covariates onto 5 SVD components. pct_var presumably holds the
# share of variance explained — TODO confirm against DengueDataApi.get_svd.
z1, z2, pct_var = dda.get_svd(x1, x2, num_components=5)

## ARX

For this first model we are going to use **linear interpolation** to fill in missing values. We also normalize the covariates to zero mean and unit variance.

In [None]:
# Plot the training target for every city: raw counts on the top row and
# log(y + 1) on the bottom row, one column of axes per city.
# NOTE(review): `city_arr` and `WEEK_START_DATE_COL` are neither defined nor
# imported in the visible notebook cells; they must be in scope before this
# cell runs — confirm where they are supposed to come from.
plot_data = y1.to_frame()
# log1p(y) == log(y + 1), evaluated on the whole column at once.
plot_data['log(y+1)'] = np.log1p(plot_data['total_cases'])
# Size the grid from the number of cities instead of hard-coding two columns.
# squeeze=False keeps `ax` two-dimensional even when there is a single city,
# and max(..., 1) avoids asking matplotlib for a zero-column grid.
fig, ax = plt.subplots(
    ncols=max(len(city_arr), 1),
    nrows=2,
    figsize=(16, 12),
    squeeze=False,
)
# The loop variable is deliberately named `city`: a later cell reads the value
# it holds after the loop finishes.
for c, city in enumerate(city_arr):
    city_data = plot_data.loc[city]
    t = city_data.index.get_level_values(WEEK_START_DATE_COL)
    ax[0][c].plot(t, city_data['total_cases'])
    ax[1][c].plot(t, city_data['log(y+1)'])
    ax[0][c].set_title(city)
    if c == 0:
        # Only the left-most column carries the y-axis labels.
        ax[0][c].set_ylabel('$y_t$')
        ax[1][c].set_ylabel('$\\log(y_t+1)$')
plt.show()


In [None]:
from src.d04_modeling.arx import ARX

In [None]:
# Baseline fitted on the SVD-reduced training covariates (z1) and training target (y1).
# p=None presumably disables the autoregressive lags, leaving a plain regression
# on the covariates (hence the "OLS" label) — TODO confirm in ARX.
ols_model = ARX(x_train=z1, y_train=y1, p=None)
ols_model.fit()
# In-sample diagnostics: predictions and residual analysis on the training data.
ols_model.plot_prediction(z1, y1)
ols_model.analyze_residuals(z1, y1)
# MAE is evaluated on the held-out split (z2, y2).
print("MAE OLS: %.4f" % ols_model.get_mae(z2, y2))

In [None]:
# Fetch the per-city fitted results of the OLS baseline and show their summary.
# NOTE(review): `city` is not assigned explicitly before this cell when the
# notebook is run top to bottom; it only exists as the variable left over from
# the `for city in city_arr` plotting loop above (the explicit `city='sj'`
# assignment comes later). Confirm which city is intended here.
res_ols = ols_model.get_model_results(city)
res_ols.summary()

### ARX(2)

In [None]:
# Fit the ARX model with p = 2 on the training split.
# NOTE: `p` and `arx_model` are notebook-level names that later cells overwrite/reuse.
p = 2
arx_model = ARX(x_train=z1, y_train=y1, p=p)
arx_model.fit()
# In-sample diagnostics on the training data.
arx_model.plot_prediction(z1, y1)
arx_model.analyze_residuals(z1, y1)
# MAE is evaluated on the held-out split (z2, y2).
print("MAE ARX: %.4f" % arx_model.get_mae(z2, y2))
arx_model.insample_model_evaluation()

### ARX(3)

In [None]:
# Refit the ARX model with p = 3 on the training split.
# This rebinds `p` and `arx_model`; the cells below therefore operate on this
# p = 3 model.
p = 3
arx_model = ARX(x_train=z1, y_train=y1, p=p)
arx_model.fit()
# In-sample diagnostics on the training data.
arx_model.plot_prediction(z1, y1)
arx_model.analyze_residuals(z1, y1)
arx_model.insample_model_evaluation()

### Prediction Error

In [None]:
# Evaluate the most recently fitted `arx_model` on the held-out split (z2, y2):
# plot its predictions and print the mean absolute error.
arx_model.plot_prediction(z2, y2)
print("MAE ARX: %.4f" % arx_model.get_mae(z2, y2))

In [None]:
# City key used to select the per-city model results and data in the cells below.
city = 'sj'

In [None]:
# Fitted results of the current `arx_model` for the selected city.
res_arx = arx_model.get_model_results(city)
# Build the endogenous/exogenous inputs for that city from the held-out split.
endog, exog = arx_model.format_data_arimax(z2.loc[city], y2.loc[city])
# All-NaN series sharing endog's index and name, plus a copy of it.
# NOTE(review): neither `endog_nan` nor `predictions` is used in the cells
# shown here — possibly leftovers from a manual prediction loop; confirm.
endog_nan = pd.Series(np.nan, index=endog.index, name=endog.name)
predictions = endog_nan.copy()

In [None]:
# Show the summary of the fitted ARX results for the selected city.
res_arx.summary()

In [None]:
# Build a SARIMAX model over the held-out series for the selected city.
# order=(p, 0, 0) uses the notebook-level `p`, so it must still match the order
# of the model that produced `res_arx`.
model = sm.tsa.statespace.SARIMAX(endog=endog, exog=exog, order=(p, 0, 0))
# Run the filter with the parameters taken from the fitted results instead of
# re-estimating them on this data.
filtered_results = model.filter(res_arx.params)
# Overlay the observed series and the filter-based predictions.
plt.plot(endog, label='obs')
plt.plot(filtered_results.predict(), label='prediction')
plt.legend()
plt.show()