## Sandbox for Pollen Forecasting

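The workflow below follows the approach described in [this article](https://towardsdatascience.com/an-end-to-end-project-on-time-series-analysis-and-forecasting-with-python-4835e6bf050b) on end-to-end time series analysis and forecasting with Python.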

In [None]:
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
plt.style.use('fivethirtyeight')
import pandas as pd
import statsmodels.api as sm
import matplotlib
%matplotlib inline
import seaborn as sns
from datetime import datetime, timedelta


In [None]:
# Read the raw CSV, parsing the 'date' column and using it as the index,
# then preview the first rows as the cell output.
read_options = dict(index_col=['date'], parse_dates=['date'])
data = pd.read_csv('1996-2019.csv', **read_options)
data.head()

In [None]:
# Print the frame summary. `info` is a method: without the call parentheses
# the cell only shows the bound-method repr instead of the summary.
data.info()

In [None]:
# Keep only rows whose index falls in [2007-01-01, 2010-01-01), then plot the
# Gramineae column over that window.
window_start = datetime(2007, 1, 1)
window_end = datetime(2010, 1, 1)
in_window = (data.index >= window_start) & (data.index < window_end)
data = data.loc[in_window]
sns.lineplot(x=data.index, y='Gramineae', data=data)

In [None]:
# Resample to calendar-day bins, averaging the values within each bin;
# any missing values left after averaging are replaced with 0.0.
daily_mean = data.resample('D').mean()
daily = daily_mean.fillna(0.0)

In [None]:
# Pull the Gramineae column out of the daily frame under its own name.
# NOTE(review): the later cells visible here index daily['Gramineae'] directly
# rather than using `gram` — confirm whether this name is still needed.
gram = daily['Gramineae']

In [None]:
# Additive seasonal decomposition of the daily Gramineae series.
# The figure size is set through the already-imported pyplot module instead of
# the legacy `pylab` namespace (both expose the same rcParams object).
plt.rcParams['figure.figsize'] = 18, 8

# NOTE(review): no `period` is passed, so the seasonal period is inferred from
# the index frequency — confirm the inferred cycle is the one intended here.
decomposition = sm.tsa.seasonal_decompose(daily['Gramineae'], model='additive')

# `plot()` returns a Figure; leaving it as the cell's last expression makes the
# notebook render the same figure twice. Ending on plt.show() renders it once.
decomposition.plot()
plt.show()

In [None]:
# Grid search over SARIMAX orders, keeping the combination with the lowest AIC.
# Results are left in `low_a` (order), `low_b` (seasonal order) and `lowest`
# (the winning AIC) for the following cells.
import itertools

# Each of p, d, q is tried at 0 and 1.
p = d = q = [0, 1]
pdq = list(itertools.product(p, d, q))
# Seasonal candidates reuse the same combinations with a season length of 12.
# NOTE(review): the season length is counted in index steps — confirm that 12
# matches a real cycle in this series.
seasonal_pdq = [(x[0], x[1], x[2], 12) for x in pdq]

low_a = low_b = lowest = None
for param in pdq:
    for param_seasonal in seasonal_pdq:
        try:
            mod = sm.tsa.statespace.SARIMAX(
                daily['Gramineae'],
                order=param,
                seasonal_order=param_seasonal,
                enforce_stationarity=False,
                enforce_invertibility=False)
            # disp=False keeps the optimizer's convergence log out of the
            # cell output; it does not change the fitted result.
            results = mod.fit(disp=False)
        except Exception as exc:
            # Best-effort search: report which candidate failed and move on.
            print('ARIMA{}x{} failed: {}'.format(param, param_seasonal, exc))
            continue

        # A NaN/inf AIC cannot be ranked; accepting one as the first
        # "lowest" would make every later `<=` comparison fail.
        if not np.isfinite(results.aic):
            continue

        if lowest is None or results.aic <= lowest:
            lowest = results.aic
            print("New lowpoint: ", lowest)
            print('ARIMA{}x{}12 - AIC:{}'.format(param, param_seasonal, results.aic))
            low_a, low_b = param, param_seasonal
print('done', low_a, low_b)

In [None]:
# Refit SARIMAX with the orders selected by the grid search (low_a / low_b)
# and print the second table of the fit summary.
best_fit_options = dict(
    order=low_a,
    seasonal_order=low_b,
    enforce_stationarity=False,
    enforce_invertibility=False,
)
mod = sm.tsa.statespace.SARIMAX(daily['Gramineae'], **best_fit_options)
results = mod.fit()
summary_table = results.summary().tables[1]
print(summary_table)

In [None]:
# Draw the fitted model's diagnostic plots at a 16x8 figure size.
diagnostics_figsize = (16, 8)
results.plot_diagnostics(figsize=diagnostics_figsize)
plt.show()

In [None]:
# Print the summary of the daily-resampled frame.
daily.info()

In [None]:
# Compare the fitted model's predictions with the observed series.
# Predictions start at 2009-01-01; dynamic=False means each prediction uses
# the observed history up to that point (one-step-ahead).
pred = results.get_prediction(start=pd.to_datetime('2009-01-01'), dynamic=False)
pred_ci = pred.conf_int()

# Observed values are drawn from 2008 onward to give the forecast some context.
ax = daily['Gramineae'].loc['2008':].plot(label='observed')
pred.predicted_mean.plot(ax=ax, label='One-step ahead Forecast', alpha=.7, figsize=(14, 7))

# Shade the band between the lower and upper confidence bounds.
ax.fill_between(pred_ci.index,
                pred_ci.iloc[:, 0],
                pred_ci.iloc[:, 1], color='k', alpha=.2)
ax.set_xlabel('Date')
# The plotted series is the Gramineae column, not the 'Furniture Sales' label
# this cell previously carried.
ax.set_ylabel('Gramineae pollen')
plt.legend()
plt.show()

In [None]:
# Show the daily Gramineae series as the cell output.
daily['Gramineae']