In [None]:
import warnings

import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
from IPython.display import VimeoVideo
from pymongo import MongoClient
from sklearn.metrics import mean_absolute_error
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.ar_model import AutoReg

warnings.simplefilter(action="ignore", category=FutureWarning)

In [None]:
# Open a client against localhost:27017, then select the "air-quality"
# database and its "nairobi" collection.
client = MongoClient("localhost", 27017)
db = client["air-quality"]
nairobi = db["nairobi"]

In [None]:
# NOTE(review): `wrangle` is neither defined nor imported in the cells visible
# here — confirm it is available before this cell runs on a fresh kernel.
y = wrangle(nairobi)
y.head(5)

In [None]:
# Autocorrelation plot of `y`: correlation of the series with lagged copies
# of itself, drawn on an explicitly created axes.
fig, ax = plt.subplots(figsize=(15, 6))
plot_acf(y, ax=ax)
ax.set_xlabel("Lag [hours]")
ax.set_ylabel("Correlation Coefficient");

In [None]:
# Partial autocorrelation plot of `y`, drawn on an explicitly created axes.
fig, ax = plt.subplots(figsize=(15, 6))
plot_pacf(y, ax=ax)
ax.set_xlabel("Lag [hours]")
ax.set_ylabel("Correlation Coefficient");

In [None]:
# Positional split: the first 95% of rows become the training set and the
# remaining rows are held out as the test set.
cutoff_test = int(len(y) * 0.95)

y_train, y_test = y.iloc[:cutoff_test], y.iloc[cutoff_test:]

In [None]:
# Naive baseline: predict the training mean for every training observation
# and score that constant prediction with mean absolute error.
y_train_mean = y_train.mean()
y_pred_baseline = [y_train_mean for _ in range(len(y_train))]
mae_baseline = mean_absolute_error(y_train, y_pred_baseline)

print("Mean P2 Reading:", round(y_train_mean, 2))
print("Baseline MAE:", round(mae_baseline, 2))

In [None]:
# Fit an autoregressive model with 26 lags on the training series.
model = AutoReg(endog=y_train, lags=26).fit()

In [None]:
# In-sample predictions, with rows that have no prediction (NaN) dropped.
y_pred = model.predict().dropna()
# The first 26 training rows are skipped to match the 26 lags used when
# fitting — presumably those are exactly the rows dropped above; confirm.
training_mae = mean_absolute_error(y_true=y_train.iloc[26:], y_pred=y_pred)
print("Training MAE:", training_mae)

In [None]:
# Residuals of the fitted model; show the last five for a quick look.
y_train_resid = model.resid
y_train_resid.tail(5)

In [None]:
# Line plot of the training residuals on an explicitly created axes.
fig, ax = plt.subplots(figsize=(15, 6))
ax.plot(y_train_resid)

In [None]:
# Histogram of the training residuals.
plt.hist(y_train_resid)
plt.xlabel("Residual val")
# Label spelling fixed ("frequencey" -> "frequency"); the trailing semicolon
# keeps the Text repr out of the cell output, as in the other labelled plots.
plt.ylabel("frequency");

In [None]:
# Autocorrelation plot of the training residuals.
fig, ax = plt.subplots(figsize=(15, 6))
# plot_acf returns the Figure; leaving it as the cell's last expression makes
# the notebook render the figure twice, so it is no longer the final statement.
plot_acf(y_train_resid, ax=ax)
# Same axis labels as the ACF/PACF plots of `y`.
ax.set_xlabel("Lag [hours]")
ax.set_ylabel("Correlation Coefficient");

In [None]:
# Predict from the smallest to the largest index label of the test set with
# the model fitted on the training data, then score against the test values.
y_pred_test = model.predict(start=y_test.index.min(), end=y_test.index.max())
test_mae = mean_absolute_error(y_true=y_test, y_pred=y_pred_test)
print("Test MAE:", test_mae)

In [None]:
# Put test values and model predictions side by side, indexed like the test
# set, and draw both as lines in one interactive figure.
df_pred_test = pd.DataFrame(
    data={"y_test": y_test, "y_pred": y_pred_test},
    index=y_test.index,
)
fig = px.line(df_pred_test, labels={"value": "P2"})
fig.show()

In [None]:
%%capture

y_pred_wfv = pd.Series()
history = y_train.copy()
for i in range(len(y_test)):
    model=AutoReg(history,lags=26).fit()
    next_pred=model.forecast()
    y_pred_wfv=y_pred_wfv.append(next_pred)
    history=history.append(y_test[next_pred.index])
print(model.params)

In [None]:
# Compare the test values with the walk-forward predictions in one
# interactive line chart.
df_pred_test = pd.DataFrame(data={"y_test": y_test, "y_pred_wfv": y_pred_wfv})
fig = px.line(df_pred_test, labels={"value": "PM2.5"})
fig.show()