In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pylab as plt
import scipy.stats as st

from statsmodels.tsa.stattools import kpss
from statsmodels.tsa.seasonal import seasonal_decompose

from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.arima_model import ARIMA

from statsmodels.stats.diagnostic import acorr_ljungbox

from statsmodels.tsa.holtwinters import ExponentialSmoothing

# Notebook-wide default figure size (width, height) used by every plot below.
plt.rcParams.update({'figure.figsize': (10, 8)})

In [None]:
# Load the raw records. Column 0 is parsed as dates with the day written
# before the month (presumably this is the 'Time' column used below).
data = pd.read_csv(
    'time.txt',
    parse_dates=[0],
    dayfirst=True,
)

In [None]:
# Peek at the first five rows to confirm the file was parsed as expected.
data.head(n=5)

In [None]:
# Number of records sharing each distinct 'Time' value; the result is a
# Series indexed by those values.
timestamps = data['Time']
groups = timestamps.groupby(timestamps).count()

In [None]:
# Running total of the per-timestamp counts.
# NOTE: this rebinds `data` from the raw frame to a Series, so the cells
# above cannot be re-run after this one without reloading the file first.
data = groups.cumsum(skipna=True)

In [None]:
# Quick look at the cumulative series, plotted against observation number
# (the index values themselves are not used on the x axis).
cumulative_values = data.values
_ = plt.plot(cumulative_values)

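STL-style seasonal-trend decomposition (computed with `seasonal_decompose`, i.e. classical moving averages, with a 7-observation period):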

In [None]:
# Additive decomposition into trend / seasonal / residual with a period of
# 7 observations, followed by the standard panel plot of the components.
stl_data = seasonal_decompose(data, model='additive', period=7)
_ = stl_data.plot()


# ARIMA

In [None]:
# KPSS test on the raw cumulative series (null hypothesis: the series is
# stationary around a constant level).
kpss(data, regression='c', nlags='legacy')

In [None]:
# Put the cumulative counts on a gap-free daily grid.
#
# Days that have no entry in `data` inherit the most recent earlier value
# (forward fill). Days before the first available entry stay NaN, exactly
# as the previous hand-written loop left them.
#
# `reindex` and `ffill` are deliberately two separate steps:
# `reindex(..., method='ffill')` would also pull in values recorded *before*
# the start of the range, which the original logic never did.
time_iter = pd.date_range('20190210', '20190401', freq='D')

new_data = (
    data
    .reindex(time_iter)   # NaN on days absent from `data`
    .ffill()              # carry the last seen total forward
    .astype(float)
    .to_frame('Time')     # downstream cells expect a frame with a 'Time' column
)


In [None]:
# Repeat the weekly additive decomposition, this time on the gap-free daily
# series, and show the component panels. This overwrites the earlier
# `stl_data`.
daily_series = new_data['Time']
stl_data = seasonal_decompose(daily_series, model='additive', period=7)
stl_data.plot()

plt.show()

In [None]:
# Remove the estimated trend from the daily series.
#
# Three points are trimmed from each end. This presumably matches the
# undefined edge values of the centred 7-point moving-average trend
# (7 // 2 == 3) -- confirm against `stl_data.trend`.
detrended = new_data['Time'] - stl_data.trend

# Slice first, then copy: the result is an independent frame, so assigning
# the column below does not write into a slice of another frame (which is
# what triggers pandas' SettingWithCopyWarning).
new_data_without_trend = new_data.iloc[3:-3].copy()
new_data_without_trend['Time'] = detrended.iloc[3:-3]

plt.plot(new_data_without_trend)

plt.show()

In [None]:
# KPSS stationarity test on the gap-free daily series (trend still present).
daily_with_trend = new_data['Time']
kpss(daily_with_trend, nlags='legacy')

In [None]:
# Same KPSS test after the decomposition trend has been subtracted.
kpss(new_data_without_trend['Time'], regression='c', nlags='legacy')

In [None]:
# Lag-7 (weekly) difference of the detrended series; the first seven rows
# are NaN and are simply not drawn.
weekly_difference = new_data_without_trend.diff(periods=7)
plt.plot(weekly_difference)

plt.show()

In [None]:
# KPSS test on the weekly-differenced detrended series; the first seven
# values are skipped because differencing at lag 7 leaves them undefined.
weekly_diff_series = new_data_without_trend['Time'].diff(7)
kpss(weekly_diff_series.iloc[7:], nlags='legacy')

In [None]:
# Fit ARIMA(1, 1, 1) to the detrended series and compare its one-step-ahead
# predictions with the data.
#
# The maintained implementation is imported here on purpose: the class in
# `statsmodels.tsa.arima_model` was deprecated and later turned into a stub
# that raises NotImplementedError. The replacement also returns predictions
# on the original (level) scale, so they are directly comparable with the
# plotted series; the old default returned differences.
from statsmodels.tsa.arima.model import ARIMA

detrended_values = new_data_without_trend['Time'].values
arima = ARIMA(detrended_values, order=(1, 1, 1)).fit()

# Predictions cover observations 1..n-1 plus one step past the sample (n).
first_step, last_step = 1, len(detrended_values)
one_step_ahead = arima.predict(first_step, last_step, dynamic=False)

plt.plot(detrended_values, alpha=0.5, ls='--')
# Draw each prediction at the position of the observation it refers to;
# plotting without x values would shift the whole curve one step to the left.
plt.plot(range(first_step, last_step + 1), one_step_ahead)

plt.show()

In [None]:
# Residual diagnostics for the fitted model: sequence plot, histogram, and a
# Shapiro-Wilk normality test (shown as the cell output).
residuals = arima.resid

_ = plt.plot(residuals)
plt.show()

_ = plt.hist(residuals)
plt.show()

st.shapiro(residuals)

In [None]:
# Seasonal model on the detrended series: ARMA(1, 1) for the non-seasonal
# part, seasonal part (0, 2, 1) with a period of 7. This rebinds `arima`,
# replacing the previous fit.
sarimax_model = SARIMAX(
    new_data_without_trend['Time'].values,
    order=(1, 0, 1),
    seasonal_order=(0, 2, 1, 7),
)
arima = sarimax_model.fit()

arima.summary()


In [None]:
# Detrended observations (dashed) against the model's predictions.
# The end index is the length of the *untrimmed* series, so the prediction
# curve runs past the end of the detrended sample.
observed = new_data_without_trend['Time'].values
last_step = len(new_data['Time'].values)

plt.plot(observed, alpha=0.5, ls='--')
plt.plot(arima.predict(0, last_step))

plt.show()

In [None]:
# Same residual checks as before, now for whichever model `arima` currently
# refers to: sequence plot, histogram, then Shapiro-Wilk as the cell output.
model_residuals = arima.resid

_ = plt.plot(model_residuals)
plt.show()

_ = plt.hist(model_residuals)
plt.show()

st.shapiro(model_residuals)

In [None]:
# Ljung-Box test for leftover autocorrelation in the residuals at lags 1..30.
#
# `return_df=True` is requested explicitly so the result is always a
# DataFrame and the p-values can be taken by column name; positional `[1]`
# only works when the function returns a tuple, which depends on the
# statsmodels version.
ljung_box = acorr_ljungbox(arima.resid, lags=30, return_df=True)
p_values = ljung_box['lb_pvalue']

# Lags are numbered from 1, so the x axis starts at 1 rather than 0.
plt.scatter(range(1, len(p_values) + 1), p_values)

plt.show()

In [None]:
# Display the trend component of the most recent decomposition; it is added
# back to the model predictions in the next cell.
stl_data.trend

In [None]:
# Put the trend back onto the model output so predictions can be compared
# with the daily series on its original scale.
# The [3:-3] slice lines the trend up with the detrended sample, which was
# trimmed by three points at each end.
trend_core = stl_data.trend.values.flatten()[3:-3]
last_index = len(new_data_without_trend) - 1

# In-sample confidence bounds: column 0 is the lower bound, column 1 the upper.
predictition = arima.get_prediction(0, last_index).conf_int()
lower_band = trend_core + predictition[:, 0]
upper_band = trend_core + predictition[:, 1]
intervals = lower_band, upper_band

plt.fill_between(range(len(lower_band)), lower_band, upper_band, alpha=0.2)
plt.ylim(0, 250)
plt.plot(new_data['Time'].values[3:-3], alpha=0.5, ls='--', c='r')  # observed
plt.plot(trend_core + arima.predict(0, last_index), c='g')          # trend + model

plt.show()

# ETS

In [None]:
# Exponential smoothing with additive trend and additive weekly seasonality,
# fitted directly to the daily series (no manual detrending).
daily_values = new_data['Time'].values
ets_model = ExponentialSmoothing(
    daily_values,
    trend='add',
    seasonal='add',
    seasonal_periods=7,
)
ets = ets_model.fit()

plt.plot(daily_values, alpha=0.5, ls='--', c='r')

# Predictions for steps 0..60 inclusive.
predicted = ets.predict(0, 60)
plt.plot(predicted)

plt.show()