
# Time series seasonal - exercises

--- 

In [1]:
# Ignore every FutureWarning raised from here on; statsmodels emits a lot of
# them and they would otherwise clutter the cell outputs.
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [2]:
import numpy as np
import pandas as pd
from statsmodels.tsa.stattools import acf, pacf
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima_model import ARMA
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.seasonal import seasonal_decompose
import statsmodels.api as sm
import seaborn as sns
import datetime
from matplotlib import pyplot as plt
%matplotlib inline

# Enlarge seaborn's fonts by a factor of 1.5, then switch matplotlib to the
# 'fivethirtyeight' style sheet.
sns.set(font_scale=1.5)
plt.style.use('fivethirtyeight')

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

The following dataset contains quarterly data on income and consumption in the UK. The data are not seasonally adjusted.

In [3]:
# Load the income/consumption table; the path is relative to the working directory.
df = pd.read_csv('datasets/IncomeUK.csv')

In [4]:
# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,income,consumption
0,9014,8016
1,9659,8798
2,9848,9184
3,10316,9755
4,10254,9113


### 0. The values represent quarterly data from 01/01/1971 to 30/06/1985.

Create the corresponding datetime index.

### 1. Plot the time series for consumption.

### 2. Which order of differencing would you recommend?

### 3. Look at the autocorrelations and partial autocorrelations of consumption and its differences. Describe what you observe.

### 4. Decompose into seasonal and non-seasonal parts.

### 5. Perform a Dickey-Fuller test for stationarity on consumption and its differences.

In [None]:
# helper functions: Dickey-Fuller test, rolling statistics plot and autocorrelation plots
from statsmodels.tsa.stattools import adfuller

def test_stationarity(timeseries, maxlag=None, regression='c', autolag='AIC'):
    '''Run statsmodels' ``adfuller`` on a series and print a labelled summary.

    Parameters
    ----------
    timeseries
        Observations handed straight to ``adfuller``.
    maxlag, regression, autolag
        Forwarded unchanged to ``adfuller`` as keyword arguments.

    Returns
    -------
    None
        The summary (test statistic, p-value, lags used, number of
        observations, then one row per critical value) is printed only.
    '''
    # Labels for the first four entries of the tuple returned by adfuller.
    headline_labels = [
        'Test Statistic',
        'p-value',
        '#Lags Used',
        'Number of Observations Used',
    ]

    print('Results of Dickey-Fuller Test:')
    outcome = adfuller(
        timeseries,
        maxlag=maxlag,
        regression=regression,
        autolag=autolag,
    )

    summary = pd.Series(outcome[:4], index=headline_labels)
    # Entry 4 maps each significance level to its critical value.
    critical_values = outcome[4]
    for level, threshold in critical_values.items():
        summary[f'Critical Value ({level})'] = threshold
    print(summary)

def rolling(timeseries, window=12, center=True, figsize=(12, 8)):
    '''Plot a series with its rolling mean and a +/- one rolling-std band.

    Parameters
    ----------
    timeseries
        Object exposing a pandas-style ``.rolling(window=..., center=...)``.
    window
        Window size passed to ``.rolling``.
    center
        Passed to ``.rolling`` as its ``center`` argument.
    figsize
        Size of the matplotlib figure created for the plot.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    '''
    windowed = timeseries.rolling(window=window, center=center)
    moving_avg = windowed.mean()
    moving_std = windowed.std()

    # One row per curve: the data to draw and the keyword arguments for it.
    # The lower band gets an empty label; the upper band's label names both.
    curves = [
        (timeseries, dict(color='blue', label='Original')),
        (moving_avg, dict(color='red', label='Rolling Mean')),
        (moving_avg + moving_std,
         dict(color='red', ls='--', label='Rolling Mean +/- Rolling Std')),
        (moving_avg - moving_std, dict(color='red', ls='--', label='')),
    ]

    plt.figure(figsize=figsize)
    for values, line_kwargs in curves:
        plt.plot(values, **line_kwargs)
    plt.legend(loc='best')
    plt.title('Rolling Mean & Standard Deviation')
    plt.show()

def autocorrelation_plot(timeseries, lags=None):
    '''Draw the ACF and PACF of a series side by side.

    Parameters
    ----------
    timeseries
        Series handed to ``plot_acf`` and ``plot_pacf``.
    lags
        Forwarded to both plotting functions as ``lags``.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    '''
    # Left panel: autocorrelations; right panel: partial autocorrelations.
    _, (acf_axis, pacf_axis) = plt.subplots(ncols=2, figsize=(12, 4))
    plot_acf(timeseries, lags=lags, ax=acf_axis)
    plot_pacf(timeseries, lags=lags, ax=pacf_axis, method='ywmle')
    plt.show()

def test_and_vis(timeseries):
    '''Print the Dickey-Fuller summary, then show the rolling-statistics and
    ACF/PACF plots for a series.

    The test and the autocorrelation plots receive the series with missing
    values dropped; the rolling plot receives the series as given.
    '''
    without_gaps = timeseries.dropna()
    test_stationarity(without_gaps)
    rolling(timeseries)
    autocorrelation_plot(without_gaps)

### 6. Do you get better results in the Dickey-Fuller test if you take into account seasonality effects?

### 7. Irrespective of your previous answers, fit a SARIMA model to consumption. Which parameters did you choose for the non-seasonal and seasonal components?

### 8. Describe your diagnostic plots of the residuals. Check for residual autocorrelations.

### 9. Obtain fitted values with one-step-ahead forecasts, in-sample forecasts and out-of-sample forecasts. Compare the forecasts of different models.