In [None]:
# Import required packages

In [None]:
%matplotlib inline
import matplotlib.pylab as plt

import pandas as pd
import numpy as np
import statsmodels as sm

In [None]:
# Load the labour-force participation-rate CSV into a DataFrame.
# header=0 treats the file's first row as a header so that the labels given in
# `names` replace it; the first column ("index") then becomes the row index.
singapore_df = pd.read_csv(
    "singapore_lfpr.csv",
    header=0,
    names=["index", "LabourForcePR"],
    index_col=0,
)

In [None]:
# Preview the first rows to confirm the CSV was parsed with the expected column.
singapore_df.head()

In [None]:
# Converting to a Series object
# Generating a proper date for the index: one year-end (31 December) timestamp
# per observation, starting with 1980.
# - The start date is written in ISO format so it cannot be misread as
#   month/day.
# - The number of periods follows the data instead of a hard-coded 28, so the
#   cell keeps working if the CSV gains or loses rows.
# - An offset object is used rather than the 'A-DEC' alias string, which newer
#   pandas releases deprecate in favour of 'YE-DEC'.
singapore_lfpr = pd.Series(
    singapore_df['LabourForcePR'].values,
    index=pd.date_range(
        start='1980-12-31',
        periods=len(singapore_df),
        freq=pd.offsets.YearEnd(month=12),
    ),
)

In [None]:
# Display the rebuilt Series to check the values against their new date index.
singapore_lfpr

In [None]:
# Line plot of the undifferenced series on a 12x8-inch canvas.
lfpr_fig, lfpr_ax = plt.subplots(figsize=(12, 8))
lfpr_ax.plot(singapore_lfpr)

In [None]:
# Test for Stationarity
def stationarity_test(timeseries):
    """Run the Augmented Dickey-Fuller (ADF) test and print the results.

    The ADF null hypothesis is that the series has a unit root, i.e. that it
    is non-stationary. A small p-value, or a test statistic below the
    critical value for the chosen significance level, is evidence against
    the null.

    Parameters
    ----------
    timeseries : array-like
        One-dimensional series of observations; passed unchanged to
        ``statsmodels.tsa.stattools.adfuller``.

    Returns
    -------
    None
        The results are printed as a pandas Series, not returned.
    """
    from statsmodels.tsa.stattools import adfuller

    print("Results of Dickey-Fuller Test:")
    # autolag="AIC" lets adfuller pick the lag length by information criterion.
    df_test = adfuller(timeseries, autolag="AIC")
    df_output = pd.Series(df_test[0:4],
                          index=["Test Statistic", "p-value", "#Lags Used",
                                 "Number of Observations Used"])
    # Element 4 of the result maps significance levels ("1%", "5%", "10%") to
    # critical values; without them the test statistic cannot be interpreted.
    for level, critical_value in df_test[4].items():
        df_output["Critical Value (%s)" % level] = critical_value
    print(df_output)

In [None]:
# Run the ADF test on the original (undifferenced) series.
stationarity_test(singapore_lfpr)

In [None]:
# First-order differencing: each value minus the previous one.
# The first element has no predecessor and is therefore NaN.
singapore_1diff = singapore_lfpr.diff(periods=1)

In [None]:
# Drop the missing value(s) left by differencing; dropna() returns a new Series.
singapore_1diff = singapore_1diff.dropna()

In [None]:
# Line plot of the first-differenced series on a 12x8-inch canvas.
diff_fig, diff_ax = plt.subplots(figsize=(12, 8))
diff_ax.plot(singapore_1diff)

In [None]:
# Re-run the ADF test, this time on the first-differenced series.
stationarity_test(singapore_1diff)

In [None]:
# ACF and PACF plots
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

In [None]:
# Visual Test for Autocorrelation and Partial-Autocorrelation
# The differenced series is short. Current statsmodels refuses to compute
# partial autocorrelations for lags at or beyond half the sample size, so the
# requested 20 lags are capped to stay below that limit (and never below 1).
# The same lag count is used for both panels so they share an x-axis range.
acf_pacf_lags = max(1, min(20, len(singapore_1diff) // 2 - 1))

fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8))
# The return values are assigned so the cell does not end on a bare Figure,
# which would make the notebook render the figure a second time.
fig = plot_acf(singapore_1diff, lags=acf_pacf_lags, ax=ax1)
fig = plot_pacf(singapore_1diff, lags=acf_pacf_lags, ax=ax2)

In [None]:
# ARIMA Model Setup
# Prefer the current implementation. The legacy `statsmodels.tsa.arima_model`
# module was deprecated in statsmodels 0.12 and removed in 0.13, so the new
# location must be tried first; the old one is only a fallback for installs
# that predate `statsmodels.tsa.arima.model`.
try:
    from statsmodels.tsa.arima.model import ARIMA
except ImportError:
    from statsmodels.tsa.arima_model import ARIMA

In [None]:
# The series handed to ARIMA was differenced by hand, so d = 0 here:
# order=(1, 0, 0) on the differenced data stands for ARIMA(1,1,0) on the
# original series.
ar_order = (1, 0, 0)
model_singapore = ARIMA(singapore_1diff, order=ar_order)
result_AR = model_singapore.fit()

# Overlay the in-sample fitted values (red) on the differenced series.
fit_fig, fit_ax = plt.subplots(figsize=(12, 8))
fit_ax.plot(singapore_1diff)
fit_ax.plot(result_AR.fittedvalues, color='red')

In [None]:
# Inspect the in-sample fitted values; the model was fit to singapore_1diff,
# so these are on the differenced scale, not the original one.
result_AR.fittedvalues

In [None]:
# Model Diagnostics
# Show the summary report of the fitted model held in result_AR.
result_AR.summary()

In [None]:
# ACF on Residuals of Our Model
%matplotlib inline
fig = plt.figure(figsize=(12,8))
ax1 = fig.add_subplot(211)
fig = plot_acf(result_AR.resid, lags=20, ax=ax1)

In [None]:
# Histogram of the Residuals, with a fitted normal curve drawn on top
from scipy.stats import norm

residuals = result_AR.resid

plt.figure(figsize=(12, 8))
# density=True rescales the bars to unit area so they share a scale with the
# normal probability density function plotted below.
plt.hist(residuals, bins='auto', density=True, rwidth=0.85, label='Residuals')

# Fit a normal distribution to the residuals and evaluate its pdf at 100
# evenly spaced points spanning the histogram's current x-axis limits.
mu, std = norm.fit(residuals)
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 100)
p = norm.pdf(x, mu, std)
plt.plot(x, p, 'm', linewidth=2)

plt.grid(axis='y', alpha=0.2)
plt.xlabel('Residuals')
plt.ylabel('Density')
plt.title('Residuals 1,1,0 vs Normal Distribution - Mean = %s, Std = %s'
          % (round(mu, 2), round(std, 2)))
plt.show()

In [None]:
# Compare further models like ARIMA(0,1,1) or ARIMA(2,1,0).
# Don't forget: the dataset is already differenced, so keep d = 0 and pass
# order=(0, 0, 1) or order=(2, 0, 0) to ARIMA.