In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller

In [None]:
# Load the exchange-rate CSV; later cells read its 'Date' and 'Value' columns.
# NOTE(review): the variable is named usd_to_pound but the file carries the series
# code XUDLERD — confirm which currency pair this series actually is.
usd_to_pound = pd.read_csv("BOE-XUDLERD.csv")
usd_to_pound.head()

In [None]:
# Step 1 --- plot the time series.
# Parse the 'Date' column into datetimes (overwrites the column on usd_to_pound),
# then build a date-indexed frame for the time-series work in the following cells.
usd_to_pound['Date'] = pd.to_datetime(usd_to_pound['Date'])
ts_data = usd_to_pound.set_index('Date')
ts_data.head()

In [None]:
# Plot the date-indexed series on an 8x8 inch figure.
plt.figure(figsize=(8,8))
plt.plot(ts_data)

In [None]:
# Same series plotted from the un-indexed frame: 'Date' on x, 'Value' on y.
plt.plot(usd_to_pound['Date'],usd_to_pound['Value'])

In [None]:
############################ WEEKLY AVERAGING (smoothing)
# resample('W').mean() averages the observations that fall in each weekly bin;
# this is a down-sampling to weekly means rather than a rolling moving average.

ts_data_weekly = ts_data.resample('W').mean()
plt.figure(figsize=(10,8))
plt.plot(ts_data_weekly)

In [None]:
#ts_data_weekly.head()

In [None]:
### Stationarity check on the weekly series
# Run the Augmented Dickey-Fuller test on the 'Value' column and report its p-value
# (element 1 of the returned result).
adf_result = adfuller(ts_data_weekly['Value'])
p_value = adf_result[1]
print(f"adf_p_value {p_value!s}")

In [None]:
# Because the p-value is > 0.05 we cannot reject the unit-root null hypothesis, so we treat this as a non-stationary time series

#adf_result  
# adf_result is a tuple: (test statistic, p-value, lags used, number of observations, critical values, ...)

In [None]:
# To convert it into a stationary time series:
#   1. take the log (done in this cell)
#   2. take the lag-1 difference (done in the next cell)
ts_data_weekly_log = np.log(ts_data_weekly)
# Plot the log-transformed weekly series.
plt.plot(ts_data_weekly_log)

In [None]:
# First-order differencing of the log series: each row minus the previous row.
# The first row has no predecessor and therefore becomes NaN.
ts_data_weekly_log_diff_order1 = ts_data_weekly_log.diff(periods=1)
plt.plot(ts_data_weekly_log_diff_order1)

In [None]:
# Discard rows containing NaN (e.g. the leading row produced by differencing)
# by rebinding the name to the cleaned frame.
ts_data_weekly_log_diff_order1 = ts_data_weekly_log_diff_order1.dropna()
ts_data_weekly_log_diff_order1.head()

In [None]:
# Repeat the Augmented Dickey-Fuller test, this time on the differenced log series,
# and report its p-value (element 1 of the returned result).
adf_result = adfuller(ts_data_weekly_log_diff_order1['Value'])
p_value = adf_result[1]
print(f"adf_p_value {p_value!s}")

In [None]:
# p-value < 0.05 ---> the differenced series is stationary
# Order of differencing required to make the series stationary: d = 1

In [None]:
#### ACF and PACF 
from statsmodels.graphics.tsaplots import plot_acf,plot_pacf
from statsmodels.tsa.stattools import acf,pacf

In [None]:
# Sample autocorrelation and partial autocorrelation of the differenced log series,
# both requested with nlags=10.
acf_output =acf(ts_data_weekly_log_diff_order1,nlags=10)
pacf_output = pacf(ts_data_weekly_log_diff_order1,nlags = 10)

In [None]:
# Display the autocorrelation values.
acf_output

In [None]:
# Display the partial autocorrelation values.
pacf_output

In [None]:
ul = 7.96 / np.sqrt(len(ts_data_weekly_log_diff_order1))
ul

In [None]:
ll = -7.96 / np.sqrt(len(ts_data_weekly_log_diff_order1))
ll

In [None]:
# Any value of ACF or PACF that lies between 0.16 and -0.16 is not significant


In [None]:
# Draw the ACF (lags 0-10) on an explicitly sized axes. plot_acf creates its own
# figure unless it is handed `ax`, so a bare plt.figure(figsize=...) beforehand is
# left empty and its size is ignored. plt.show() ends the cell so the returned
# Figure is not rendered a second time as the cell's output.
fig, ax = plt.subplots(figsize=(10, 8))
plot_acf(ts_data_weekly_log_diff_order1, lags=10, ax=ax)
plt.show()

In [None]:
# Draw the PACF (lags 0-10) on an explicitly sized axes. plot_pacf creates its own
# figure unless it is handed `ax`, so a bare plt.figure(figsize=...) beforehand is
# left empty and its size is ignored. plt.show() ends the cell so the returned
# Figure is not rendered a second time as the cell's output.
fig, ax = plt.subplots(figsize=(10, 8))
plot_pacf(ts_data_weekly_log_diff_order1, lags=10, ax=ax)
plt.show()

In [None]:
# The autocorrelation (ACF) plot suggests the order of the MA component ---> q = 1
# The partial autocorrelation (PACF) plot suggests the order of the AR component ---> p = 1

In [None]:
# p=1
# q=1
# d=1 (order of differencing needed to convert the non-stationary time series into a stationary one)

In [None]:
### ARIMA MODEL
## AR = Auto regressive (p)
## Integrated (d)
## MA = MOVING AVERAGE (q)
# NOTE(review): statsmodels.tsa.arima_model is the legacy ARIMA implementation; it was
# deprecated and then removed in statsmodels 0.13. Newer releases provide
# statsmodels.tsa.arima.model.ARIMA, whose fit()/forecast()/fittedvalues behave
# differently — confirm the pinned statsmodels version before re-running.
from statsmodels.tsa.arima_model import ARIMA
# Fit ARIMA(p=1, d=1, q=1) on the log-transformed weekly series (not the pre-differenced one).
arima_model = ARIMA(ts_data_weekly_log,order = (1,1,1))
results = arima_model.fit(disp=0)
# Overlay the fitted values (red) on the differenced log series.
# Presumably fittedvalues are on the differenced-log scale here, since they are
# compared with the differenced series and cumulatively summed later — TODO confirm.
plt.plot(ts_data_weekly_log_diff_order1)
plt.plot(results.fittedvalues, color='red')
#ARIMA(ts_data_weekly_log_diff_order1,order = (1,0,1))

In [None]:
# Print the fitted model's summary table.
# NOTE(review): the two trailing numbers look like values copied from earlier summaries
# (possibly information criteria of two candidate models) — confirm or remove.
print(results.summary()) #-13732.987   -13592.901

In [None]:
### Prediction using the ARIMA model
# In-sample fitted values for the existing time periods, wrapped in a pandas Series.
fitted_val_ARIMA = pd.Series(results.fittedvalues)
fitted_val_ARIMA.head()

In [None]:
# Scratch arithmetic on three hard-coded values.
# NOTE(review): presumably a manual check of a cumulative sum of fitted values — confirm or delete.
-0.002418 +0.001002+ 0.000055

In [None]:
### Back in the original scale
# Cumulatively summing fitted log-differences rebuilds only the *change* in log level
# since the start of the sample. The first observed log value has to be added back
# before exponentiating; without it the curve is a ratio to the first observation
# rather than the exchange rate itself.
# NOTE(review): assumes results.fittedvalues are on the differenced-log scale (legacy
# statsmodels ARIMA with d=1) — confirm for the installed statsmodels version.
fitted_val_ARIMA_cumsum = fitted_val_ARIMA.cumsum()
base_log_level = ts_data_weekly_log['Value'].iloc[0]
fitted_val_ARIMA_cumsum_orig_scale = np.exp(base_log_level + fitted_val_ARIMA_cumsum)
plt.figure(figsize=(10, 8))
plt.plot(ts_data_weekly, label='observed (weekly mean)')
plt.plot(fitted_val_ARIMA_cumsum_orig_scale, label='ARIMA fitted (back-transformed)')
plt.legend()

In [None]:
# POINT IN TIME FORECAST
# Forecast 12 steps beyond the end of the sample and keep element 0 of the result.
# NOTE(review): presumably element 0 holds the point forecasts, on the log scale because
# the model was fitted to ts_data_weekly_log — confirm against the statsmodels version in use.
results.forecast(steps = 12)[0]

In [None]:
# Size of the weekly log series; used to pick the train/test split point below.
ts_data_weekly_log.shape

In [None]:
# Peek at the first rows of the weekly log series.
ts_data_weekly_log.head()

In [None]:
# ROLLING FORECAST
# Chronological split of the weekly log series: the first 2225 rows are the training
# window and every remaining row is held out for testing.
#size = int(len(X) * 0.66)
train = ts_data_weekly_log.iloc[:2225]
test = ts_data_weekly_log.iloc[2225:]
test.shape

In [None]:
# Peek at the first rows of the held-out test window.
test.head()

In [None]:
#history = [x for x in train['Value']]
#history

In [None]:
from sklearn.metrics import mean_squared_error

# Walk-forward validation: refit ARIMA(1,1,1) on every log-scale observation seen so
# far, forecast one step ahead, then append the true observation to the history.
history = list(train['Value'])
actual_log = test['Value']
predictions = []
for t in range(len(actual_log)):
    model = ARIMA(history, order=(1, 1, 1))
    model_fit = model.fit(disp=0)
    output = model_fit.forecast()
    # Element 0 of the forecast result carries the one-step point forecast (log scale);
    # flatten it to a plain float so predictions is a flat list of numbers.
    yhat = float(np.ravel(output[0])[0])
    # .iloc: explicit positional access (plain [t] on a date-indexed Series is deprecated).
    obs = actual_log.iloc[t]
    history.append(obs)
    print('predicted=%f, expected=%f' % (np.exp(yhat), np.exp(obs)))
    predictions.append(np.exp(yhat))
# Predictions were exponentiated back to the original scale, so the actual values must
# be exponentiated too; comparing log-scale actuals with original-scale predictions
# would make the MSE meaningless.
error = mean_squared_error(np.exp(actual_log), predictions)
print('Test MSE: %.3f' % error)

In [None]:
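# Residual diagnostics still to do:
# - autocorrelation in the errors (Durbin-Watson test)
# - normality of the errors (Shapiro-Wilk test)
# - the forecast errors should look random (no remaining structure)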