## FX pricing data 

Query EDP to get end-of-day spot FX pricing data for GBP/USD over a two-year period, and then perform standard time-series analysis and prediction.

**"Prediction is very difficult, especially if it's about the future."** *Niels Bohr*

In [5]:
#Global imports

import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import requests
import json
import numpy as np
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from statsmodels.tsa.stattools import acf, pacf
from statsmodels.tsa.stattools import adfuller

%matplotlib inline
plt.style.use('seaborn')

In [6]:
def get_data_request(url, requestData, timeout=30):
    """Make an HTTP GET request and return the decoded JSON body.

    The API key is taken from the notebook-level ``access_token`` variable
    and sent in the ``X-api-key`` header.

    Parameters
    ----------
    url : str
        Endpoint to query.
    requestData : dict
        Query-string parameters, forwarded to ``requests.get`` as ``params``.
    timeout : float, optional
        Seconds to wait for the server before giving up. Without a timeout
        ``requests`` may block indefinitely on an unresponsive endpoint.

    Returns
    -------
    object or None
        The parsed JSON payload when the server answers with status 200;
        ``None`` (after printing the status code and message) otherwise.

    Raises
    ------
    requests.exceptions.RequestException
        Connection problems and timeouts are not caught here and propagate
        to the caller.
    """
    dResp = requests.get(
        url,
        headers={'X-api-key': access_token},
        params=requestData,
        timeout=timeout,
    )

    if dResp.status_code != 200:
        print("Unable to get data. Code %s, Message: %s" % (dResp.status_code, dResp.text))
        # Explicit so callers can rely on the `is not None` check.
        return None

    print("Data access successful")
    return json.loads(dResp.text)

#### API call to retrieve the data

In [9]:
import os

ric = '=GBP' # GBP/USD
start_date = '2016-11-01'
end_date = '2018-10-31'

RESOURCE_ENDPOINT = "https://dsa-stg-edp-api.fr-nonprod.aws.thomsonreuters.com/data/historical-pricing/beta1/views/summaries/" + ric

# Personal key for Data Science Accelerator access to Pricing Data.
# Set the EDP_API_KEY environment variable to supply your own key without
# editing the notebook.
# NOTE(review): the fallback literal is a credential committed to source -
# consider rotating it and removing the fallback.
access_token = os.environ.get('EDP_API_KEY', '26GtCASt7F4X37PrBL1Ml8fcxFwZoCc84afAZThY')

# Daily ("P1D") summaries between start_date and end_date.
requestData = {
    "interval": "P1D",
    "start": start_date,
    "end": end_date,
}

jResp = get_data_request(RESOURCE_ENDPOINT, requestData)

# get_data_request returns None when the server does not answer with 200.
# Stop here with a clear message instead of failing later on an undefined
# (or stale) spot_df.
if jResp is None:
    raise RuntimeError("No pricing data returned for %s; cannot build spot_df" % ric)

# The first element of the response carries the rows under 'data' and the
# column descriptors under 'headers'.
data = jResp[0]['data']
headers = jResp[0]['headers']
names = [header['name'] for header in headers]
spot_df = pd.DataFrame(data, columns=names)

spot_df.head()

Data access successful


Unnamed: 0,DATE,TRDPRC_1,OPEN_PRC,HIGH_1,LOW_1,ASK,BID
0,2018-01-01,78.2,78.1,78.2,78.1,,
1,2017-12-29,78.2,78.1,78.2,78.1,,
2,2017-12-28,78.0,78.1,78.1,77.9,,
3,2017-12-27,78.0,78.1,78.1,78.0,,
4,2017-12-26,78.1,78.1,78.2,78.1,,


In [None]:
# Parse the DATE column and use it as a chronologically sorted index.
# The sample output above shows the rows arriving newest-first, whereas the
# rolling mean, differencing and "first value" lookups in the later cells
# operate row by row and therefore need oldest-first order.

spot_df['DATE'] = pd.to_datetime(spot_df['DATE'])
spot_df = spot_df.set_index('DATE').sort_index()

#### Plot the Spot price Time-Series

In [None]:
# Plot every column of the frame against its date index and label the axes.
price_ax = spot_df.plot(title='GBP Spot Price', figsize=(16, 7))
price_ax.set_xlabel('Date', fontsize=15)
price_ax.set_ylabel('Price', fontsize=15)

#### Calculate the 12-day rolling average

In [None]:
# NOTE(review): the sample output of the API cell lists TRDPRC_1/BID/ASK but
# no SPOT column - confirm the column name against the live response.
spot_series = spot_df['SPOT']

# Mean over a sliding window of 12 consecutive rows.
rolling_mean = spot_series.rolling(12).mean()

# The smoothed curve is drawn first so that it creates the figure at the
# requested size; the raw series is then overlaid on the same axes.
rolling_ax = rolling_mean.plot(figsize=(16, 7), color='red', label='Rolling 12')
rolling_ax.plot(spot_series, label='Original')
rolling_ax.legend(loc='best')
rolling_ax.set_xlabel('Date', fontsize=15)
rolling_ax.set_ylabel('Price', fontsize=15)

#### Time-series decomposition

Decompose the Time-Series into Trend, Seasonality & Residuals Time-Series.

In [None]:
# Split the SPOT series into trend, seasonal and residual components.
decomposition = seasonal_decompose(spot_df.SPOT, freq=24)
trend = decomposition.trend
seasonal = decomposition.seasonal
residual = decomposition.resid

# One stacked panel per series, top to bottom.
panels = [
    (spot_df.SPOT, 'Original'),
    (trend, 'Trend'),
    (seasonal, 'Seasonality'),
    (residual, 'Residuals'),
]

plt.figure(figsize=(16,7))
for row, (series, label) in enumerate(panels, start=1):
    plt.subplot(len(panels), 1, row)
    plt.plot(series, label=label)
    plt.legend(loc='best')
plt.tight_layout()

#### First order differencing

Apply first-order differencing to the series, then test the differenced series for stationarity.

In [None]:
# First-order difference: each row minus the row before it.
spot_df['spot_1diff'] = spot_df['SPOT'].diff()

# Rows without a valid difference (at least the very first one) are removed.
spot_df = spot_df.dropna(subset=['spot_1diff'])

# NOTE(review): test_stationarity is not defined in the visible part of this
# notebook - confirm that the cell defining it is run before this one.
test_stationarity(spot_df['spot_1diff'])

The first order difference time-series seems to be stationary

#### Plot autocorrelation & partial autocorrelation

We use the autocorrelation (ACF) and partial autocorrelation (PACF) plots to identify the parameters for our ARIMA model.

In [None]:
# Autocorrelation and partial autocorrelation of the differenced series,
# both evaluated up to the same maximum lag.
N_LAGS = 50
diff_series = spot_df['spot_1diff']

lag_acf = acf(diff_series, nlags=N_LAGS)
lag_pacf = pacf(diff_series, nlags=N_LAGS, method='ols')

In [None]:
# q - read the moving-average order off the autocorrelation plot
# Approximate 95% significance band: +/- 1.96 / sqrt(number of observations).
acf_bound = 1.96 / np.sqrt(len(spot_df.spot_1diff))

plt.figure(figsize=(16,7))
plt.subplot(211)
# Zero line first, then the lower and upper band.
for level in (0, -acf_bound, acf_bound):
    plt.axhline(y=level, linestyle='--', color='gray')
plt.axvline(x=1, color='red')  # marker at lag 1
plt.title('Autocorrelation')
plt.plot(lag_acf)

In [None]:
# p - read the autoregressive order off the partial autocorrelation plot
# Approximate 95% significance band: +/- 1.96 / sqrt(number of observations).
pacf_bound = 1.96 / np.sqrt(len(spot_df.spot_1diff))

plt.figure(figsize=(16,7))
plt.subplot(211)
# Zero line first, then the lower and upper band.
for level in (0, -pacf_bound, pacf_bound):
    plt.axhline(y=level, linestyle='--', color='gray')
plt.axvline(x=1, color='red')  # marker at lag 1
plt.title('Partial Autocorrelation')
plt.plot(lag_pacf)

#### Create an ARMA model

In [None]:
# Put the differenced series on a calendar-day grid; days without an
# observation repeat the last available value.
# NOTE(review): forward-filling a *difference* repeats the last change on
# every gap day - confirm this is intended rather than filling with 0.
daily_diff = spot_df['spot_1diff'].resample('D')
new_spot = daily_diff.ffill()

# Alternative model inputs kept for experimentation:
# new_spot = spot_df['SPOT'].resample('D').ffill()
# new_spot = residual.resample('D').ffill().dropna()

In [None]:
# Re-run the stationarity check on the daily, forward-filled series.
# NOTE(review): test_stationarity is not defined in the visible part of this
# notebook - confirm that the cell defining it is run before this one.
test_stationarity(new_spot)

In [None]:
# ARIMA(p, d, q) with d = 0: new_spot is already the first-order differenced
# series, so no further differencing is requested from the model.

arma_order = (1, 0, 1)
arma_model = ARIMA(new_spot, order=arma_order)
results = arma_model.fit(disp=0)

# Residual sum of squares between the fitted values and the series the model
# was trained on (both on the differenced scale).
fitted_diff = results.fittedvalues
arma_rss = sum((fitted_diff - new_spot) ** 2)

plt.figure(figsize=(16,7))
plt.plot(new_spot)  # the first-order difference series used for fitting
plt.plot(fitted_diff, color='red')
plt.title('RSS: %.4f' % arma_rss)

In [None]:
# Print the summary report of the fitted ARMA results object.
print(results.summary())

Plot the residuals to see if there is any trend in the data

In [None]:
# Wrap the model residuals in a DataFrame (reused by the density plot in the
# next cell) and plot them.
residuals = pd.DataFrame(results.resid)
residuals.plot(figsize=(16,7), title='Residuals')

Check that the distribution of the residual values is approximately Gaussian

In [None]:
# Kernel density estimate of the residuals, to eyeball how close their
# distribution is to a bell curve.
residuals.plot(figsize=(8,5), kind='kde', title='Distribution of residual error values')

#### Bring data back to original scale

Calculate the cumulative sum of the fitted differences and add it to a series holding a base value (the first SPOT value), since the model was fitted on the first-order difference time-series.

In [None]:
# Fitted values are differences; accumulating them gives the total change
# relative to the starting point.
predictions_ARIMA = pd.Series(results.fittedvalues, copy=True)
predictions_ARIMA_cumsum = predictions_ARIMA.cumsum()

# Constant series holding the first SPOT value on the daily index, to which
# the accumulated change is added (dates missing on one side count as 0).
# NOTE(review): the first row of spot_df was dropped after differencing, so
# iloc[0] is the value *at* the first difference - confirm the intended base.
base_value = spot_df['SPOT'].iloc[0]
base_series = pd.Series(base_value, index=new_spot.index)
predictions_ARIMA_final = base_series.add(predictions_ARIMA_cumsum, fill_value=0)
predictions_ARIMA_final.head()

In [None]:
# Daily, forward-filled SPOT levels to compare against the rescaled fit.
daily_spot = spot_df.SPOT.resample('D').ffill()

# The RSS in the title is computed from the model's fitted values and
# new_spot, not from the two curves drawn below.
fit_rss = sum((results.fittedvalues - new_spot) ** 2)

plt.figure(figsize=(16,7))
plt.plot(daily_spot, label='Original SPOT')
plt.plot(predictions_ARIMA_final, color='red', label='Fitted')
plt.legend(loc='best')
plt.title('RSS: %.4f' % fit_rss)

#### Exponential Smoothing

We will use triple exponential smoothing (Holt-Winters) for forecasting

In [None]:
# Rebuild new_spot from the SPOT levels (not the differences) on a daily grid,
# forward-filling the gaps. This overwrites the differenced new_spot used by
# the ARMA cells above.
new_spot = spot_df.SPOT.resample('D',label='right').ffill()

In [None]:
# Holt-Winters configuration: additive, undamped trend with a multiplicative
# seasonal component repeating every 12 observations.
es_options = dict(trend='add', damped=False, seasonal='mul', seasonal_periods=12)

es_model = ExponentialSmoothing(new_spot, **es_options)
es_results = es_model.fit()

In [None]:
# Residual sum of squares between the smoothed fit and the daily series.
es_fitted = es_results.fittedvalues
es_rss = sum((es_fitted - new_spot) ** 2)

plt.figure(figsize=(16,7))
plt.plot(new_spot, label='Original SPOT')
plt.plot(es_fitted, color='red', label='Fitted')
plt.legend(loc='best')
plt.title('RSS: %.4f' % es_rss)

In [None]:
# Prediction window; the same bounds drive both the model call and the index
# of the resulting frame so the two always line up.
forecast_start = '2017-12-30'
forecast_end = '2018-01-20'

predicted_values = es_model.predict(params=es_results.params, start=forecast_start, end=forecast_end)
forecast_index = pd.date_range(start=forecast_start, end=forecast_end)
preds = pd.DataFrame(data=predicted_values, index=forecast_index, columns=['SPOT'])

In [None]:
# Only the part of the daily series after 2016-09-01 is shown next to the
# predicted values.
recent_spot = new_spot[new_spot.index > '2016-09-01']

plt.figure(figsize=(16,7))
plt.plot(recent_spot, label='Original')
plt.plot(preds, label='Predicted', color='red')
plt.legend(loc='best')
plt.show()