In [1]:
import fxcmpy
import pandas as pd
import numpy as np
import datetime as dt

# Allows for printing the whole data frame
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

from pyti.accumulation_distribution import accumulation_distribution as ad
from pyti.aroon import aroon_up
from pyti.aroon import aroon_down
from pyti.average_true_range import average_true_range as atr
from pyti.chande_momentum_oscillator import chande_momentum_oscillator as cmo
from pyti.chaikin_money_flow import chaikin_money_flow as cmf 
from pyti.commodity_channel_index import commodity_channel_index as cci
from pyti.exponential_moving_average import exponential_moving_average as ema
from pyti.hull_moving_average import hull_moving_average as hma
from pyti.money_flow_index import money_flow_index as mfi
from pyti.on_balance_volume import on_balance_volume as obv
from pyti.simple_moving_average import simple_moving_average as sma
from pyti.stochastic import percent_k as percent_k
from pyti.stochastic import percent_d as percent_d
from pyti.smoothed_moving_average import smoothed_moving_average as smoothed_ma
from pyti.true_range import true_range as tr
from pyti.ultimate_oscillator import ultimate_oscillator as uo
from pyti.volatility import volatility as volat
from pyti.relative_strength_index import relative_strength_index as rsi
from pyti.williams_percent_r import williams_percent_r as wpr 

In [2]:
#set connection
con = fxcmpy.fxcmpy(config_file='fxcm.cfg')

#get candle data 2016-01-01 to 2018-06-19
df = con.get_candles('GBP/JPY', period='D1',start= dt.datetime(2014, 1, 1),end = dt.datetime(2018, 7, 6))

#check connection
con.is_connected()

True

## Indicators

In [3]:
# Accumulation distribution
df['accum_dist'] = ad(df['askclose'], df['askhigh'], df['asklow'], df['tickqty'])

In [4]:
# Averagre true range
df['atr'] = atr(df['askclose'], 10)

In [5]:
# Chande momentum oscillator
df['cmo'] = cmo(df['askclose'], 10)

In [6]:
# chaikin_money_flow 
df['cmf'] = cmf(df['askclose'], df['askhigh'], df['asklow'], df['tickqty'], 10) 

In [7]:
# commodity channel index
df['cci'] = cci(df['askclose'], df['askhigh'], df['asklow'], 10)

In [8]:
# exponential moving average 
fast = 8
slow = 16

# EMA fast and slow calculation
df['ema_fast'] = ema(df['askclose'], fast)
df['ema_slow'] = ema(df['askclose'], slow)

In [9]:
# hull moving average 
df['hma_fast'] = hma(df['askclose'], fast)
df['hma_slow'] = hma(df['askclose'], slow)

In [10]:
# money flow index
df['mfi'] = mfi(df['askclose'], df['askhigh'], df['asklow'], df['tickqty'], 10)

In [11]:
#From Ben
df['obv'] = obv(df['askclose'], df['tickqty'])
df['sma'] = sma(df['askclose'], period = 10)
df['percent_k'] = percent_k(df['askclose'], period = 10)
df['percent_d'] = percent_d(df['askclose'], period = 10)
df['smoothed_ma'] = smoothed_ma(df['askclose'], period = 10)
df['true_range'] = tr(df['askclose'], period = 10)
df['ulti_osc'] = uo(df['askclose'], df['asklow'])
df['volatility'] = volat(df['askclose'], period = 10)
df['rsi'] = rsi(df['askclose'], period = 10)
df['williams'] = df['true_range'] = wpr(df['askclose']) 

In [12]:
df.head(100)

Unnamed: 0_level_0,bidopen,bidclose,bidhigh,bidlow,askopen,askclose,askhigh,asklow,tickqty,accum_dist,atr,cmo,cmf,cci,ema_fast,ema_slow,hma_fast,hma_slow,mfi,obv,sma,percent_k,percent_d,smoothed_ma,true_range,ulti_osc,volatility,rsi,williams
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1
2014-01-02 22:00:00,174.398,172.345,174.834,171.823,174.436,172.504,174.863,171.858,215078,0.0,,,,,,,,,,1.0,,,,172.504,-32.936774,,,,-32.936774
2014-01-03 22:00:00,172.345,172.069,172.496,171.034,172.504,172.199,172.527,171.076,198734,108886.0,,,,,,,,,,-198733.0,,,,172.343474,-33.37237,,,,-33.37237
2014-01-05 22:00:00,172.069,171.822,172.084,171.772,172.199,171.917,172.225,171.877,1311,107876.3,,,,,,,,,,-200044.0,,,,172.186103,-33.775118,,,,-33.775118
2014-01-06 22:00:00,171.822,170.96,172.226,170.397,171.917,171.001,172.251,170.428,234773,20689.66,,,,,,,,,,-434817.0,,,,171.841496,-35.083335,,,,-35.083335
2014-01-07 22:00:00,170.96,171.522,171.695,170.823,171.001,171.58,171.723,170.85,215445,165553.6,,,,,,,,,,-219372.0,,,,171.77764,-34.256416,,,,-34.256416
2014-01-08 22:00:00,171.522,172.446,172.831,171.494,171.58,172.498,172.857,171.515,242409,278268.4,,,,,,,,,,23037.0,,,,171.93138,-32.945343,,,,-32.945343
2014-01-09 22:00:00,172.446,172.712,173.122,172.182,172.498,172.778,173.155,172.208,202977,319635.4,,,,,,,,,,226014.0,,,,172.09366,-32.545452,,,,-32.545452
2014-01-10 22:00:00,172.712,171.545,173.022,171.24,172.778,171.729,173.047,171.264,244847,202498.8,,,,,172.046027,,,,,-18833.0,,,,172.029632,-34.043617,,,,-34.043617
2014-01-12 22:00:00,171.545,171.357,171.496,171.26,171.729,171.468,171.58,171.361,1190,202471.6,,,,,171.881978,,172.063235,,,-20023.0,,,,171.937949,-34.416373,,,,-34.416373
2014-01-13 22:00:00,171.357,168.813,171.656,168.336,171.468,168.83,171.693,168.362,213043,49293.02,3.948,-50.830105,-0.041424,-8.553451,171.087996,,170.624209,,,-233066.0,171.6504,0.0,,171.460773,-38.183922,,0.884215,,-38.183922


## Data Preparation

In [13]:
#save to csv for convenience
df.to_csv('MLmodel2.csv')

In [14]:
df = pd.read_csv('MLmodel2.csv')

In [15]:
#Split into training and testing data

train = df[(pd.to_datetime(df.date) < pd.to_datetime('2017-07-01')) & (pd.to_datetime(df.date) >= pd.to_datetime('2014-01-01'))]

test = df[pd.to_datetime(df.date) >= pd.to_datetime('2017-07-01')]

## Machine Learning Model using Scikit Learn package