In [74]:
import fxcmpy
import pandas as pd
import numpy as np
import datetime as dt

# Allows for printing the whole data frame
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None) 

from pyti.hull_moving_average import hull_moving_average as hma
from pyti.accumulation_distribution import accumulation_distribution as ad
from pyti.aroon import aroon_up
from pyti.aroon import aroon_down
from pyti.average_true_range import average_true_range as atr
from pyti.chande_momentum_oscillator import chande_momentum_oscillator as cmo
from pyti.chaikin_money_flow import chaikin_money_flow as cmf 
from pyti.commodity_channel_index import commodity_channel_index as cci
from pyti.exponential_moving_average import exponential_moving_average as ema
from pyti.money_flow_index import money_flow_index as mfi
from pyti.on_balance_volume import on_balance_volume as obv
from pyti.simple_moving_average import simple_moving_average as sma
from pyti.stochastic import percent_k as percent_k
from pyti.stochastic import percent_d as percent_d
from pyti.smoothed_moving_average import smoothed_moving_average as smoothed_ma
from pyti.true_range import true_range as tr
from pyti.ultimate_oscillator import ultimate_oscillator as uo
from pyti.volatility import volatility as volat
from pyti.relative_strength_index import relative_strength_index as rsi
from pyti.williams_percent_r import williams_percent_r as wpr 

from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import Imputer, RobustScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, log_loss, roc_auc_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from pprint import pprint


In [10]:
#set connection
con = fxcmpy.fxcmpy(config_file='fxcm.cfg')

#get candle data 2016-01-01 to 2018-06-19
df = con.get_candles('GBP/JPY', period='H1',number = 10000)

#check connection
con.is_connected()

True

## Trades generated based on HMA strategy

In [13]:
# Define pip cost and lot size
pip_cost = .0911
lot_size = 10

# Define EMA Fast / Slow parameters
fast = 8
slow = 16

# HMA fast and slow calculation
df['hma_fast'] = hma(df['askclose'], fast)
df['hma_slow'] = hma(df['askclose'], slow)

# Entry signals when HMA(fast) corsses above the HMA(slow). Sell signals when HMA(fast) crossed below the HMA(slow).
df['signal'] = np.where(df['hma_fast'] > df['hma_slow'], 1, 0)
df['position'] = df['signal'].diff()

In [17]:
df.head()

Unnamed: 0_level_0,bidopen,bidclose,bidhigh,bidlow,askopen,askclose,askhigh,asklow,tickqty,hma_fast,hma_slow,signal,position
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2016-12-22 17:00:00,144.517,144.612,144.681,144.483,144.549,144.65,144.713,144.515,11116,,,0,
2016-12-22 18:00:00,144.612,144.628,144.658,144.556,144.65,144.657,144.686,144.585,9061,,,0,0.0
2016-12-22 19:00:00,144.628,144.414,144.643,144.414,144.657,144.447,144.673,144.445,8505,,,0,0.0
2016-12-22 20:00:00,144.414,144.437,144.471,144.315,144.447,144.472,144.501,144.348,8177,,,0,0.0
2016-12-22 21:00:00,144.437,144.306,144.467,144.3,144.472,144.42,144.506,144.337,5195,,,0,0.0


## New data manipulation

In [57]:
### 1. Feature Engineering

# Accumulation distribution
df['accum_dist'] = ad(df['askclose'], df['askhigh'], df['asklow'], df['tickqty'])

# Averagre true range
df['atr'] = atr(df['askclose'], 10)

# Chande momentum oscillator
df['cmo'] = cmo(df['askclose'], 10)

# chaikin_money_flow 
df['cmf'] = cmf(df['askclose'], df['askhigh'], df['asklow'], df['tickqty'], 10) 

# commodity channel index
df['cci'] = cci(df['askclose'], df['askhigh'], df['asklow'], 10)

# exponential moving average 
fast = 7
slow = 14
df['ema_fast'] = ema(df['askclose'], fast)
df['ema_slow'] = ema(df['askclose'], slow)

# money flow index
df['mfi'] = mfi(df['askclose'], df['askhigh'], df['asklow'], df['tickqty'], 10)

# on balance volume
df['obv'] = obv(df['askclose'], df['tickqty'])

# simple moving average
df['sma'] = sma(df['askclose'], period = 10)

# percent k
df['percent_k'] = percent_k(df['askclose'], period = 10)

# percent d
df['percent_d'] = percent_d(df['askclose'], period = 10)

# smoothed moving average
df['smoothed_ma'] = smoothed_ma(df['askclose'], period = 10)

# true range
df['true_range'] = tr(df['askclose'], period = 10)

# ultimate oscillator
df['ulti_osc'] = uo(df['askclose'], df['asklow'])

# volatility
df['volatility'] = volat(df['askclose'], period = 10)

# relative strength index
df['rsi'] = rsi(df['askclose'], period = 10)

# williams percent
df['williams'] = df['true_range'] = wpr(df['askclose']) 

  (high_data[idx] - low_data[idx]) *
  volume[idx+1-period:idx+1]) / sum(volume[idx+1-period:idx+1]) for idx in range(period-1, len(close_data))]


In [60]:
### 2. Build up trades data with entry and exit info together 
entry_df = df[df['position'] == 1.00].reset_index()

exit_df = df[df['position'] == -1.00].reset_index()

In [61]:
entry_df.shape

(814, 32)

In [62]:
exit_df.shape

(813, 32)

In [63]:
entry_df = entry_df[:-1]
#one open position without close action at the end of the trading period, we drop it for convenience 

In [70]:
trades.head()
#_x refers to open position data, _y refers to close position data.  

Unnamed: 0,date_x,bidopen_x,bidclose_x,bidhigh_x,bidlow_x,askopen_x,askclose_x,askhigh_x,asklow_x,tickqty_x,hma_fast_x,hma_slow_x,signal_x,position_x,accum_dist_x,atr_x,cmo_x,cmf_x,cci_x,ema_fast_x,ema_slow_x,mfi_x,obv_x,sma_x,percent_k_x,percent_d_x,smoothed_ma_x,true_range_x,ulti_osc_x,volatility_x,rsi_x,williams_x,date_y,bidopen_y,bidclose_y,bidhigh_y,bidlow_y,askopen_y,askclose_y,askhigh_y,asklow_y,tickqty_y,hma_fast_y,hma_slow_y,signal_y,position_y,accum_dist_y,atr_y,cmo_y,cmf_y,cci_y,ema_fast_y,ema_slow_y,mfi_y,obv_y,sma_y,percent_k_y,percent_d_y,smoothed_ma_y,true_range_y,ulti_osc_y,volatility_y,rsi_y,williams_y,profit
0,2016-12-23 17:00:00,143.577,143.769,143.786,143.575,143.608,143.806,143.822,143.606,4389,143.664437,143.612776,1,1.0,-14591.902705,0.510624,-3.007519,-0.057913,-1.195681,143.729082,143.829452,17.3835,-16701.0,143.8019,0.443946,0.152813,143.940878,-61.199598,,7.654583,38.679458,-61.199598,2016-12-27 02:00:00,144.078,143.925,144.106,143.922,144.112,143.957,144.149,143.957,14841,144.149952,144.183261,0,-1.0,-12944.42148,0.561692,23.903312,0.000454,1.420242,144.081598,143.988522,44.098609,-38566.0,143.9619,0.504335,0.703276,144.016291,-60.477352,,4.532721,45.319803,-60.477352,-45.9144
1,2016-12-27 14:00:00,143.752,144.138,144.14,143.732,143.783,144.179,144.211,143.763,12571,143.887607,143.816589,1,1.0,9820.984687,0.41302,13.73494,0.105994,2.104589,143.919478,143.940146,48.738735,-31620.0,143.9418,1.0,0.385744,143.960385,-59.415507,,6.846896,61.024251,-59.415507,2016-12-27 21:00:00,144.115,143.978,144.12,143.974,144.146,144.164,144.19,144.044,3991,144.177289,144.219328,0,-1.0,20207.954478,0.442977,49.302915,0.417215,1.551186,144.153654,144.083152,44.559526,-56872.0,144.0744,0.813808,0.794315,144.058761,-59.487253,,6.149059,57.158103,-59.487253,-33.0693
2,2016-12-28 00:00:00,144.187,144.461,144.554,144.07,144.229,144.495,144.586,144.101,13948,144.318567,144.27369,1,1.0,33542.018946,0.41651,55.294118,0.30062,4.068753,144.273241,144.183155,48.338348,-33999.0,144.1883,1.0,0.880176,144.125485,-57.904051,,8.332494,72.296088,-57.904051,2016-12-28 05:00:00,144.578,144.459,144.612,144.446,144.609,144.492,144.645,144.478,11433,144.595361,144.620265,0,-1.0,42537.176254,0.449016,40.420561,0.253246,2.758718,144.529902,144.421365,59.405671,-17120.0,144.3989,0.711934,0.801993,144.302524,-57.918401,26.547329,4.971967,61.03839,-57.918401,-34.618
3,2016-12-28 16:00:00,143.835,143.797,143.917,143.714,143.857,143.824,143.946,143.744,14686,143.738715,143.65308,1,1.0,19303.442679,0.634481,-42.243437,-0.164746,-2.445873,143.814639,143.967344,30.974541,-57981.0,143.9571,0.261474,0.171541,144.051516,-61.113503,19.849763,4.43447,39.526882,-61.113503,2016-12-28 18:00:00,143.706,143.111,143.735,143.106,143.736,143.137,143.766,143.137,15653,143.530209,143.553668,0,-1.0,1713.512446,0.661795,-57.369348,-0.241902,-8.511971,143.587255,143.785428,29.274645,-84045.0,143.7896,0.0,0.147002,143.931607,-64.399483,15.784362,3.625817,23.365221,-64.399483,11.0231
4,2016-12-28 23:00:00,143.049,143.072,143.301,142.991,143.101,143.113,143.356,143.028,8591,143.121459,143.107003,1,1.0,8839.21568,0.721881,-34.797073,-0.223329,-5.109197,143.212644,143.398121,50.030812,-45618.0,143.4238,0.015873,0.124272,143.632842,-64.514278,17.300841,3.239836,30.071989,-64.514278,2016-12-29 06:00:00,142.88,142.56,142.902,142.506,142.913,142.592,142.938,142.537,14188,142.794615,142.828546,0,-1.0,-2664.282851,0.695655,-72.77677,-0.098743,-5.921526,142.861526,142.989266,54.967103,-132723.0,143.0114,0.0,0.064494,143.253482,-67.006266,20.728083,4.943081,22.432897,-67.006266,17.1268


In [67]:
# Define pip cost and lot size
pip_cost = .0911
lot_size = 10

# Calculate Profit
trades['profit'] = (trades['askopen_x'] - trades['askopen_y']) * 100 * pip_cost * lot_size

In [71]:
# Define good trade and label them as 1, othewise 0
trades['label'] = (trades.profit > 0).astype(int)

In [72]:
### Look at return details ###
print('Percent of Target Trades:', trades.label.sum() / float(len(trades)))

Percent of Target Trades: 0.33210332103321033


In [140]:
del trades['date_y']

Unnamed: 0,date_x,bidopen_x,bidclose_x,bidhigh_x,bidlow_x,askopen_x,askclose_x,askhigh_x,asklow_x,tickqty_x,hma_fast_x,hma_slow_x,signal_x,position_x,accum_dist_x,atr_x,cmo_x,cmf_x,cci_x,ema_fast_x,ema_slow_x,mfi_x,obv_x,sma_x,percent_k_x,percent_d_x,smoothed_ma_x,true_range_x,ulti_osc_x,volatility_x,rsi_x,williams_x,bidopen_y,bidclose_y,bidhigh_y,bidlow_y,askopen_y,askclose_y,askhigh_y,asklow_y,tickqty_y,hma_fast_y,hma_slow_y,signal_y,position_y,accum_dist_y,atr_y,cmo_y,cmf_y,cci_y,ema_fast_y,ema_slow_y,mfi_y,obv_y,sma_y,percent_k_y,percent_d_y,smoothed_ma_y,true_range_y,ulti_osc_y,volatility_y,rsi_y,williams_y,profit,label
0,2016-12-23 17:00:00,143.577,143.769,143.786,143.575,143.608,143.806,143.822,143.606,4389,143.664437,143.612776,1,1.0,-14591.902705,0.510624,-3.007519,-0.057913,-1.195681,143.729082,143.829452,17.3835,-16701.0,143.8019,0.443946,0.152813,143.940878,-61.199598,,7.654583,38.679458,-61.199598,144.078,143.925,144.106,143.922,144.112,143.957,144.149,143.957,14841,144.149952,144.183261,0,-1.0,-12944.42148,0.561692,23.903312,0.000454,1.420242,144.081598,143.988522,44.098609,-38566.0,143.9619,0.504335,0.703276,144.016291,-60.477352,,4.532721,45.319803,-60.477352,-45.9144,0
1,2016-12-27 14:00:00,143.752,144.138,144.14,143.732,143.783,144.179,144.211,143.763,12571,143.887607,143.816589,1,1.0,9820.984687,0.41302,13.73494,0.105994,2.104589,143.919478,143.940146,48.738735,-31620.0,143.9418,1.0,0.385744,143.960385,-59.415507,,6.846896,61.024251,-59.415507,144.115,143.978,144.12,143.974,144.146,144.164,144.19,144.044,3991,144.177289,144.219328,0,-1.0,20207.954478,0.442977,49.302915,0.417215,1.551186,144.153654,144.083152,44.559526,-56872.0,144.0744,0.813808,0.794315,144.058761,-59.487253,,6.149059,57.158103,-59.487253,-33.0693,0
2,2016-12-28 00:00:00,144.187,144.461,144.554,144.07,144.229,144.495,144.586,144.101,13948,144.318567,144.27369,1,1.0,33542.018946,0.41651,55.294118,0.30062,4.068753,144.273241,144.183155,48.338348,-33999.0,144.1883,1.0,0.880176,144.125485,-57.904051,,8.332494,72.296088,-57.904051,144.578,144.459,144.612,144.446,144.609,144.492,144.645,144.478,11433,144.595361,144.620265,0,-1.0,42537.176254,0.449016,40.420561,0.253246,2.758718,144.529902,144.421365,59.405671,-17120.0,144.3989,0.711934,0.801993,144.302524,-57.918401,26.547329,4.971967,61.03839,-57.918401,-34.618,0
3,2016-12-28 16:00:00,143.835,143.797,143.917,143.714,143.857,143.824,143.946,143.744,14686,143.738715,143.65308,1,1.0,19303.442679,0.634481,-42.243437,-0.164746,-2.445873,143.814639,143.967344,30.974541,-57981.0,143.9571,0.261474,0.171541,144.051516,-61.113503,19.849763,4.43447,39.526882,-61.113503,143.706,143.111,143.735,143.106,143.736,143.137,143.766,143.137,15653,143.530209,143.553668,0,-1.0,1713.512446,0.661795,-57.369348,-0.241902,-8.511971,143.587255,143.785428,29.274645,-84045.0,143.7896,0.0,0.147002,143.931607,-64.399483,15.784362,3.625817,23.365221,-64.399483,11.0231,1
4,2016-12-28 23:00:00,143.049,143.072,143.301,142.991,143.101,143.113,143.356,143.028,8591,143.121459,143.107003,1,1.0,8839.21568,0.721881,-34.797073,-0.223329,-5.109197,143.212644,143.398121,50.030812,-45618.0,143.4238,0.015873,0.124272,143.632842,-64.514278,17.300841,3.239836,30.071989,-64.514278,142.88,142.56,142.902,142.506,142.913,142.592,142.938,142.537,14188,142.794615,142.828546,0,-1.0,-2664.282851,0.695655,-72.77677,-0.098743,-5.921526,142.861526,142.989266,54.967103,-132723.0,143.0114,0.0,0.064494,143.253482,-67.006266,20.728083,4.943081,22.432897,-67.006266,17.1268,1


In [141]:
# Define Features
features = list(trades.loc[:,'askopen_x':'williams_y'])
features

['askopen_x',
 'askclose_x',
 'askhigh_x',
 'asklow_x',
 'tickqty_x',
 'hma_fast_x',
 'hma_slow_x',
 'signal_x',
 'position_x',
 'accum_dist_x',
 'atr_x',
 'cmo_x',
 'cmf_x',
 'cci_x',
 'ema_fast_x',
 'ema_slow_x',
 'mfi_x',
 'obv_x',
 'sma_x',
 'percent_k_x',
 'percent_d_x',
 'smoothed_ma_x',
 'true_range_x',
 'ulti_osc_x',
 'volatility_x',
 'rsi_x',
 'williams_x',
 'bidopen_y',
 'bidclose_y',
 'bidhigh_y',
 'bidlow_y',
 'askopen_y',
 'askclose_y',
 'askhigh_y',
 'asklow_y',
 'tickqty_y',
 'hma_fast_y',
 'hma_slow_y',
 'signal_y',
 'position_y',
 'accum_dist_y',
 'atr_y',
 'cmo_y',
 'cmf_y',
 'cci_y',
 'ema_fast_y',
 'ema_slow_y',
 'mfi_y',
 'obv_y',
 'sma_y',
 'percent_k_y',
 'percent_d_y',
 'smoothed_ma_y',
 'true_range_y',
 'ulti_osc_y',
 'volatility_y',
 'rsi_y',
 'williams_y']

## Train and Testing Data Preparation

In [142]:
### Split the data into training, testing sets ### 
train = trades[0:570] #70% training
test = trades[570:] #30% testing

X_train = train.loc[:,features]
y_train = train.loc[:,'label']
y_train
X_test = test.loc[:,features]
y_test = test.loc[:,'label']

print('Length of Training Set: ' + str(len(train)))
print('Length of Testing Set: ' + str(len(test)))



Length of Training Set: 570
Length of Testing Set: 243


In [143]:
#change float number to integer for classification purpose  
y_train = y_train.astype(int)
y_test = y_test.astype(int)

In [144]:
### Remove null values and replace with the median of each column ###
transform = Imputer(missing_values='NaN',strategy='median')

### Find median of each column in training set and replace null values ###
X_train_features = transform.fit_transform(X_train)

X_train = pd.DataFrame(X_train_features, index =X_train.index, columns = X_train.columns)

### Apply median from training set to null values of test set ###
X_test_features = transform.transform(X_test)

X_test = pd.DataFrame(X_test_features, index = X_test.index, columns = X_test.columns)

## Logic

In [145]:
#step 1 import model you want to use
from sklearn.linear_model import LogisticRegression

In [146]:
#Step 2 Make an instance of the Model
logisticRegr = LogisticRegression()

In [147]:
# Step 3. Training the model on the data, storing the information learned from the data
logisticRegr.fit(X_train, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [148]:
# Step 4. Predict labels for new data
predictions = logisticRegr.predict(X_test)

In [149]:
# Use score method to get accuracy of model 
score = logisticRegr.score(X_test, y_test)
print(score)

0.7530864197530864
