In [1]:
from IPython.display import clear_output

import matplotlib.pyplot as plt
import seaborn as sns

import joblib
import pandas as pd
import numpy as np
import sys, os

### Tick Values

In [2]:
## Tick Increments
# Listed step per currency pair; every pair quoted in JPY uses 0.005,
# all other pairs use 0.00005.
tick_increments = dict(
    EURCHF=0.00005,
    USDCHF=0.00005,
    GBPUSD=0.00005,
    USDJPY=0.005,
    EURUSD=0.00005,
    EURGBP=0.00005,
    NZDUSD=0.00005,
    USDCAD=0.00005,
    EURJPY=0.005,
    AUDUSD=0.00005,
    GBPJPY=0.005,
    CHFJPY=0.005,
    AUDNZD=0.00005,
    CADJPY=0.005,
)
# Rescale every entry to one fifth of the step listed above.
tick_increments = {pair: step / 5 for pair, step in tick_increments.items()}
# NOTE(review): presumably the monetary value of one tick -- confirm the unit.
tick_value = 10

### Trade data

In [3]:
# Candidate trades for EURJPY (TTC, Drawdown, Datetime, Direction, signal flags).
# NOTE(review): absolute local path -- only resolves on the original machine.
trades_csv = "D:/AlgoMLData/AddTrades/EURJPY_trades.csv"
# Aggregated EURJPY price bars (Open/High/Low/Close/Volume/Ticks/VWAP).
raw_csv = "D:/TickData_Agg/EURJPY.csv"

trades = pd.read_csv(trades_csv)
raw = pd.read_csv(raw_csv)

In [4]:
# Preview the first rows of the trade records to check the loaded columns.
trades.head()

Unnamed: 0,TTC,Drawdown,Datetime,Direction,sig20,sig30,sig50
0,5.0,-0.0455,2009-05-01 05:00:00,-1.0,1,1,1
1,94.0,-0.7015,2009-05-01 06:30:00,-1.0,0,1,1
2,8.0,-0.0925,2009-05-01 19:25:00,1.0,0,1,0
3,1.0,-0.005,2009-05-03 21:10:00,1.0,1,1,1
4,1.0,-0.0065,2009-05-03 21:15:00,1.0,0,1,1


In [5]:
# Preview the first rows of the raw price bars to check the loaded columns.
raw.head()

Unnamed: 0,Datetime,Open,High,Low,Close,Volume,Ticks,VWAP
0,2009-05-01 00:00:00,130.976,131.095,130.944,131.0505,74151.289,566,131.009345
1,2009-05-01 00:05:00,131.0535,131.4305,131.0405,131.377,94067.177,717,131.195505
2,2009-05-01 00:10:00,131.379,131.4225,131.178,131.2285,135518.167,1032,131.316053
3,2009-05-01 00:15:00,131.2295,131.2295,131.0595,131.066,75935.199,579,131.148876
4,2009-05-01 00:20:00,131.069,131.2985,131.0395,131.293,80666.4725,615,131.164996


### Keep only the trades selected by the model

In [6]:
# Scaled feature table for EURJPY; its rows are fed to the model further down.
# NOTE(review): absolute local path -- only resolves on the original machine.
scaled_csv = "D:/AlgoMLData/Scaled/EURJPY_scaled.csv"
scaled = pd.read_csv(scaled_csv)

In [7]:
# Preview the first rows of the scaled feature table.
scaled.head()

Unnamed: 0,TTC,Drawdown,Datetime,Direction,sig20,sig30,sig50,Change,LongVol,ShortVol,...,ShortKurtosis,LongProg,ShortProg,LongApproximateEntropy,ShortApproximateEntropy,LongSpectralEntropy,LongAutocorrelation,ShortAutocorrelation,LongStationarity,ShortStationarity
0,5.0,-0.0455,2009-05-01 05:00:00,-1.0,1,1,1,1.123349,0.933389,0.516558,...,-0.038449,-0.066001,0.649528,0.019302,0.148477,0.607646,-0.541893,-1.484444,1.0,1.0
1,94.0,-0.7015,2009-05-01 06:30:00,-1.0,0,1,1,1.30672,0.856784,0.731667,...,-0.426454,1.192574,1.917475,0.822639,0.936911,0.61415,-0.724218,-0.712851,0.0,1.0
2,8.0,-0.0925,2009-05-01 19:25:00,1.0,0,1,0,-0.671085,0.347272,0.092248,...,-0.574401,-0.269518,-0.927289,-0.058207,-0.447969,0.533248,2.244885,-0.759869,0.0,1.0
3,29.0,-0.442,2009-05-04 14:00:00,-1.0,1,1,1,1.147075,0.999614,0.352696,...,0.041098,1.215356,2.819551,0.537668,0.148477,-0.620983,-0.146293,-1.994449,1.0,1.0
4,4.0,-0.1095,2009-05-04 15:40:00,1.0,0,0,1,-1.174831,1.251437,0.892683,...,-1.25435,2.031994,0.00887,0.552776,-0.481105,0.650449,-0.464969,0.085302,1.0,1.0


In [8]:
# Load the previously trained model from disk; it is used below through
# `predictor.predict`.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load model files that come from a trusted source.
with open("../models/lgbm_2019-04-16", 'rb') as file:
    predictor = joblib.load(file)

In [9]:
# Score every row of `scaled` with the loaded model. Features are taken
# positionally, from the 4th column onward.
# NOTE(review): assumes the first three columns of `scaled` are not features -- confirm.
model_pred = np.asarray(predictor.predict(scaled.iloc[:, 3:].values))

# Datetimes of the rows the model labelled 1.
accepted_datetimes = scaled.Datetime.values[model_pred == 1]

# Select trades by Datetime instead of attaching the predictions positionally:
# the positional version only works when the filtered trades and `scaled` have
# exactly the same length and row order, and it fails when the cell is re-run
# (because `trades` has already been shortened). The explicit copy also avoids
# writing a new column onto a slice of the original frame.
trades = trades[trades.Datetime.isin(accepted_datetimes)].copy()
# Every remaining trade was predicted as 1 by construction.
trades['Pred'] = 1

### Calculate Drawdowns

In [10]:
## First get the index in the raw dataframe of the bar on which each trade starts
# NOTE(review): the index values are used as positions with .iloc below, which
# assumes `raw` still has its default 0..n-1 index -- confirm.
start_rows = raw[raw.Datetime.isin(trades.Datetime.values)].index.values

# The zips below pair start rows with trades purely by position and would
# silently drop or mis-assign TTC / Direction if the two differ in length
# (e.g. a trade Datetime missing from `raw`, or duplicated trade Datetimes).
if len(start_rows) != len(trades):
    raise ValueError(
        "Expected exactly one raw row per trade, got {} raw rows for {} trades".format(
            len(start_rows), len(trades)
        )
    )

## Then get the runs with the ttc: start row through start row + TTC, inclusive
idc = [
    np.arange(start, start + ttc + 1).astype(int).tolist()
    for start, ttc in zip(start_rows, trades.TTC.values)
]

## Get the trade progressions as independent dataframes (copies, so the
## column inserts below never touch `raw`)
dfs = [raw.iloc[rows, :].copy() for rows in idc]

# Each insert goes to position 1, so the final column order after the first
# column is: Trade_ID, Direction, CandleSize, Anchor, <remaining raw columns>.
for trade_id, (df, dir_) in enumerate(zip(dfs, trades.Direction.values)):
    ## Assign the anchor (open of the first bar) to each of the DFs
    df.insert(1, 'Anchor', df.Open.values[0])
    ## Get the candle size (|open - close| of the first bar) of each
    df.insert(1, 'CandleSize', abs(df.Open.values[0] - df.Close.values[0]))
    ## Assign the direction to each of the DFs
    df.insert(1, 'Direction', dir_)
    ## Add a trade number
    df.insert(1, 'Trade_ID', trade_id)

# Clear whatever output this cell produced.
clear_output()

In [11]:
## Stack all trade progressions into one frame so later steps can be vectorized
stacked = pd.concat(dfs).reset_index()
# Row label has the form "<original raw row index> <trade id>"
row_labels = stacked['index'].astype(str) + ' ' + stacked.Trade_ID.astype(str)
main = stacked.set_index(row_labels)

In [12]:
## Label of the final row of every trade, built as "<raw row index> <trade id>"
## so it matches the string index of `main`
idc_last = ["{} {}".format(rows[-1], trade_id) for trade_id, rows in enumerate(idc)]

In [13]:
## Final row of every trade, split by trade direction
tmp = main.loc[idc_last, :]
# Take explicit copies: both frames are modified with .loc afterwards, and
# doing that on a boolean-filtered slice raises SettingWithCopyWarning.
long = tmp[tmp.Direction == 1].copy()
short = tmp[tmp.Direction == -1].copy()

In [14]:
# Long trades whose final bar closed above, or traded above, the anchor:
# overwrite that bar's Close with the Anchor price.
long_hit = (long.Close > long.Anchor) | (long.High > long.Anchor)
long.loc[long_hit, 'Close'] = long.loc[long_hit, 'Anchor']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [15]:
# Short trades whose final bar closed below, or traded below, the anchor:
# overwrite that bar's Close with the Anchor price.
short_hit = (short.Close < short.Anchor) | (short.Low < short.Anchor)
short.loc[short_hit, 'Close'] = short.loc[short_hit, 'Anchor']

In [16]:
# Write the adjusted closes of the final rows back into `main`.
# Only `Close` was modified on `short` / `long`, so only that column is
# written, aligned by row label. Writing whole rows through `.values` instead
# pushes a mixed-dtype (object) array into every column by position.
main.loc[short.index, 'Close'] = short['Close']
main.loc[long.index, 'Close'] = long['Close']

In [17]:
is_short = main.Direction == -1
is_long = main.Direction == 1

# RDD: absolute distance between each bar's close and the trade's anchor.
main['RDD'] = abs(main.Close - main.Anchor)

# MDD: absolute distance from the anchor to the bar's High for shorts and to
# the bar's Low for longs; rows with any other Direction keep the value 1.
main['MDD'] = 1
main.loc[is_short, 'MDD'] = abs(main.loc[is_short, 'Anchor'] - main.loc[is_short, 'High'])
main.loc[is_long, 'MDD'] = abs(main.loc[is_long, 'Anchor'] - main.loc[is_long, 'Low'])

# On the final row of each trade: MDD becomes 0 when either RDD or MDD is 0,
# otherwise the larger of the two.
closing_pairs = main.loc[main.index.isin(idc_last), ['RDD', 'MDD']].values
main.loc[idc_last, 'MDD'] = [0 if min(pair) == 0 else max(pair) for pair in closing_pairs]

### Split back into one DataFrame per trade

In [18]:
# One DataFrame per trade, ordered by ascending Trade_ID. A single groupby
# pass replaces one full boolean scan of `main` per trade; row order inside
# each group is preserved.
trade_progs = [progression for _, progression in main.groupby('Trade_ID', sort=True)]

In [32]:
# Pool RDD values across trades by step after entry: drawdowns[i] holds, for
# every trade that has one, the RDD of its (i+1)-th row divided by the EURJPY
# tick increment.
eurjpy_tick = tick_increments['EURJPY']
drawdowns = {
    i : [] for i in range(100)
}
for df in trade_progs:
    # Position 0 is the entry row and is skipped.
    for i, dd in enumerate(df.RDD.iloc[1:]):
        # setdefault: steps beyond the 100 pre-created buckets used to raise
        # KeyError for any trade with more than 100 rows after entry.
        drawdowns.setdefault(i, []).append(dd / eurjpy_tick)

In [33]:
# CandleSize of each trade (taken from its first row), divided by the EURJPY
# tick increment.
eurjpy_tick = tick_increments['EURJPY']
candles = [progression.CandleSize.values[0] / eurjpy_tick for progression in trade_progs]