In [None]:
from research_utilities import *

# import libraries
from datetime import date
import pandas as pd
import numpy as np

# ignore warnings
import warnings
warnings.filterwarnings('ignore')

import matplotlib.pyplot as plt
%matplotlib inline
# The "seaborn" style sheet was renamed "seaborn-v0_8" in matplotlib 3.6 and the
# old name was removed in 3.8; prefer the new name, fall back on older installs.
plt.style.use("seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn")

# set dataframe display options
# (canonical option name; "display.max.columns" only resolved via regex matching)
pd.set_option("display.max_columns", None)
# pd.set_option("display.precision", 2)


## Data import 


In [None]:
#################
# Definitions
#################
# Where to read data from
# Bucket and object key are joined into an s3:// URL by the load cell.
s3_bucket    = 'mfx-sagemaker-dev'

# Object key of the gzipped tick CSV that becomes `signal_tick_data`.
# The commented-out keys below are alternative datasets kept for reference.
s3_signal_data_key = "demo/XAUUSD_20210101-000000_20210131-000000/2021-01-01T00:00:06.165Z-2021-01-29T21:59:59.113Z-Input_SAXO-MS-UBS-JUMP-LMAX-POP-JPM_RETAIL-INVAST-MS_ECN-JPM_INST-EDGEWATER-HOTSPOT-FXCM-XTX_top1-1.csv.gz"


# s3_signal_data_key = "go-data/XBTUSD_20201115-210000_20201127-220000/2020-11-15T21:05:32.434Z-2020-11-27T21:59:00.452Z-Input_DVCHAIN-IG_top2-1.csv.gz"

# s3_signal_data_key = "go-data/XAUUSD_20210103-220000_20210115-220000/2021-01-03T23:00:00.250Z-2021-01-15T21:59:58.123Z-Input_FASTMATCH-SAXO-CFH-INVAST-JPM-HOTSPOT-MORGAN_STANLEY-LMAX-JUMP-HC_TECH-XTX_top5-1.csv.gz"

# our_prices_key = "go-data/GBPUSD_20201118-070000_20201118-130000/2020-11-18T07:00:00.055Z-2020-11-18T13:00:00.838Z-Output_CLIENT_INST_PRICE_LDN_top1-1.csv.gz"

# xtx_key = "go-data/GBPUSD_20201118-070000_20201118-130000/2020-11-18T07:00:02.037Z-2020-11-18T13:00:00.575Z-Input_XTX_top5-1.csv.gz"

# %% data from Drive
# google_drive_filename = "/Volumes/GoogleDrive/Shared drives/data/think_xauusd_20200908_quotes.csv"





In [None]:
# Tunable parameters for the bar-based breakout / mean-reversion study.

# Bar size
# pandas offset alias used when resampling ticks into OHLC bars.
resample_period = '10s'

# Chart settings
# Not referenced anywhere else in this notebook.
chart_padding_secs = 10

# Breakout settings
# Multiple of the previous bar's rolling open-to-high / open-to-low vol that
# defines the breakout trigger distance from the bar open.
breakout_sigma = 1.5

# MR settings
# Multiple of the previous bar's rolling close-to-close vol that defines the
# mean-reversion trigger distance from the moving average.
mr_sigma = 10

# Hedging level
# Absolute net position at or above which the hedge loop flattens the position.
hedge_level = 3

# Hedger interest to fill time in ms
# Added to the bar timestamp when looking up the hedge fill price.
interest_to_fill = 10000

# n samples for moving average
# Window (in bars) for the rolling vol estimates and for the '6_hr_ma' column.
ma_samples = 3

# optimise exits
# When True the hedge loop evaluates its stay-long / stay-short momentum checks.
optimise_exit = True

In [None]:
#################
# Load from S3
#################
# Read the gzipped tick CSV straight from S3; column 't' is parsed as a
# datetime and becomes the index.
signal_tick_data = pd.read_csv(
    f's3://{s3_bucket}/{s3_signal_data_key}',
    parse_dates=['t'],
    index_col='t',
)
# our_price = pd.read_csv('s3://{}/{}'.format(s3_bucket, our_prices_key), index_col='t', parse_dates=['t'])
# xtx_prices = pd.read_csv('s3://{}/{}'.format(s3_bucket, xtx_key), index_col='t', parse_dates=['t'])

# optionally load from Drive instead
# google_drive_filename = "/Volumes/GoogleDrive/Shared drives/data/echo/go_gbpusd_20200810_quotes.csv"
# signal_tick_data = pd.read_csv(google_drive_filename, index_col='t', parse_dates=['t'])

In [None]:
# Turns the raw tick frame into `resample_period` OHLC bars of the top-of-book
# mid and derives the return / volatility columns used by the signal cells.

# resample if necessary
# signal_tick_data = signal_tick_data["2021-01-08 14:00:00":"2021-01-08 15:00:00"]


# These are plain assignments, not copies: all four names refer to the same
# DataFrame object, so a column added through one is visible through the others.
trigger_tick_data = signal_tick_data
reval_tick_data = signal_tick_data
# Source for tick charts
chart_tick_data = reval_tick_data

# Top-of-book spread. The second line recomputes the same column on the same
# object because reval_tick_data aliases signal_tick_data.
signal_tick_data['spread'] = signal_tick_data['Offer0'] - signal_tick_data['Bid0']
reval_tick_data['spread'] = reval_tick_data['Offer0'] - reval_tick_data['Bid0']


#################
# Aggregate tick data
#################
# Tick-level mid price, (Bid0 + Offer0) / 2, held under three names.
signal_mid_price_series = (signal_tick_data.loc[:, 'Bid0'] + signal_tick_data.loc[:, 'Offer0']) / 2
trigger_mid_price_series = (trigger_tick_data.loc[:, 'Bid0'] + trigger_tick_data.loc[:, 'Offer0']) / 2
reval_mid_price_series = (reval_tick_data.loc[:, 'Bid0'] + reval_tick_data.loc[:, 'Offer0']) / 2
# print(reval_mid_price_series.tail())

#################
# HLOC
#################
# Stored on the tick frame itself, so it is carried into later resamples of it.
signal_tick_data["mid"] = (signal_tick_data.loc[:, 'Bid0'] + signal_tick_data.loc[:, 'Offer0']) / 2


# adding bid and offer 
bid_sampler = signal_tick_data.loc[:, 'Bid0'].resample(resample_period)
bid_hloc = bid_sampler.ohlc() 

offer_sampler = signal_tick_data.loc[:, 'Offer0'].resample(resample_period)
offer_hloc = offer_sampler.ohlc() 

# original
# OHLC bars of the mid; `hloc` is the frame every later bar-level cell extends.
bar_sampler = trigger_mid_price_series.resample(resample_period)
hloc = bar_sampler.ohlc() 


#################
# Derived columns
#################

# Closing bid / offer of each bar alongside the mid OHLC.
hloc["Bid"] = bid_hloc["close"] 
hloc["Ask"] = offer_hloc["close"]  # tidy up naming of ask/offer


# Intra-bar excursions relative to the open, and bar-to-bar close return.
hloc['o_to_h'] = (hloc['high'] / hloc['open'] - 1)
hloc['o_to_l'] = (hloc['low'] / hloc['open'] - 1)
hloc['c_to_c'] = hloc['close'].pct_change()

# Rolling standard deviation of each return series over `ma_samples` bars.
hloc['o_to_h_vol'] = hloc['o_to_h'].rolling(ma_samples).std()
hloc['o_to_l_vol'] = hloc['o_to_l'].rolling(ma_samples).std()
hloc['c_to_c_vol'] = hloc['c_to_c'].rolling(ma_samples).std()

In [None]:
#################
# Trigger check
#################

def mid_trigger_price(timestamp, where):
    """Return the reval mid at the first tick of a bar that satisfies `where`.

    Parameters
    ----------
    timestamp : pandas.Timestamp
        Label (start) of the bar to scan.
    where : callable
        Receives the bar's slice of `trigger_mid_price_series` and returns a
        boolean Series marking the ticks that count as triggered.

    Returns
    -------
    The value of `reval_mid_price_series` as of the first triggered tick, or
    None when no tick in the bar satisfies `where`.
    """
    # Timestamp.freq was deprecated in pandas 1.4 and removed in 2.0, so the bar
    # width is derived from the resampling period the bars were built with.
    bar_end = timestamp + pd.tseries.frequencies.to_offset(resample_period)

    # Label slicing on a DatetimeIndex includes both end points.
    bar_ticks = trigger_mid_price_series[timestamp:bar_end]
    index = bar_ticks.where(where).dropna().first_valid_index()
    if index is None:
        return None
    return reval_mid_price_series.asof(index)

def trigger_check(row, high_trigger_col, low_trigger_col, high_side):
    """Decide which trigger, if any, fired during the bar described by `row`.

    `row` is one bar (its `name` is the bar timestamp). The mid is tested
    against the level in `high_trigger_col` (strictly above) and the level in
    `low_trigger_col` (strictly below). `high_side` is the trade direction for
    a high trigger; a low trigger trades `-high_side`.

    Returns `[label, trigger_price, side, trigger_level]` for the selected
    trigger, or None when neither level was crossed.
    """
    high_level = row[high_trigger_col]
    low_level = row[low_trigger_col]

    # Both sides are always evaluated, high first.
    high_fill = mid_trigger_price(row.name, lambda price: price.gt(high_level))
    low_fill = mid_trigger_price(row.name, lambda price: price.lt(low_level))

    candidates = [
        ['High', high_fill, +high_side, high_level],
        ['Low', low_fill, -high_side, low_level],
    ]
    fired = [candidate for candidate in candidates if candidate[1] is not None]
    if not fired:
        return None

    # NOTE(review): the ordering key is the trigger *price*, not the trigger
    # time, so when both sides fire in one bar the lower-priced one is chosen.
    # Confirm whether "earliest trigger" was intended.
    return sorted(fired, key=lambda candidate: candidate[1])[0]

In [None]:
#################
# Breakout signals
#################

# Trigger levels sit `breakout_sigma` previous-bar vols away from the open;
# shift(1) means each bar only uses the vol known before it started.
hloc['breakout_high_trigger'] = hloc['open'] * (1 + hloc['o_to_h_vol'].shift(1).mul(breakout_sigma))
hloc['breakout_low_trigger'] = hloc['open'] * (1 - hloc['o_to_l_vol'].shift(1).mul(breakout_sigma))

# Row-wise scan of the ticks in each bar; a high breakout trades +1.
hloc['breakout_triggered'] = hloc.apply(
    trigger_check,
    axis=1,
    args=('breakout_high_trigger', 'breakout_low_trigger', 1),
)
# Number of bars with no breakout trigger.
hloc['breakout_triggered'].isna().sum()

In [None]:
#################
# Mean reversion signals
#################

# Rolling mean of the bar close over `ma_samples` bars (the column keeps its
# historical '6_hr_ma' name regardless of the actual window).
hloc['6_hr_ma'] = hloc['close'].rolling(ma_samples).mean()
# hloc['6_hr_ma_sig'] = np.where(hloc['close'].shift(1) > hloc['6_hr_ma'].shift(1), 1, -1)

# Bands `mr_sigma` previous-bar close-to-close vols either side of the average.
hloc['mr_high_trigger'] = hloc['6_hr_ma'] * (1 + hloc['c_to_c_vol'].shift(1).mul(mr_sigma))
hloc['mr_low_trigger'] = hloc['6_hr_ma'] * (1 - hloc['c_to_c_vol'].shift(1).mul(mr_sigma))

# high_side is -1 here: piercing the upper band sells, the lower band buys.
hloc['mr_triggered'] = hloc.apply(
    trigger_check,
    axis=1,
    args=('mr_high_trigger', 'mr_low_trigger', -1),
)

In [None]:
# position
# `hloc_filtered` is another name for `hloc`, not a copy.
hloc_filtered = hloc

# For each strategy unpack the trigger record [label, price, side, level]
# into a signed trade, its price, and the cash (contra) amount exchanged.
for strategy in ('breakout', 'mr'):
    fired = hloc_filtered[strategy + '_triggered']
    hloc_filtered[strategy + '_trade'] = fired.map(lambda x: 0 if x is None else x[2])
    hloc_filtered[strategy + '_trade_price'] = fired.map(lambda x: 0 if x is None else x[1])
    hloc_filtered[strategy + '_contra_trade_amount'] = (
        -1 * hloc_filtered[strategy + '_trade'] * hloc_filtered[strategy + '_trade_price']
    )

# Net un-hedged position: running sum of both strategies' trades.
hloc_filtered['position'] = hloc_filtered['breakout_trade'].cumsum() + hloc_filtered['mr_trade'].cumsum()

# Dispersion of bar-to-bar close changes, used by the hedge loop.
stdev = np.std(hloc_filtered['close'].diff())

In [None]:
# hedge trades
# Walk the bars in order; whenever the net position (strategy trades plus
# hedges so far) reaches `hedge_level` in absolute size, flatten it with an
# offsetting hedge trade unless the exit optimisation says to hold on.
hloc_filtered['hedge_trade'] = 0.0
hloc_filtered['hedge_price'] = 0.0
hedge_trade_sum = 0.0
hloc_filtered['hedge_contra_amount'] = 0.0
current_position = 0.0
# NOTE(review): with optimise_exit=False this stays True for the whole loop,
# so no hedge is ever placed - confirm that is intended.
keep_position = True
i_prev = None
n = len(hloc_filtered)

# Loop invariants hoisted out of the loop.
momentum_band = 2.5 * stdev
fill_delay = pd.Timedelta(milliseconds=interest_to_fill)

for i in hloc_filtered.index:
    stay_long, stay_short = False, False
    current_position = hloc_filtered.at[i, 'position'] + hedge_trade_sum

    if optimise_exit:
        # The first bar has no previous close to compare against.
        if i_prev is not None:
            bar_close = hloc_filtered.at[i, 'close']
            prev_close = hloc_filtered.at[i_prev, 'close']
            # Hold the position while the last bar moved strongly in its
            # favour. The original assigned False in both branches, which made
            # the optimisation a no-op.
            stay_long = current_position > 0 and bar_close > prev_close + momentum_band
            stay_short = current_position < 0 and bar_close < prev_close - momentum_band
        keep_position = stay_long or stay_short

    if i_prev is not None and not keep_position and (np.absolute(current_position) >= hedge_level):
        hedge_trade = -1 * current_position
        hedge_trade_sum += hedge_trade
        # TODO: Review
        # Fill price: last bar close available `interest_to_fill` ms after
        # this bar's label.
        hedge_price = hloc_filtered.asof(i + fill_delay, subset=['close'])['close']
        hloc_filtered.loc[i, 'hedge_trade'] = hedge_trade
        hloc_filtered.loc[i, 'hedge_price'] = hedge_price
        hloc_filtered.loc[i, 'hedge_contra_amount'] = -1 * hedge_trade * hedge_price
    i_prev = i

In [None]:
# Running totals of units traded per leg ...
breakout_units = hloc_filtered['breakout_trade'].cumsum()
mr_units = hloc_filtered['mr_trade'].cumsum()
hedge_units = hloc_filtered['hedge_trade'].cumsum()
# ... and of the cash exchanged for them.
breakout_cash = hloc_filtered['breakout_contra_trade_amount'].cumsum()
mr_cash = hloc_filtered['mr_contra_trade_amount'].cumsum()
hedge_cash = hloc_filtered['hedge_contra_amount'].cumsum()

hloc_filtered['hedge_balance'] = hedge_units
hloc_filtered['overall_position'] = breakout_units + mr_units + hedge_units
hloc_filtered['overall_contra_position'] = breakout_cash + mr_cash + hedge_cash

In [None]:
# #pnl
# Mark-to-market pnl: cash balance plus the open position valued at the bar
# close. A flat position uses the cash balance alone, so a missing close does
# not turn the pnl into NaN.
is_flat = hloc_filtered['overall_position'] == 0
cash_balance = hloc_filtered['overall_contra_position']
marked_position = hloc_filtered['overall_position'] * hloc_filtered['close']
hloc_filtered['pnl'] = np.where(is_flat, cash_balance, marked_position + cash_balance)

In [None]:
# plot mr strategy returns
# Plots the combined 'pnl' column (breakout + mean reversion + hedges).
# The hard-coded "10s" matches the current resample_period.
hloc_filtered['pnl'].resample("10s").last().plot()
plt.legend();

In [None]:
# Rolling close-to-close vol, sampled once per minute.
hloc_filtered['c_to_c_vol'].resample("1T").last().plot()
plt.legend();

In [None]:
# Net position after hedging.
# NOTE(review): "5s" is finer than the 10s bars, so every other sample is NaN
# - confirm this up-sampling is intended.
hloc_filtered['overall_position'].resample("5s").last().plot()
plt.legend();

In [None]:
#volumes, pnl and yield
# Gross traded volume per leg (sum of absolute trade sizes).
breakout_volume = hloc_filtered['breakout_trade'].abs().sum()
mr_volume = hloc_filtered['mr_trade'].abs().sum()
hedge_volume = hloc_filtered['hedge_trade'].abs().sum()
total_volume = breakout_volume + mr_volume + hedge_volume

# Final pnl (kept as a one-row Series) and pnl per unit of volume traded.
total_pnl = hloc_filtered['pnl'].tail(1)
y = total_pnl / total_volume
display(breakout_volume, mr_volume, hedge_volume, total_volume, total_pnl, y)

In [None]:
# Inspect the bar index (range and frequency of the resampled bars).
hloc_filtered.index

In [None]:
# Breakout trades with a position cap and no hedging ("nhbo" columns).
nhbo_position = 0
maxPosition = 5

# Create the output columns up front. Previously they only came into existence
# on the first accepted trade, leaving NaN on every other bar (which then
# flowed into the cumulative position / pnl) and raising KeyError downstream
# when no trade fired at all.
for nhbo_column in ('nhbo_trade', 'nhbo_trade_price', 'nhbo_contra_amount'):
    hloc_filtered[nhbo_column] = 0.0

for i in hloc_filtered.index:
    trade = hloc_filtered.at[i, 'breakout_trade']
    if trade != 0:
        # NOTE(review): once |position| reaches maxPosition every later trade
        # is rejected, including ones that would reduce the position - confirm
        # whether reducing trades should be let through.
        if abs(nhbo_position) < maxPosition:
            hloc_filtered.loc[i, 'nhbo_trade'] = trade
            hloc_filtered.loc[i, 'nhbo_trade_price'] = hloc_filtered.at[i, 'breakout_trade_price']
            hloc_filtered.loc[i, 'nhbo_contra_amount'] = hloc_filtered.at[i, 'breakout_contra_trade_amount']
            nhbo_position = nhbo_position + trade
        else:
            hloc_filtered.loc[i, 'nhbo_trade'] = 0
        


In [None]:
# Running position and cash balance of the capped breakout book.
hloc_filtered['nhbo_position'] = hloc_filtered['nhbo_trade'].cumsum()
hloc_filtered['nhbo_contra_position'] = hloc_filtered['nhbo_contra_amount'].cumsum()

# Cash balance when flat, otherwise cash plus the position marked at the close.
nhbo_flat = hloc_filtered['nhbo_position'] == 0
nhbo_cash = hloc_filtered['nhbo_contra_position']
nhbo_marked = hloc_filtered['nhbo_position'] * hloc_filtered['close'] + nhbo_cash
hloc_filtered['nhbo_pnl'] = np.where(nhbo_flat, nhbo_cash, nhbo_marked)




### Order Flow Imbalance

In [None]:

# OFI lookback 
ofi_lookback = 50
ofi_resample = "10ms"

def ofi(quotes,level):
    """Returns Order Flow Imbalance for one level of the orderbook.

    Parameters
    ----------
    quotes : pandas.DataFrame
        Must contain the columns ``Bid{level}``, ``Bid{level}Qty``,
        ``Offer{level}`` and ``Offer{level}Qty``. It is not modified.
    level : int or str
        Book level used to build the column names.

    Returns
    -------
    pandas.Series
        Named ``'ofi'`` and indexed like `quotes`. Each row compares the quote
        with the previous row:

        * + current bid size   when the bid price did not fall
        * - previous bid size  when the bid price did not rise
        * + previous ask size  when the ask price did not fall
        * - current ask size   when the ask price did not rise
    """
    bid_price_label = 'Bid' + str(level)
    offer_price_label = 'Offer' + str(level)
    bid_qty_label = 'Bid' + str(level) + 'Qty'
    offer_qty_label = 'Offer' + str(level) + 'Qty'

    def _clean(frame):
        # Treat infinities as missing, then fill gaps forwards and backwards.
        # (np.NINF and fillna(method=...) are gone / deprecated in current
        # NumPy and pandas, hence -np.inf and ffill()/bfill().)
        return frame.replace([np.inf, -np.inf], np.nan).ffill().bfill()

    book = quotes[[bid_price_label, bid_qty_label, offer_price_label, offer_qty_label]]
    # Previous-row values are taken from the raw book and cleaned separately;
    # the back-fill makes the first row compare against itself, so it scores 0.
    current = _clean(book)
    previous = _clean(book.shift())

    bid_price = current[bid_price_label].to_numpy()
    bid_size = current[bid_qty_label].to_numpy()
    ask_price = current[offer_price_label].to_numpy()
    ask_size = current[offer_qty_label].to_numpy()
    prev_bid_price = previous[bid_price_label].to_numpy()
    prev_bid_size = previous[bid_qty_label].to_numpy()
    prev_ask_price = previous[offer_price_label].to_numpy()
    prev_ask_size = previous[offer_qty_label].to_numpy()

    # Built in one vectorised expression rather than through chained
    # `qdf['ofi'].loc[mask] += ...` writes, which silently do nothing under
    # pandas copy-on-write.
    imbalance = (
        np.where(bid_price >= prev_bid_price, bid_size, 0.0)
        - np.where(bid_price <= prev_bid_price, prev_bid_size, 0.0)
        + np.where(ask_price >= prev_ask_price, prev_ask_size, 0.0)
        - np.where(ask_price <= prev_ask_price, ask_size, 0.0)
    )
    return pd.Series(imbalance, index=quotes.index, name='ofi')




In [None]:
########################
# OFI using top x levels 
########################
# Regular `ofi_resample` grid: last quote in each bucket, forward-filled
# through empty buckets.
df_all = signal_tick_data.resample(ofi_resample).last().ffill()

# Force every price / size column of book levels 0-4 to float.
for book_level in range(5):
    for book_column in (
        f"Offer{book_level}Qty",
        f"Offer{book_level}",
        f"Bid{book_level}",
        f"Bid{book_level}Qty",
    ):
        df_all[book_column] = df_all[book_column].astype('float')

In [None]:
# OFI with levels 1, 2 and 3 (works better than 0,1,2)
# 100 period MA works well

# Sum the per-level imbalance over book levels 1-3.
ofi_level_1, ofi_level_2, ofi_level_3 = (ofi(df_all, book_level) for book_level in (1, 2, 3))
df_all['ofi'] = ofi_level_1 + ofi_level_2 + ofi_level_3

# Smooth over `ofi_lookback` samples; the raw signal is +1 when the smoothed
# imbalance is positive and -1 otherwise (including the NaN warm-up rows).
df_all["ofi_rolling"] = df_all['ofi'].rolling(ofi_lookback).mean()
df_all['ofi_signal'] = np.where(df_all['ofi_rolling'] > 0, 1, -1)

# Top-of-book mid and its sample-to-sample return.
df_all['mid'] = ((df_all['Bid0'] + df_all['Offer0']) / 2.0)
df_all['mid_change'] = df_all['mid'].pct_change()


# shift the signal
# so each return is earned with the signal from the previous sample
df_all['ofi_signal'] = df_all['ofi_signal'].shift(1)
df_all['ofi_pnl'] = (df_all['ofi_signal'] * df_all['mid_change'])

print("Cumulative PnL " + str(df_all['ofi_pnl'].cumsum().iloc[-1]))



In [None]:
# Cumulative OFI pnl (sum of signal * mid return), sampled once per minute.
df_all['ofi_pnl'].cumsum().resample("1T").last().plot()
plt.legend();

### Volume Weighted Mids

In [None]:
#######################
# Volume Weighted Mids
#######################

In [None]:
# CWM
# Size-weighted average bid / offer across book levels 1-4, averaged into a
# "conventionally weighted mid" that is compared with the top-of-book mid.
# Window (in samples) for smoothing the raw +/-1 signal.
cwm_lookback = 2

# Total size on each side over levels 1-4.
# NOTE(review): the "_5" suffix suggests five levels, but only levels 1-4 are
# summed and level 0 is excluded - confirm the naming.
df_all['weighted_bid_notional_5'] =  df_all["Bid1Qty"] + df_all["Bid2Qty"] + df_all["Bid3Qty"] + df_all["Bid4Qty"]
df_all['weighted_offer_notional_5'] =  df_all["Offer1Qty"] + df_all["Offer2Qty"] + df_all["Offer3Qty"] + df_all["Offer4Qty"]
# Size-weighted average price on each side.
df_all['weighted_bid_5'] = (df_all["Bid1Qty"] * df_all["Bid1"] + df_all["Bid2Qty"] * df_all["Bid2"] + df_all["Bid3Qty"] * df_all["Bid3"] + df_all["Bid4Qty"] * df_all["Bid4"]) / df_all['weighted_bid_notional_5'] 
df_all['weighted_offer_5'] = ( df_all["Offer1Qty"] * df_all["Offer1"] + df_all["Offer2Qty"] * df_all["Offer2"] + df_all["Offer3Qty"] * df_all["Offer3"] + df_all["Offer4Qty"] * df_all["Offer4"]) / df_all['weighted_offer_notional_5'] 
df_all['conventionally_weighted_mid_5'] = (df_all['weighted_bid_5'] + df_all['weighted_offer_5']) / 2
df_all['cwm'] = df_all['conventionally_weighted_mid_5']
# Raw signal: +1 when the weighted mid is at or above the plain mid, else -1.
df_all['cwm_signal'] = np.where(df_all['cwm'] >= df_all['mid'], 1, -1)


# slow down to XTX pace 
df_all['cwm_signal_slow'] = df_all['cwm_signal'].rolling(cwm_lookback).mean()


# shift signal and calculate returns
# 'cwm_signal' is overwritten here with the smoothed, lagged signal, so from
# this point it is a rolling mean of +/-1 values rather than strictly +/-1.
df_all['cwm_signal'] = df_all['cwm_signal_slow'].shift(1)
df_all['cwm_pnl'] = df_all['cwm_signal'] * df_all['mid_change']
df_all['cwm_pnl'].cumsum().resample("1T").last().plot()
plt.legend();

In [None]:
#VWM - level 1 
# Size-weighted mid at book level 1: each price is weighted by the size
# resting on the opposite side.
level_1_size = df_all["Bid1Qty"] + df_all["Offer1Qty"]
df_all['vwm'] = (df_all["Bid1Qty"] * df_all["Offer1"] + df_all["Bid1"] * df_all["Offer1Qty"])  / level_1_size

# +1 when the weighted mid is strictly above the plain mid, otherwise -1.
vwm_above_mid = df_all['vwm'] > df_all['mid']
df_all['vwm_signal'] = np.where(vwm_above_mid, 1, -1)

# shift the signal
df_all['vwm_signal'] = df_all['vwm_signal'].shift(1)

# calculate returns
df_all['vwm_pnl'] = df_all['vwm_signal'] * df_all['mid_change']
df_all['vwm_pnl'].cumsum().resample("1T").last().plot()
plt.legend();


### Trend Following

In [None]:
####################
# Trend and Crossover
####################

# Lookbacks are counted in samples of the df_all grid.
trend_lookback = 200

crossover_1_lookback = 100
crossover_2_lookback = 500

# simple trend following model for mid generation
# long while the mid is above its rolling mean, short otherwise; lagged one
# sample before being applied to returns
mid_above_trend = df_all['mid'] > df_all['mid'].rolling(trend_lookback).mean()
df_all['trend_signal'] = np.where(mid_above_trend, 1, -1)
df_all['trend_signal'] = df_all['trend_signal'].shift(1)
df_all['trend_pnl'] = df_all['trend_signal'] * df_all['mid_change']

# ma crossover
# long while the first rolling mean is above the second, short otherwise
first_above_second = (
    df_all['mid'].rolling(crossover_1_lookback).mean()
    > df_all['mid'].rolling(crossover_2_lookback).mean()
)
df_all['crossover_signal'] = np.where(first_above_second, 1, -1)
df_all['crossover_signal'] = df_all['crossover_signal'].shift(1)
df_all['crossover_pnl'] = df_all['crossover_signal'] * df_all['mid_change']


In [None]:
# Cumulative crossover / trend pnl, restricted to samples between 00:00 and 22:00.
df_all[['crossover_pnl', 'trend_pnl']].between_time('00:00', '22:00').cumsum().resample("1T").last().plot()
plt.legend();

In [None]:
# Majority vote: sign of the sum of the three (already lagged) signals.
# 'cwm_signal' here is the smoothed version written by the CWM cell.
df_all['combined_signal'] = np.sign(df_all['ofi_signal'] + df_all['trend_signal'] + df_all['cwm_signal'])
df_all['combined_pnl'] = df_all['mid_change'] * df_all['combined_signal']
# Plotted for the 07:00-19:00 window only.
df_all['combined_pnl'].between_time('07:00', '19:00').cumsum().resample("1T").last().plot()
plt.legend();

### Create 10ms breakout signal and combine with OFI

In [None]:
# Up-sample the bar-level net position onto the `ofi_resample` grid and keep
# only its sign as a +1 / 0 / -1 signal.
breakout_signal = hloc_filtered['overall_position'].resample(ofi_resample).last().ffill()
breakout_signal = np.sign(breakout_signal)

In [None]:
breakout_signal.resample("1T").last().plot()
plt.legend();

In [None]:
# Assignment aligns on the index; timestamps of df_all that are missing from
# breakout_signal become NaN.
df_all["breakout_signal"] = breakout_signal

In [None]:
# NOTE(review): unlike the other signals this one is not shifted before being
# multiplied by the return, and a bar's position is stamped at the bar's start
# although it reflects trades made during the bar - check for look-ahead bias.
df_all["breakout_pnl"] = df_all["breakout_signal"] * df_all['mid_change']

df_all["breakout_pnl"].cumsum().resample("1T").last().plot()
plt.legend();

### Adapt signal - Regime Switching 

In [None]:
# set adaptive lookback period
# Number of samples over which each signal's recent pnl is averaged.
adapt_rolling = 10000

# Rolling mean pnl per signal.
df_all['ofi_pnl_ma'] = df_all['ofi_pnl'].rolling(adapt_rolling).mean()
df_all['trend_pnl_ma'] = df_all['trend_pnl'].rolling(adapt_rolling).mean()
df_all['cwm_pnl_ma'] = df_all['cwm_pnl'].rolling(adapt_rolling).mean()
df_all['breakout_pnl_ma'] = df_all['breakout_pnl'].rolling(adapt_rolling).mean()


In [None]:
# Average of three of the rolling pnls (the CWM one is not included).
df_all['combined_pnl_ma'] = (df_all['ofi_pnl_ma'] + df_all['trend_pnl_ma'] + df_all['breakout_pnl_ma']) / 3

In [None]:
# Each weight is 1 while that signal's rolling pnl is positive and 0 otherwise
# (including the NaN warm-up), lagged one sample so it only uses past pnl.
df_all['ofi_weight'] = np.where(df_all['ofi_pnl_ma'] > 0, 1, 0)
df_all['ofi_weight'] = df_all['ofi_weight'].shift(1)

In [None]:
df_all['trend_weight'] = np.where(df_all['trend_pnl_ma'] > 0, 1, 0)
df_all['trend_weight'] = df_all['trend_weight'].shift(1)

In [None]:
df_all['cwm_weight'] = np.where(df_all['cwm_pnl_ma'] > 0, 1, 0)
df_all['cwm_weight'] = df_all['cwm_weight'].shift(1)

In [None]:
df_all['breakout_weight'] = np.where(df_all['breakout_pnl_ma'] > 0, 1, 0)
df_all['breakout_weight'] = df_all['breakout_weight'].shift(1)

In [None]:
# two versions of the adapt

# weighted by recent pnl
# Each signal only votes while its weight is 1; the sign of the weighted sum
# is the adaptive signal. The breakout weight is not used in this version.
df_all['adapt_signal'] = np.sign(
    (df_all['cwm_weight'] * df_all['cwm_signal']) + 
    (df_all['trend_weight'] * df_all['trend_signal']) +
    (df_all['ofi_weight'] * df_all['ofi_signal'])
)

# unweighted 
# df_all['adapt_signal'] = np.sign(
#     df_all['breakout_signal'] +
#     df_all['trend_signal'] +
#     df_all['ofi_signal']
# )

In [None]:
# Per-sample pnl of holding the adaptive signal.
df_all['adapt_pnl'] = df_all['mid_change'] * df_all['adapt_signal']

In [None]:
df_all['adapt_pnl'].cumsum().resample("1T").last().plot()
plt.legend();

### Pnl Per Trade and Holding Period

In [None]:
###############
# Counts the number of ticks between changes in the signal
# Calculates pnl per trade 
###############

# these are the df and column for the signals 
signal_df = df_all
signal_column = 'breakout_signal'

# the df and column for the pnl
# Despite the name this holds the pnl Series itself, not a column label.
pnl_column = df_all['breakout_pnl']


def SignalPersisenceFast(df,column_name): 
    """Return the lengths of consecutive runs of an unchanged, non-zero signal.

    A run is a maximal stretch of rows in ``df[column_name]`` that all hold
    the same non-zero value. Zero and NaN rows count as "no signal": they end
    the current run and are not counted themselves.

    Parameters
    ----------
    df : pandas.DataFrame
    column_name : str
        Column holding the signal.

    Returns
    -------
    list of int
        Run lengths in order of appearance; empty when the column holds no
        non-zero value.
    """
    Times = []
    run_value, run_length = None, 0
    for value in df[column_name].to_numpy():
        no_signal = bool(pd.isna(value)) or value == 0
        if not no_signal and run_length and value == run_value:
            run_length += 1
            continue
        # The signal changed or dropped out: close the open run exactly once.
        # (Previously the stale counter was appended again on every flat row.)
        if run_length:
            Times.append(run_length)
        run_value, run_length = (None, 0) if no_signal else (value, 1)
    # The run still open at the end of the data used to be dropped.
    if run_length:
        Times.append(run_length)
    return Times

# Holding periods for the configured signal column.
Times = SignalPersisenceFast(df_all,signal_column)
# Trade count is estimated as (non-null rows of column "s") / (mean holding period).
# NOTE(review): column "s" comes from the input data and is not created in
# this notebook - confirm it is the intended row counter.
number_trades = (df_all["s"].count() / np.mean(Times))
pnl_per_trade = pnl_column.sum() / number_trades



In [None]:
# prints out the results
# Each line relies on implicit concatenation of a plain string literal with
# the f-string that follows it.
print("**configs**")
print("adapt rolling: "f'{adapt_rolling:d}')
print("trend lookback: "f'{trend_lookback:.0f}')
print("cwm lookback: "f'{cwm_lookback:.0f}')
print("ofi rolling: "f'{ofi_lookback:.0f}')
print("ofi resample ms: "f'{ofi_resample}')
print()

# Distribution of holding periods, measured in samples of the signal grid.
print("**time in signal/trade**")
print("Mean units of time in trade "f'{(np.mean(Times)):.2f}')
print("25 percentile time in trade " + str(np.percentile(Times, 25, axis=0)))
print("Median time in trade " + str(np.median(Times)))
print("75 percentile time in trade " + str(np.percentile(Times, 75, axis=0)))
print()

print("**performance stats**")
print("Number of signals/trades: "f'{number_trades:.0f}')
# pnl is a sum of fractional returns, hence * 100 for percent and * 1e6 for
# "per million".
print("Cumulative PnL %: "f'{(pnl_column.sum() * 100):.2f}')
print("Average Trade PnL $ per million "f'{(pnl_per_trade*1000000):.2f}')

### Test our mids

In [None]:
# test our mid 
# NOTE(review): `our_price` is only assigned by a read_csv call that is
# commented out in the load cell, so this cell raises NameError on a fresh
# kernel unless that load is restored.
inst_price_london = our_price.resample(ofi_resample).last().ffill()
df_all["inst_ldn_bid"] = inst_price_london["Bid0"]
df_all["inst_ldn_offer"] = inst_price_london["Offer0"]
df_all["inst_ldn_mid"] = (df_all["inst_ldn_bid"] + df_all["inst_ldn_offer"]) / 2
# +1 when our mid is strictly above the market mid, else -1; lagged one sample.
df_all["inst_ldn_signal"] = np.where(df_all["inst_ldn_mid"] > df_all["mid"], 1, -1)
df_all["inst_ldn_signal"] = df_all["inst_ldn_signal"].shift(1)
df_all["inst_ldn_pnl"] = df_all["inst_ldn_signal"] * df_all["mid_change"]
df_all["inst_ldn_pnl"].cumsum().resample("1T").last().plot()
plt.legend();

In [None]:
# test XTX mids
# NOTE(review): `xtx_prices` is only assigned by a read_csv call that is
# commented out in the load cell, so this cell raises NameError on a fresh
# kernel. It also rebinds `xtx_prices` to its own resampled version, so
# running the cell twice resamples already-resampled data.
xtx_prices = xtx_prices.resample(ofi_resample).last().ffill()
df_all["xtx_bid"] = xtx_prices["Bid0"]
df_all["xtx_offer"] = xtx_prices["Offer0"]
df_all["xtx_mid"] = (df_all["xtx_bid"] + df_all["xtx_offer"]) / 2
# +1 when the XTX mid is strictly above the market mid, else -1; lagged one sample.
df_all["xtx_signal"] = np.where(df_all["xtx_mid"] > df_all["mid"], 1, -1)
df_all["xtx_signal"] = df_all["xtx_signal"].shift(1)
df_all["xtx_pnl"] = df_all["xtx_signal"] * df_all["mid_change"]
df_all[["xtx_pnl", "combined_pnl", "inst_ldn_pnl"]].cumsum().resample("1T").last().plot()
plt.legend();

In [None]:
# run correlations between XTX and ourselves
# Needs the 'xtx_pnl' and 'inst_ldn_pnl' columns created by the two cells above.
df_corr = df_all[["xtx_pnl", "adapt_pnl", "inst_ldn_pnl", "trend_pnl", "ofi_pnl", "cwm_pnl", "combined_pnl"]]
# First column of the correlation matrix = correlation of every pnl with xtx_pnl.
print("XTX correlation with: "f'{df_corr.corr().iloc[:, 0]}')

In [None]:
# run a regression to investigate XTX prices
import statsmodels.api as sm

# Missing pnl values are treated as zero return. np.NaN was removed in
# NumPy 2.0, so fillna is used rather than replace(np.NaN, 0).
xtx_regression = df_corr.fillna(0)
X = xtx_regression[["trend_pnl", "ofi_pnl", "cwm_pnl"]]
y = xtx_regression['xtx_pnl']
# Fit OLS of the XTX pnl on the trend / OFI / CWM pnls. No intercept is
# included; uncomment add_constant to fit one.
# X = sm.add_constant(X)
est = sm.OLS(y, X).fit()
print(est.summary())

In [None]:
# old results
# (kept as comments: as bare text in a code cell these lines raised
# SyntaxError and stopped "Run All")

# XTX mid as a predictor 
# **time in signal/trade**
# Mean units of time in trade 115.23
# 25 percentile time in trade 4.0
# Median time in trade 15.0
# 75 percentile time in trade 103.0

# **performance stats**
# Number of signals/trades: 18745
# Cumulative PnL %: 12.75
# Average Trade PnL $ per million 6.80

# our INST mid as a predictor 
# **time in signal/trade**
# Mean units of time in trade 69.44
# 25 percentile time in trade 3.0
# Median time in trade 7.0
# 75 percentile time in trade 45.0

# **performance stats**
# Number of signals/trades: 31109
# Cumulative PnL %: 4.14
# Average Trade PnL $ per million 1.33

# models - slow
# **time in signal/trade**
# Mean units of time in trade 255.59
# 25 percentile time in trade 5.0
# Median time in trade 47.0
# 75 percentile time in trade 360.0

# **performance stats**
# Number of signals/trades: 8452
# Cumulative PnL %: 4.69
# Average Trade PnL $ per million 5.55


# models - faster
# **configs**
# adapt rolling: 10000
# trend lookback: 200
# cwm lookback: 5
# ofi rolling: 25
# ofi resample ms: 10ms

# **time in signal/trade**
# Mean units of time in trade 36.76
# 25 percentile time in trade 7.0
# Median time in trade 25.0
# 75 percentile time in trade 39.0

# **performance stats**
# Number of signals/trades: 58766
# Cumulative PnL %: 9.78
# Average Trade PnL $ per million 1.66

In [None]:
# Summary statistics of the raw top-of-book prices.
signal_tick_data[["Bid0", "Offer0"]].describe()

### Run simulations

In [None]:
# input dataframe

# stores as class variables can be accessed in the strategy
# e.g. df = [close, bid, offer, signal1, signal2, signal3]

# set up high frequency trend following
# Select the bar columns, capitalise the OHLC names, and drop bars with any
# missing value.
_hloc = (
    hloc[["open", "high", "low", "close", "Bid", "Ask"]]
    .rename(columns={"open": "Open", "high": "High", "low": "Low", "close": "Close"})
    .dropna()
)

In [None]:
# import backtesting library
from backtesting import Backtest, Strategy
from backtesting.lib import crossover

def SMA(values, n):
    """
    Simple moving average of `values` over a trailing window of `n` points.

    The result is a pandas Series as long as `values`; the first `n - 1`
    entries are NaN because their window is incomplete.
    """
    series = pd.Series(values)
    trailing_window = series.rolling(n)
    return trailing_window.mean()

In [None]:
from backtesting.lib import crossover

class SmaCross(Strategy):
    """Moving-average crossover: long when sma1 crosses above sma2, short on
    the opposite cross."""

    # Window lengths of the two averages, declared as class variables
    # for later optimization
    n1 = 400
    n2 = 500

    def init(self):
        # Register both moving averages of the close as indicators.
        closes = self.data.Close
        self.sma1 = self.I(SMA, closes, self.n1)
        self.sma2 = self.I(SMA, closes, self.n2)
#         self.signal = self.data.Signal
        # self.ask1 = self.data.Ask

    def next(self):
        first, second = self.sma1, self.sma2
        # if self.signal == 1:

        # sma1 crossed above sma2: close whatever is open and go long.
        if crossover(first, second):
            self.position.close()
            self.buy()
            return

        # sma1 crossed below sma2: close whatever is open and go short.
        if crossover(second, first):
            self.position.close()
            self.sell()

In [None]:
%%script echo

    # Not executed: this cell starts with `%%script echo`, so its body is handed
    # to the shell's echo command rather than run as Python. It shows the
    # crossover test written out by hand.
    def next(self):
        # sma1 was below sma2 on the previous bar and is above it now.
        if (self.sma1[-2] < self.sma2[-2] and
                self.sma1[-1] > self.sma2[-1]):
            self.position.close()
            self.buy()

        # sma1 was above sma2 on the previous bar and is below it now.
        elif (self.sma1[-2] > self.sma2[-2] and    
              self.sma1[-1] < self.sma2[-1]):
            self.position.close()
            self.sell()

In [None]:
%%time

from backtesting import Backtest

# Backtest SmaCross on the bars with 1,000,000 starting cash and zero commission.
# NOTE(review): `fee_rate` is not a Backtest keyword I can confirm from this
# notebook - verify it against the installed backtesting package.
bt = Backtest(_hloc, SmaCross, cash=1_000_000, commission=.000, fee_rate = 0.00002)
stats = bt.run()

stats

In [None]:
# Chart of the backtest with the volume panel off and the P/L panel on.
bt.plot(plot_volume=False, plot_pl=True)


In [None]:
# Run optimizer

In [None]:
# Grid-search both window lengths for the highest final equity, keeping only
# combinations with n1 < n2. `stats` is rebound to the best run's statistics.
# NOTE(review): both ranges start at 0, so a zero-length moving-average window
# is part of the grid - consider starting at 50.
stats = bt.optimize(n1=range(0, 500, 50),
                    n2=range(0, 2000, 50),
                    maximize='Equity Final [$]',
                    constraint=lambda param: param.n1 < param.n2,
                   )
print(stats)

In [None]:
# Strategy instance (with its parameters) behind the current `stats`.
stats._strategy


In [None]:
# Copies the stats to the system clipboard.
# NOTE(review): this needs a clipboard backend on the machine running the
# kernel - it may fail on a headless / remote kernel.
stats.to_clipboard()

##### 

In [None]:
stats['_equity_curve']  # Contains equity/drawdown curves. DrawdownDuration is only defined at ends of DD periods.
# plotly_line_chart(stats, "Equity", "DrawdownPct")

In [None]:
stats['_trades']  # Contains individual trade data


In [None]:
# Equity and drawdown columns only.
df = pd.DataFrame(stats['_equity_curve'][["Equity", "DrawdownPct"]])

In [None]:
stats['_equity_curve'].head()

In [None]:
df.head()

In [None]:
# `df` is rebound here: last equity value of each calendar day.
df = pd.DataFrame(stats['_equity_curve']["Equity"]).resample("D").last()

In [None]:
# Day-over-day return of the equity curve.
df["return"] = df["Equity"].pct_change()
# Sharpe ratio from daily returns, annualised with 365 days and no risk-free rate.
daily_sharpe = (df["return"].mean() * 365) / (df["return"].std() * np.sqrt(365))
# The value belongs inside the f-string; the previous form,
# f"Daily Sharpe: "{daily_sharpe}, was a SyntaxError.
print(f"Daily Sharpe: {daily_sharpe}")

In [None]:
# Daily equity curve; the Sharpe value is passed as the chart title.
df["Equity"].plot(title=daily_sharpe);

In [None]:
# TODO

# Backtrader - return calculations. Bid-ask spread. Total spread paid. 
# Signal with and without spread. 
# Signal from other models. 1, 0, -1. 
# Return stats for intraday. 

# Walk forward each hour
# Flip between MR and TF
# Differentiate between short and long term TF
# Cross market trend initiation? 

# AHL style with variance 
# Markov switching 

In [None]:
# EURUSD is otherwise only imported by a later cell, so on a fresh kernel this
# cell raised NameError; import it here so the notebook runs top to bottom.
from backtesting.test import EURUSD

EURUSD.head()

### Machine learning 

In [None]:
from backtesting.test import SMA, EURUSD

# Work on a copy so the library's shared EURUSD sample frame is left untouched
# (previously Bid / Ask were added to EURUSD itself before it was copied).
data = EURUSD.copy()

# The sample data has no quotes, so the close stands in for both bid and ask.
data["Bid"] = data["Close"]
data["Ask"] = data["Close"]

In [None]:

def BBANDS(data, n_lookback, n_std):
    """Bollinger bands of the typical price, (High + Low + Close) / 3.

    Returns an `(upper, lower)` pair: the `n_lookback`-bar rolling mean plus
    and minus `n_std` rolling standard deviations.
    """
    typical_price = (data.High + data.Low + data.Close) / 3
    window = typical_price.rolling(n_lookback)
    centre = window.mean()
    half_width = n_std * window.std()
    return centre + half_width, centre - half_width


# Build the feature columns. Every feature name starts with 'X' so get_X can
# select them by name; the order in which they are added here is the column
# order of the design matrix.
close = data.Close.values
sma10 = SMA(data.Close, 10)
sma20 = SMA(data.Close, 20)
sma50 = SMA(data.Close, 50)
sma100 = SMA(data.Close, 100)
upper, lower = BBANDS(data, 20, 2)

# Design matrix / independent features:

# Price-derived features
# Distance of the close from each moving average, as a fraction of the close.
data['X_SMA10'] = (close - sma10) / close
data['X_SMA20'] = (close - sma20) / close
data['X_SMA50'] = (close - sma50) / close
data['X_SMA100'] = (close - sma100) / close

# Gap between successive moving averages, as a fraction of the close.
data['X_DELTA_SMA10'] = (sma10 - sma20) / close
data['X_DELTA_SMA20'] = (sma20 - sma50) / close
data['X_DELTA_SMA50'] = (sma50 - sma100) / close

# Indicator features
data['X_MOM'] = data.Close.pct_change(periods=2)
data['X_BB_upper'] = (upper - close) / close
data['X_BB_lower'] = (lower - close) / close
data['X_BB_width'] = (upper - lower) / close
# True for every bar outside the 2017-09-27 .. 2017-12-14 window.
# NOTE(review): hand-picked date range - document where it comes from.
data['X_Sentiment'] = ~data.index.to_series().between('2017-09-27', '2017-12-14')

# Some datetime features for good measure
data['X_day'] = data.index.dayofweek
data['X_hour'] = data.index.hour

# Drop the warm-up rows left incomplete by the rolling windows and cast
# everything (including the boolean feature) to float.
data = data.dropna().astype(float)

In [None]:
import numpy as np

# need to set 

def get_X(data):
    """Return model design matrix X.

    Selects every column whose name contains the letter 'X' and returns
    their values as a 2-D array.
    """
    is_feature = ['X' in str(column) for column in data.columns]
    return data.loc[:, is_feature].values


def get_y(data):
    """Return dependent variable y.

    y is the direction of the close-to-close return over the next 48 bars:
    1 for a rise, -1 for a fall, and 0 when the move is within +/-0.4%
    (bounds inclusive). The last 48 rows have no future return and stay NaN.
    """
    forward_return = data.Close.pct_change(48).shift(-48)  # Returns after 48 periods
    is_small = forward_return.between(-.004, .004)         # Moves of at most 0.4% either way
    # Small moves become 0; np.sign then maps the rest to +/-1 and keeps NaN.
    return np.sign(forward_return.where(~is_small, 0))


def get_clean_Xy(df):
    """Return (X, y) with the rows whose target is NaN removed.

    Only the target is checked for NaN; the feature matrix is filtered with
    the same row mask so the two stay aligned.
    """
    features = get_X(df)
    target = get_y(df).values
    has_target = ~np.isnan(target)
    return features[has_target], target[has_target]

In [None]:
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

# Fit a kNN classifier on half of the rows and score it on the other half.
X, y = get_clean_Xy(data)
# NOTE(review): train_test_split shuffles rows by default, and each target
# looks 48 bars ahead, so train and test rows overlap in time - the accuracy
# printed below is likely optimistic.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.5, random_state=0)

clf = KNeighborsClassifier(7)  # Model the output based on 7 "nearest" examples
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

_ = pd.DataFrame({'y_true': y_test, 'y_pred': y_pred}).plot(figsize=(15, 2), alpha=.7)
print('Classification accuracy: ', np.mean(y_test == y_pred))

In [None]:
%%time

from backtesting import Backtest, Strategy

N_TRAIN = 400


class MLTrainOnceStrategy(Strategy):
    """kNN-driven strategy that is trained once, on the first N_TRAIN bars.

    After the training window, every bar is classified as up (1), flat (0) or
    down (-1). An up / down forecast opens a position in that direction with
    take-profit and stop-loss one `price_delta` away from the close.
    """

    # Distance of take-profit / stop-loss from the close, as a fraction of it.
    price_delta = .004  # 0.4%

    def init(self):        
        # Init our model, a kNN classifier
        self.clf = KNeighborsClassifier(7)

        # Train the classifier in advance on the first N_TRAIN examples
        df = self.data.df.iloc[:N_TRAIN]
        X, y = get_clean_Xy(df)
        self.clf.fit(X, y)

        # Plot y for inspection
        self.I(get_y, self.data.df, name='y_true')

        # Prepare empty, all-NaN forecast indicator
        self.forecasts = self.I(lambda: np.repeat(np.nan, len(self.data)), name='forecast')

    def next(self):
        # Skip the training, in-sample data
        if len(self.data) < N_TRAIN:
            return

        # Proceed only with out-of-sample data. Prepare some variables
        high, low, close = self.data.High, self.data.Low, self.data.Close
        current_time = self.data.index[-1]

        # Forecast the next movement
        X = get_X(self.data.df.iloc[-1:])
        forecast = self.clf.predict(X)[0]

        # Update the plotted "forecast" indicator
        self.forecasts[-1] = forecast

        # If our forecast is upwards and we don't already hold a long position
        # place a long order for 20% of available account equity. Vice versa for short.
        # Also set target take-profit and stop-loss prices to be one price_delta
        # away from the current closing price.
        upper, lower = close[-1] * (1 + np.r_[1, -1]*self.price_delta)

        if forecast == 1 and not self.position.is_long:
            self.buy(size=.2, tp=upper, sl=lower)
        elif forecast == -1 and not self.position.is_short:
            self.sell(size=.2, tp=lower, sl=upper)

        # Additionally, set aggressive stop-loss on trades that have been open 
        # for more than two days
        for trade in self.trades:
            if current_time - trade.entry_time > pd.Timedelta('2 days'):
                # Compare against the current bar's low / high. `low` and
                # `high` are whole price arrays, and max()/min() of a scalar
                # and an array raises "truth value of an array is ambiguous".
                if trade.is_long:
                    trade.sl = max(trade.sl, low[-1])
                else:
                    trade.sl = min(trade.sl, high[-1])


# Backtest the train-once strategy on the feature frame; margin=.05 and a
# commission of 0.000001 are passed to Backtest.
# NOTE(review): the walk-forward run further down uses commission=.00001 -
# confirm which value is intended so the two runs are comparable.
bt = Backtest(data, MLTrainOnceStrategy, commission=.000001, margin=.05)
bt.run()


In [None]:
# Chart of the train-once backtest.
bt.plot()


In [None]:
%%time

class MLWalkForwardStrategy(MLTrainOnceStrategy):
    """Variant of MLTrainOnceStrategy that refits the classifier periodically
    on the most recent N_TRAIN bars."""

    def next(self):
        bars_seen = len(self.data)

        # Skip the cold start period with too few values available
        if bars_seen < N_TRAIN:
            return

        # Refit only on every 20th bar. Since 20 << N_TRAIN little recency is
        # lost, while the run is much faster than refitting on every bar.
        if bars_seen % 20 == 0:
            recent = self.data.df[-N_TRAIN:]
            X, y = get_clean_Xy(recent)
            self.clf.fit(X, y)

        # Forecasting and order handling are the same as in MLTrainOnceStrategy.
        return super().next()


# Backtest the walk-forward variant on the same data; `bt` is rebound.
# NOTE(review): commission here is .00001, ten times the .000001 used for the
# train-once run - confirm which value is intended.
bt = Backtest(data, MLWalkForwardStrategy, commission=.00001, margin=.05)
bt.run()

In [None]:
# Chart of the walk-forward backtest.
bt.plot()
