In [1]:
# import libraries
import pandas as pd
pd.options.display.max_columns = None 

import numpy as np
import math
import datetime as dt
from datetime import timedelta

# charting libraries
import matplotlib.pyplot as plt
%matplotlib inline
# Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8*" and 3.8
# removed the old names, so use whichever spelling this installation provides.
plt.style.use("seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn")

import warnings
warnings.filterwarnings('ignore')

from strategy_models import *



## Make sure the breakout is running the correct file. 


In [2]:
#################
# Definitions
#################
# Where to read data from
# Bucket and object keys used only by the (currently commented-out) S3 reads
# in the load cell.
s3_bucket    = 'mfx-sagemaker-dev'

# Key used for signal_tick_data in the S3 variant of the load cell.
s3_signal_data_key = "go-data/GBPUSD_20201206-210000_20201211-220000/2020-12-06T22:00:01.125Z-2020-12-11T21:59:56.007Z-Input_CFH-JPM-UBS-CITADEL-MORGAN_STANLEY-JUMP-STATESTREET-HC_TECH-FASTMATCH-INVAST-CLIENT_INST_PRICE_LDN-HOTSPOT-LMAX-XTX-GOLDMAN_SACHS_top5-1.csv.gz"

# Key used for our_price in the S3 variant of the load cell.
our_prices_key = "go-data/GBPUSD_20201206-210000_20201211-220000/2020-12-06T21:00:02.339Z-2020-12-11T21:59:56.007Z-Output_CLIENT_INST_PRICE_LDN_top1-1.csv.gz"

# Key used for xtx_prices in the S3 variant of the load cell.
xtx_key = "go-data/GBPUSD_20201206-210000_20201211-220000/2020-12-06T22:00:20.596Z-2020-12-11T21:59:45.384Z-Input_XTX_top5-1.csv.gz"

# %% data from Drive
# CSV actually read by the load cell.
# NOTE(review): absolute, machine-specific mount path; the file name suggests
# a different date from the S3 keys above - confirm which data set is intended.
google_drive_filename = "/Volumes/GoogleDrive/Shared drives/data/echo/go_gbpusd_20200804_quotes.csv"



In [3]:
# Bar size
# Passed to data_processing() as the resampling period.
resample_period = '5s'

# Chart settings
# NOTE(review): not referenced anywhere in the visible part of this notebook.
chart_padding_secs = 10

# Breakout settings
# Multiplier on the previous bar's open-to-high / open-to-low volatility when
# building the breakout trigger levels.
breakout_sigma = 1.5

# MR settings
# Multiplier on the previous bar's close-to-close volatility when building the
# mean-reversion trigger levels.
mr_sigma = 10

# Hedging level
# The hedge loop flattens the position once its absolute size reaches this value.
hedge_level = 3

# Hedger interest to fill time in ms
# Added to the bar timestamp when looking up the hedge fill price.
interest_to_fill = 10000

# n samples for moving average
# Rolling window (in bars) of the '6_hr_ma' column; also passed to data_processing().
ma_samples = 3

# optimise exits
# Enables the stay-long / stay-short evaluation inside the hedge loop.
optimise_exit = True

In [4]:
#################
# Load data
#################

# S3 variant (disabled). These are the only statements in the notebook that
# create our_price and xtx_prices, which later cells read.
# signal_tick_data = pd.read_csv('s3://{}/{}'.format(s3_bucket, s3_signal_data_key), index_col='t', parse_dates=['t'])
# our_price = pd.read_csv('s3://{}/{}'.format(s3_bucket, our_prices_key), index_col='t', parse_dates=['t'])
# xtx_prices = pd.read_csv('s3://{}/{}'.format(s3_bucket, xtx_key), index_col='t', parse_dates=['t'])

# Active source: the quotes CSV on the shared drive, indexed by its parsed 't' column.
signal_tick_data = pd.read_csv(
    google_drive_filename,
    parse_dates=['t'],
    index_col='t',
)

# The trigger, revaluation and chart feeds all alias the signal feed (same object, no copy).
trigger_tick_data = reval_tick_data = chart_tick_data = signal_tick_data

In [6]:
# Run the shared pre-processing step; it hands back the tick frame, three
# mid-price series and the bar frame (hloc) that the strategy cells build on.
(
    signal_tick_data,
    signal_mid_price_series,
    trigger_mid_price_series,
    reval_mid_price_series,
    hloc,
) = data_processing(
    signal_tick_data,
    trigger_tick_data,
    reval_tick_data,
    resample_period,
    ma_samples,
)

In [None]:
#################
# Breakout signals
#################

# Trigger levels sit breakout_sigma multiples of the previous bar's
# open-to-high / open-to-low volatility above / below the current open.
prior_up_vol = hloc['o_to_h_vol'].shift(1)
prior_down_vol = hloc['o_to_l_vol'].shift(1)
hloc['breakout_high_trigger'] = hloc['open'] * (1 + (breakout_sigma * prior_up_vol))
hloc['breakout_low_trigger'] = hloc['open'] * (1 - (breakout_sigma * prior_down_vol))

# Evaluate every bar against its two trigger levels.
hloc['breakout_triggered'] = hloc.apply(
    lambda bar: trigger_check(bar, 'breakout_high_trigger', 'breakout_low_trigger', 1),
    axis=1,
)

# Number of bars for which no trigger result was produced.
hloc['breakout_triggered'].isna().sum()

In [None]:
#################
# Mean reversion signals
#################

# Rolling mean of the close over ma_samples bars (the column name is historical).
hloc['6_hr_ma'] = hloc['close'].rolling(ma_samples).mean()
# hloc['6_hr_ma_sig'] = np.where(hloc['close'].shift(1) > hloc['6_hr_ma'].shift(1), 1, -1)

# Band half-width: mr_sigma multiples of the previous bar's close-to-close volatility.
mr_band = mr_sigma * (hloc['c_to_c_vol'].shift(1))
hloc['mr_high_trigger'] = hloc['6_hr_ma'] * (1 + mr_band)
hloc['mr_low_trigger'] = hloc['6_hr_ma'] * (1 - mr_band)

# Same trigger test as the breakout cell, with the opposite direction argument (-1).
hloc['mr_triggered'] = hloc.apply(
    lambda bar: trigger_check(bar, 'mr_high_trigger', 'mr_low_trigger', -1),
    axis=1,
)


In [None]:
# position
# hloc_filtered is an alias of hloc (no copy): every column added below also lands on hloc.
hloc_filtered = hloc


def _trigger_field(column, position):
    """Take element `position` of each trigger result in `column`; rows holding None give 0."""
    return hloc_filtered[column].map(lambda hit: 0 if hit is None else hit[position])


# Breakout leg: element 2 of the trigger result is used as the trade, element 1
# as its price; the contra amount is the opposite-signed trade * price.
hloc_filtered['breakout_trade'] = _trigger_field('breakout_triggered', 2)
hloc_filtered['breakout_trade_price'] = _trigger_field('breakout_triggered', 1)
hloc_filtered['breakout_contra_trade_amount'] = (
    -1 * hloc_filtered['breakout_trade'] * hloc_filtered['breakout_trade_price']
)

# Mean-reversion leg, built the same way.
hloc_filtered['mr_trade'] = _trigger_field('mr_triggered', 2)
hloc_filtered['mr_trade_price'] = _trigger_field('mr_triggered', 1)
hloc_filtered['mr_contra_trade_amount'] = (
    -1 * hloc_filtered['mr_trade'] * hloc_filtered['mr_trade_price']
)

# Unhedged position: running sum of both legs.
hloc_filtered['position'] = (
    hloc_filtered['breakout_trade'].cumsum() + hloc_filtered['mr_trade'].cumsum()
)

# Standard deviation of bar-to-bar close changes; used by the hedge loop.
close_changes = hloc_filtered['close'].diff()
stdev = np.std(close_changes)

In [None]:
# hedge trades
# Walk the bars in order; whenever the position (strategy position plus all
# hedges done so far) reaches hedge_level in absolute size and is not being
# kept, book a hedge that flattens it.
hloc_filtered['hedge_trade'] = 0.0
hloc_filtered['hedge_price'] = 0.0
hloc_filtered['hedge_contra_amount'] = 0.0
hedge_trade_sum = 0.0
current_position = 0.0
keep_position = True
i_prev = None
n = len(hloc_filtered)

# Delay between deciding to hedge and the price at which the hedge is filled.
fill_delay = pd.Timedelta(milliseconds=interest_to_fill)

for i in hloc_filtered.index:
    stay_long, stay_short = False, False
    # Scalar lookups (.at) avoid building a full row Series for every read.
    close_now = hloc_filtered.at[i, 'close']
    current_position = hloc_filtered.at[i, 'position'] + hedge_trade_sum

    if optimise_exit:
        # The previous-bar comparison is only possible from the second bar on;
        # the original looked up .loc[None] on the first bar.
        if i_prev is not None:
            close_prev = hloc_filtered.at[i_prev, 'close']
            # NOTE(review): both branches assign False, so stay_long/stay_short
            # can never become True and keep_position is always False here.
            # If the intent is to stay in a position that is moving strongly
            # in its favour, these should assign True - confirm before changing.
            if current_position > 0 and close_now > close_prev + 2.5 * stdev:
                stay_long = False
            if current_position < 0 and close_now < close_prev - 2.5 * stdev:
                stay_short = False
        keep_position = stay_long or stay_short

    # Explicit None test: never rely on the truthiness of an index label.
    if i_prev is not None and not keep_position and (np.absolute(current_position) >= hedge_level):
        hloc_filtered.at[i, 'hedge_trade'] = -1 * current_position
        hedge_trade_sum += -1 * current_position
        # TODO: Review
        # Fill price: last available close at or before (bar time + fill delay).
        hedge_price = hloc_filtered.asof(i + fill_delay, subset=['close'])['close']
        hloc_filtered.at[i, 'hedge_price'] = hedge_price
        hloc_filtered.at[i, 'hedge_contra_amount'] = (
            -1 * hloc_filtered.at[i, 'hedge_trade'] * hloc_filtered.at[i, 'hedge_price']
        )
    i_prev = i

In [None]:
# Running totals: hedge balance, net position across all three trade legs,
# and the matching net contra (cash) amount.
hedge_running_total = hloc_filtered['hedge_trade'].cumsum()
hloc_filtered['hedge_balance'] = hedge_running_total
hloc_filtered['overall_position'] = (
    hloc_filtered['breakout_trade'].cumsum()
    + hloc_filtered['mr_trade'].cumsum()
    + hedge_running_total
)
hloc_filtered['overall_contra_position'] = (
    hloc_filtered['breakout_contra_trade_amount'].cumsum()
    + hloc_filtered['mr_contra_trade_amount'].cumsum()
    + hloc_filtered['hedge_contra_amount'].cumsum()
)

In [None]:
# #pnl
# Flat rows carry just the accumulated contra amount; open rows additionally
# value the open position at the bar close.
net_position = hloc_filtered['overall_position']
net_contra = hloc_filtered['overall_contra_position']
marked_to_close = net_position * hloc_filtered['close'] + net_contra
hloc_filtered['pnl'] = np.where(net_position == 0, net_contra, marked_to_close)

In [None]:
# Plot the running strategy PnL (the 'pnl' column), one point every 10 seconds.
pnl_axes = hloc_filtered['pnl'].resample("10s").last().plot()
pnl_axes.legend();

In [None]:
# Close-to-close volatility, one point per minute.
# "T" is deprecated as the minute alias since pandas 2.2; "min" is accepted by
# old and new pandas alike.
hloc_filtered['c_to_c_vol'].resample("1min").last().plot()
plt.legend();

In [None]:
# Net position (strategy trades plus hedges) on the 5-second grid.
position_axes = hloc_filtered['overall_position'].resample("5s").last().plot()
position_axes.legend();

In [None]:
# volumes, pnl and yield
# Traded volume per leg is the sum of absolute trade sizes.
breakout_volume = hloc_filtered['breakout_trade'].abs().sum()
mr_volume = hloc_filtered['mr_trade'].abs().sum()
hedge_volume = hloc_filtered['hedge_trade'].abs().sum()
total_volume = breakout_volume + mr_volume + hedge_volume

# Final PnL as a scalar (tail(1) returned a one-row Series, which made the
# yield a Series as well).
total_pnl = hloc_filtered['pnl'].iloc[-1] if len(hloc_filtered) else np.nan

# Yield = final PnL per unit of traded volume; undefined when nothing traded.
y = total_pnl / total_volume if total_volume else np.nan
display(breakout_volume, mr_volume, hedge_volume, total_volume, total_pnl, y)

In [None]:
# Display the bar index (the labels the hedge and nhbo loops iterate over).
hloc_filtered.index

In [None]:
# Breakout trades with a cap on the absolute position and no hedging.
nhbo_position = 0
maxPosition = 5

# Start every nhbo column at zero on all rows. Previously the columns only
# existed on rows with a trade, so the cumulative sums taken afterwards were
# NaN on every other row (and missing entirely if there were no trades).
hloc_filtered['nhbo_trade'] = 0.0
hloc_filtered['nhbo_trade_price'] = 0.0
hloc_filtered['nhbo_contra_amount'] = 0.0

for i, trade in hloc_filtered['breakout_trade'].items():
    # Nothing to do on bars without a (valid) breakout trade.
    if trade == 0 or pd.isna(trade):
        continue

    below_cap = abs(nhbo_position) < maxPosition
    # A trade that shrinks the position must stay allowed at the cap,
    # otherwise the position is frozen forever once the cap is reached.
    reduces_exposure = abs(nhbo_position + trade) < abs(nhbo_position)

    if below_cap or reduces_exposure:
        hloc_filtered.at[i, 'nhbo_trade'] = trade
        hloc_filtered.at[i, 'nhbo_trade_price'] = hloc_filtered.at[i, 'breakout_trade_price']
        hloc_filtered.at[i, 'nhbo_contra_amount'] = hloc_filtered.at[i, 'breakout_contra_trade_amount']
        nhbo_position = nhbo_position + trade
    # Rejected trades keep the zero defaults set above.
        


In [None]:
# Running position and contra amount of the capped breakout trades.
hloc_filtered['nhbo_position'] = hloc_filtered['nhbo_trade'].cumsum()
hloc_filtered['nhbo_contra_position'] = hloc_filtered['nhbo_contra_amount'].cumsum()

# Same PnL rule as the main strategy: contra amount only when flat, otherwise
# the open position valued at the close plus the contra amount.
nhbo_open_value = (
    hloc_filtered['nhbo_position'] * hloc_filtered['close']
    + hloc_filtered['nhbo_contra_position']
)
nhbo_is_flat = hloc_filtered['nhbo_position'] == 0
hloc_filtered['nhbo_pnl'] = np.where(
    nhbo_is_flat,
    hloc_filtered['nhbo_contra_position'],
    nhbo_open_value,
)




### Order Flow Imbalance

In [None]:

# OFI lookback
# Rolling window (in resampled rows) used to smooth the OFI series.
ofi_lookback = 200
# Grid onto which the tick data is resampled before the OFI is computed.
ofi_resample = "10ms"

def ofi(quotes,level):
    """Returns Order Flow Imbalance for one level of the orderbook.

    For each row, compared with the previous row of the same level:

    * bid price unchanged or higher  -> add the current bid size
    * bid price unchanged or lower   -> subtract the previous bid size
    * offer price unchanged or higher -> add the previous offer size
    * offer price unchanged or lower  -> subtract the current offer size

    Parameters
    ----------
    quotes : pandas.DataFrame
        Must contain the columns ``Bid<level>``, ``Bid<level>Qty``,
        ``Offer<level>`` and ``Offer<level>Qty``. It is not modified.
    level : int or str
        Book level suffix used to build those column names.

    Returns
    -------
    pandas.Series
        Series named ``'ofi'`` on the index of ``quotes``.

    Raises
    ------
    KeyError
        If one of the four required columns is missing.
    """
    bid_price_label = 'Bid' + str(level)
    offer_price_label = 'Offer' + str(level)
    bid_qty_label = 'Bid' +str(level) + 'Qty'
    offer_qty_label = 'Offer' + str(level)+'Qty'

    def _clean(frame):
        # Treat +/-inf as missing, carry the last valid value forward, then
        # back-fill whatever is still missing at the very start.
        # (-np.inf replaces np.NINF, which no longer exists in NumPy 2.)
        return frame.replace([np.inf, -np.inf], np.nan).ffill().bfill()

    # Only the four columns of this level are needed; selecting them also
    # avoids copying (and filling) the entire quote frame.
    raw = quotes[[bid_price_label, bid_qty_label, offer_price_label, offer_qty_label]]
    book = _clean(raw)
    # Previous-row values. The first row has no predecessor and is back-filled,
    # so it compares equal to itself and contributes an OFI of 0.
    prev = _clean(raw.shift())

    bid_px, bid_qty = book[bid_price_label], book[bid_qty_label]
    ask_px, ask_qty = book[offer_price_label], book[offer_qty_label]
    prev_bid_px, prev_bid_qty = prev[bid_price_label], prev[bid_qty_label]
    prev_ask_px, prev_ask_qty = prev[offer_price_label], prev[offer_qty_label]

    # Built in one vectorised expression instead of chained
    # `frame['ofi'].loc[mask] += ...` updates, which write to a temporary
    # (and are silently lost) under pandas copy-on-write.
    flow = (
        np.where(bid_px >= prev_bid_px, bid_qty, 0.0)
        - np.where(bid_px <= prev_bid_px, prev_bid_qty, 0.0)
        + np.where(ask_px >= prev_ask_px, prev_ask_qty, 0.0)
        - np.where(ask_px <= prev_ask_px, ask_qty, 0.0)
    )
    return pd.Series(flow, index=quotes.index, name='ofi')




In [None]:
# Put the ticks on a regular ofi_resample grid: last quote per bucket, with
# empty buckets carried forward from the previous one.
df_all = (
    signal_tick_data
    .resample(ofi_resample)
    .last()
    .ffill()
)


In [None]:
########################
# OFI using top x levels 
########################
# Rebuild the regular-grid frame so this cell can be re-run on its own.
df_all = signal_tick_data.resample(ofi_resample).last().ffill()

# Cast the price and size columns of book levels 0-4 to float (one loop
# instead of twenty copy-pasted assignments).
book_columns = [
    f"{side}{book_level}{suffix}"
    for book_level in range(5)
    for side, suffix in (("Offer", "Qty"), ("Offer", ""), ("Bid", ""), ("Bid", "Qty"))
]
for book_column in book_columns:
    df_all[book_column] = df_all[book_column].astype('float')

In [None]:
# OFI with levels 1, 2 and 3 (works better than 0,1,2)
# 100 period MA works well

# Combined order-flow imbalance of book levels 1-3 and its rolling mean.
df_all['ofi'] = ofi(df_all,1) + ofi(df_all,2) + ofi(df_all,3)
df_all["ofi_rolling"] = df_all['ofi'].rolling(ofi_lookback).mean()

# +1 while the smoothed OFI is positive, otherwise -1 (rows where the rolling
# mean is still NaN compare False and therefore also give -1). The signal is
# lagged by one row so that it only uses information from the previous row.
ofi_direction = np.where(df_all['ofi_rolling'] > 0, 1, -1)
df_all['ofi_signal'] = pd.Series(ofi_direction, index=df_all.index).shift(1)

# Top-of-book mid and its row-to-row percentage change.
df_all['mid'] = ((df_all['Bid0'] + df_all['Offer0']) / 2.0)
df_all['mid_change'] = df_all['mid'].pct_change()

# Per-row return of holding the lagged signal.
df_all['ofi_pnl'] = (df_all['ofi_signal'] * df_all['mid_change'])

ofi_cumulative_pnl = df_all['ofi_pnl'].cumsum().iloc[-1]
print("Cumulative PnL " + str(ofi_cumulative_pnl))



In [None]:
# Thresholded OFI: long only while the smoothed OFI is at or above +5000,
# short only while it is at or below -5000, flat in between.
smoothed_ofi = df_all['ofi_rolling']
df_all["ofi_threshold_signal_long"] = np.where(smoothed_ofi >= 5000, 1, 0)
df_all["ofi_threshold_signal_short"] = np.where(smoothed_ofi <= -5000, -1, 0)

# Net of the two legs, lagged by one row before it is applied to returns.
df_all["ofi_threshold_signal"] = (
    df_all["ofi_threshold_signal_long"] + df_all["ofi_threshold_signal_short"]
).shift(1)
df_all['ofi_threshold_pnl'] = (df_all['ofi_threshold_signal'] * df_all['mid_change'])

ofi_threshold_cumulative_pnl = df_all['ofi_threshold_pnl'].cumsum().iloc[-1]
print("Cumulative PnL " + str(ofi_threshold_cumulative_pnl))

In [None]:
# Cumulative OFI PnL, one point per minute.
# "min" replaces the "T" minute alias, which is deprecated since pandas 2.2.
df_all['ofi_pnl'].cumsum().resample("1min").last().plot()
plt.legend();

In [None]:
# Summary statistics of the combined level 1-3 OFI series.
df_all['ofi'].describe()

### Skewness and kurtosis

In [None]:
# have a look at kurtosis
# Rolling kurtosis (100 rows) and rolling skewness (200 rows) of the mid returns.
mid_returns = df_all["mid_change"]
df_all["kurtosis"] = mid_returns.rolling(100).kurt()
df_all["skewness"] = mid_returns.rolling(200).skew()

In [None]:
# Rolling kurtosis over the whole sample.
kurtosis_axes = df_all["kurtosis"].plot()
kurtosis_axes.legend();

In [None]:
# 1 while the rolling kurtosis exceeds 50, otherwise 0 (NaN rows compare False and give 0).
kurtosis_is_high = df_all["kurtosis"] > 50
df_all["kurtosis_signal"] = np.where(kurtosis_is_high, 1, 0)



In [None]:
# Direction from the sign of the rolling skewness, switched on only while the
# kurtosis signal is 1, then lagged by one row.
df_all["skewness_signal"] = np.where(df_all["skewness"] > 0, 1, -1)
df_all["skewness_signal"] = df_all["skewness_signal"] * df_all["kurtosis_signal"]
df_all["skewness_signal"] = df_all["skewness_signal"].shift(1)
df_all["skewness_pnl"] = df_all["skewness_signal"] * df_all["mid_change"]

# "min" replaces the "t" minute alias, which is deprecated since pandas 2.2.
df_all["skewness_pnl"].cumsum().resample("1min").last().plot()
plt.legend();

### Volume Weighted Mids

In [None]:
#######################
# Volume Weighted Mids
#######################

In [None]:
# CWM
cwm_lookback = 50

# Size-weighted average bid and offer over book levels 1-4.
# NOTE: despite the "_5" suffix in the column names, level 0 is not included.
df_all['weighted_bid_notional_5'] = (
    df_all["Bid1Qty"] + df_all["Bid2Qty"] + df_all["Bid3Qty"] + df_all["Bid4Qty"]
)
df_all['weighted_offer_notional_5'] = (
    df_all["Offer1Qty"] + df_all["Offer2Qty"] + df_all["Offer3Qty"] + df_all["Offer4Qty"]
)
df_all['weighted_bid_5'] = (
    df_all["Bid1Qty"] * df_all["Bid1"]
    + df_all["Bid2Qty"] * df_all["Bid2"]
    + df_all["Bid3Qty"] * df_all["Bid3"]
    + df_all["Bid4Qty"] * df_all["Bid4"]
) / df_all['weighted_bid_notional_5']
df_all['weighted_offer_5'] = (
    df_all["Offer1Qty"] * df_all["Offer1"]
    + df_all["Offer2Qty"] * df_all["Offer2"]
    + df_all["Offer3Qty"] * df_all["Offer3"]
    + df_all["Offer4Qty"] * df_all["Offer4"]
) / df_all['weighted_offer_notional_5']
df_all['conventionally_weighted_mid_5'] = (df_all['weighted_bid_5'] + df_all['weighted_offer_5']) / 2
df_all['cwm'] = df_all['conventionally_weighted_mid_5']

# +1 when the weighted mid is at or above the top-of-book mid, otherwise -1.
df_all['cwm_signal'] = np.where(df_all['cwm'] >= df_all['mid'], 1, -1)


# slow down to XTX pace 
# Rolling mean of the +/-1 signal, i.e. a value between -1 and 1.
df_all['cwm_signal_slow'] = df_all['cwm_signal'].rolling(cwm_lookback).mean()


# shift signal and calculate returns
# From here on 'cwm_signal' holds the lagged *smoothed* signal, not +/-1.
df_all['cwm_signal'] = df_all['cwm_signal_slow'].shift(1)
df_all['cwm_pnl'] = df_all['cwm_signal'] * df_all['mid_change']

# "min" replaces the "T" minute alias, which is deprecated since pandas 2.2.
df_all['cwm_pnl'].cumsum().resample("1min").last().plot()
plt.legend();

In [None]:
#VWM - level 1 
# Level-1 mid weighted by the opposite side's size: a larger bid size pulls
# the value towards the offer and vice versa.
df_all['vwm'] = (
    df_all["Bid1Qty"] * df_all["Offer1"] + df_all["Bid1"] * df_all["Offer1Qty"]
) / (df_all["Bid1Qty"] + df_all["Offer1Qty"])

# +1 when the weighted mid is strictly above the top-of-book mid, else -1.
# (An earlier `>=` assignment that was immediately overwritten by this one
# has been dropped; the effective rule is unchanged.)
df_all['vwm_signal'] = np.where(df_all['vwm'] > df_all['mid'], 1, -1)

# shift the signal
df_all['vwm_signal'] = df_all['vwm_signal'].shift(1)

# calculate returns
df_all['vwm_pnl'] = df_all['vwm_signal'] * df_all['mid_change']

# "min" replaces the "T" minute alias, which is deprecated since pandas 2.2.
df_all['vwm_pnl'].cumsum().resample("1min").last().plot()
plt.legend();
plt.legend();


### Trend Following

In [None]:
####################
# Trend and Crossover
####################

trend_lookback = 2000

crossover_1_lookback = 100
crossover_2_lookback = 500

mid_price = df_all['mid']

# simple trend following model for mid generation
# +1 while the mid is above its trend_lookback rolling mean, else -1; lagged one row.
trend_mean = mid_price.rolling(trend_lookback).mean()
trend_direction = np.where(mid_price > trend_mean, 1, -1)
df_all['trend_signal'] = pd.Series(trend_direction, index=df_all.index).shift(1)
df_all['trend_pnl'] = df_all['trend_signal'] * df_all['mid_change']

# ma crossover
# +1 while the shorter rolling mean is above the longer one, else -1; lagged one row.
fast_mean = mid_price.rolling(crossover_1_lookback).mean()
slow_mean = mid_price.rolling(crossover_2_lookback).mean()
crossover_direction = np.where(fast_mean > slow_mean, 1, -1)
df_all['crossover_signal'] = pd.Series(crossover_direction, index=df_all.index).shift(1)
df_all['crossover_pnl'] = df_all['crossover_signal'] * df_all['mid_change']


In [None]:
# Cumulative crossover and trend PnL for rows between 00:00 and 22:00, one
# point per minute. "min" replaces the "T" alias deprecated since pandas 2.2.
df_all[['crossover_pnl', 'trend_pnl']].between_time('00:00', '22:00').cumsum().resample("1min").last().plot()
plt.legend();

### Create 10ms breakout signal and combine with OFI

In [None]:
# Direction (-1 / 0 / +1) of the net bar-level position, carried forward onto
# the ofi_resample grid.
breakout_signal = np.sign(
    hloc_filtered['overall_position'].resample(ofi_resample).last().ffill()
)

In [None]:
# Attach the breakout direction to the tick-grid frame (aligned on the index).
# NOTE(review): unlike the ofi/trend/cwm signals in this notebook, no shift(1)
# is applied before this signal is multiplied with mid_change - confirm that
# this does not introduce look-ahead.
df_all["breakout_signal"] = breakout_signal

In [None]:
# Per-row return of holding the breakout direction.
df_all["breakout_pnl"] = df_all["breakout_signal"] * df_all['mid_change']

# Cumulative PnL, one point per minute ("min" replaces the "T" alias
# deprecated since pandas 2.2).
df_all["breakout_pnl"].cumsum().resample("1min").last().plot()
plt.legend();

In [None]:
# Majority vote: sign of the sum of the five individual signals. Note that
# cwm_signal is a smoothed value in [-1, 1] rather than +/-1 at this point.
df_all['combined_signal'] = np.sign(df_all['ofi_signal'] + df_all['trend_signal'] + df_all['cwm_signal'] + df_all["skewness_signal"] + df_all["breakout_signal"])
df_all['combined_pnl'] = df_all['mid_change'] * df_all['combined_signal']

# Cumulative PnL for rows between 07:00 and 19:00, one point per minute
# ("min" replaces the "T" alias deprecated since pandas 2.2).
df_all['combined_pnl'].between_time('07:00', '19:00').cumsum().resample("1min").last().plot()
plt.legend();

In [None]:
# Breakout signal and PnL restricted to rows between 07:00 and 19:00.
breakout_resample = (
    df_all[["breakout_signal", "breakout_pnl"]]
    .between_time('07:00', '19:00')
)

In [None]:
# Peek at the first rows of the 07:00-19:00 breakout frame.
breakout_resample.head()

### Adapt signal - Regime Switching 

In [None]:
# set adaptive lookback period
adapt_rolling = 10000

# Rolling mean PnL of each strategy over the adaptive lookback window.
for pnl_name, pnl_ma_name in (
    ('ofi_pnl', 'ofi_pnl_ma'),
    ('trend_pnl', 'trend_pnl_ma'),
    ('cwm_pnl', 'cwm_pnl_ma'),
    ('breakout_pnl', 'breakout_pnl_ma'),
):
    df_all[pnl_ma_name] = df_all[pnl_name].rolling(adapt_rolling).mean()


In [None]:
# Average rolling PnL of the OFI, trend and breakout strategies
# (cwm_pnl_ma is not part of this average).
df_all['combined_pnl_ma'] = (
    df_all['ofi_pnl_ma']
    + df_all['trend_pnl_ma']
    + df_all['breakout_pnl_ma']
) / 3

In [None]:
# 1 while the OFI strategy's rolling mean PnL is positive, else 0; lagged one row.
ofi_in_profit = np.where(df_all['ofi_pnl_ma'] > 0, 1, 0)
df_all['ofi_weight'] = pd.Series(ofi_in_profit, index=df_all.index).shift(1)

In [None]:
# 1 while the trend strategy's rolling mean PnL is positive, else 0; lagged one row.
trend_in_profit = np.where(df_all['trend_pnl_ma'] > 0, 1, 0)
df_all['trend_weight'] = pd.Series(trend_in_profit, index=df_all.index).shift(1)

In [None]:
# 1 while the CWM strategy's rolling mean PnL is positive, else 0; lagged one row.
cwm_in_profit = np.where(df_all['cwm_pnl_ma'] > 0, 1, 0)
df_all['cwm_weight'] = pd.Series(cwm_in_profit, index=df_all.index).shift(1)

In [None]:
# 1 while the breakout strategy's rolling mean PnL is positive, else 0; lagged one row.
breakout_in_profit = np.where(df_all['breakout_pnl_ma'] > 0, 1, 0)
df_all['breakout_weight'] = pd.Series(breakout_in_profit, index=df_all.index).shift(1)

In [None]:
# two versions of the adapt

# weighted by recent pnl
# Each signal only votes while its 0/1 weight is switched on; the adaptive
# signal is the sign of the summed votes.
weighted_votes = (
    (df_all['cwm_weight'] * df_all['cwm_signal'])
    + (df_all['trend_weight'] * df_all['trend_signal'])
    + (df_all['ofi_weight'] * df_all['ofi_signal'])
)
df_all['adapt_signal'] = np.sign(weighted_votes)

# unweighted 
# df_all['adapt_signal'] = np.sign(
#     df_all['breakout_signal'] +
#     df_all['trend_signal'] +
#     df_all['ofi_signal']
# )

In [None]:
# Per-row return of holding the adaptive signal.
df_all['adapt_pnl'] = df_all['mid_change'].mul(df_all['adapt_signal'])

In [None]:
# Cumulative adaptive-signal PnL, one point per minute ("min" replaces the
# "T" alias deprecated since pandas 2.2).
df_all['adapt_pnl'].cumsum().resample("1min").last().plot()
plt.legend();

## AND OR signals
#### TODO: amend this so that it triggers when the breakout signal is +1 and the other signal is +1, and then keeps the position for as long as the breakout signal stays +1.

In [None]:
# AND rule: trade only while breakout, trend and OFI all point the same way
# (their sum is exactly +3 or -3).
signal_agreement = df_all["breakout_signal"] + df_all["trend_signal"] + df_all["ofi_signal"]
df_all["AND_1_signal_long"] = np.where(signal_agreement == 3, 1, 0)
df_all["AND_1_signal_short"] = np.where(signal_agreement == -3, -1, 0)

In [None]:
# PnL of the long and short AND legs, and their total.
and_returns = df_all["mid_change"]
df_all["AND_1_pnl_long"] = df_all["AND_1_signal_long"] * and_returns
df_all["AND_1_pnl_short"] = df_all["AND_1_signal_short"] * and_returns
df_all["AND_1_pnl_total"] = df_all["AND_1_pnl_long"] + df_all["AND_1_pnl_short"]

In [None]:
# Cumulative PnL of the AND rule, one point per minute ("min" replaces the
# "T" alias deprecated since pandas 2.2).
df_all['AND_1_pnl_total'].cumsum().resample("1min").last().plot()
plt.legend();

In [None]:
# OR 
# Long while breakout + trend sums to at least +1, short while it sums to at
# most -1, flat when the two cancel out.
or_vote = df_all["breakout_signal"] + df_all["trend_signal"]
df_all["OR_1_signal_long"] = np.where(or_vote >= 1, 1, 0)
df_all["OR_1_signal_short"] = np.where(or_vote <= -1, -1, 0)
df_all["OR_1_pnl_long"] = df_all["OR_1_signal_long"] * df_all["mid_change"]
df_all["OR_1_pnl_short"] = df_all["OR_1_signal_short"] * df_all["mid_change"]
df_all["OR_1_pnl_total"] = df_all["OR_1_pnl_long"] + df_all["OR_1_pnl_short"]

# Cumulative PnL, one point per minute ("min" replaces the "T" alias
# deprecated since pandas 2.2).
df_all['OR_1_pnl_total'].cumsum().resample("1min").last().plot()
plt.legend();

### Pnl Per Trade and Holding Period

In [None]:
###############
# Counts the number of ticks between changes in the signal
# Calculates pnl per trade 
###############

# these are the df and column for the signals 
# NOTE(review): signal_df is the 07:00-19:00 subset, while pnl_column below is
# taken from the full df_all - confirm that both should cover the same rows.
signal_df = breakout_resample
signal_column = 'breakout_signal'

# the df and column for the pnl
# Despite its name this is the PnL Series itself, not a column label.
pnl_column = df_all['breakout_pnl']


def SignalPersisenceFast(df,column_name): 
    """Return the lengths of the consecutive runs of an unchanged, non-zero signal.

    A run is a maximal stretch of rows on which ``df[column_name]`` holds the
    same non-zero value. Rows where the signal is 0 or NaN are treated as
    "not in a trade": they end the current run and are not counted themselves.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the signal column.
    column_name : str
        Name of the signal column.

    Returns
    -------
    list of int
        One entry per run, in order of occurrence (empty if the signal is
        never active).
    """
    Times = []
    run_value = None
    run_length = 0
    for value in df[column_name].values:
        # NaN is the only value that is not equal to itself.
        in_trade = value == value and value != 0
        if in_trade and run_length and value == run_value:
            run_length += 1
            continue
        # The signal changed or went flat: close the open run exactly once
        # (flat rows used to re-append the previous length on every row).
        if run_length:
            Times.append(run_length)
        if in_trade:
            run_value, run_length = value, 1
        else:
            run_value, run_length = None, 0
    # The run still open at the end of the data used to be dropped.
    if run_length:
        Times.append(run_length)
    return Times

# Run lengths (in rows) of the selected signal.
Times = SignalPersisenceFast(signal_df,signal_column)
# Trade count estimated as (non-null rows of df_all["s"]) / (mean run length).
# NOTE(review): the "s" column is not created in this notebook, so it must come
# from the loaded data; it also counts all rows of df_all, whereas Times is
# measured on signal_df - confirm both are intended.
number_trades = (df_all["s"].count() / np.mean(Times))
pnl_per_trade = pnl_column.sum() / number_trades

# prints out the results
# print("name of strategy: "f'{signal_column}')
# print()
# print("**configs**")
# print("adapt rolling: "f'{adapt_rolling:d}')
# print("trend lookback: "f'{trend_lookback:.0f}')
# print("cwm lookback: "f'{cwm_lookback:.0f}')
# print("ofi rolling: "f'{ofi_lookback:.0f}')
# print("ofi resample ms: "f'{ofi_resample}')
# print()

# Distribution of run lengths, in rows of signal_df.
print("**time in signal/trade**")
print("Mean units of time in trade "f'{(np.mean(Times)):.2f}')
print("25 percentile time in trade " + str(np.percentile(Times, 25, axis=0)))
print("Median time in trade " + str(np.median(Times)))
print("75 percentile time in trade " + str(np.percentile(Times, 75, axis=0)))
print()

# Headline numbers: summed PnL scaled by 100, and PnL per trade scaled by 1,000,000.
print("**performance stats**")
print("Number of signals/trades: "f'{number_trades:.0f}')
print("Cumulative PnL %: "f'{(pnl_column.sum() * 100):.2f}')
print("Average Trade PnL $ per million "f'{(pnl_per_trade*1000000):.2f}')

In [None]:
# Recorded output of an earlier run, kept for reference. It is commented out
# because the raw text is not valid Python and stopped "Run All" at this cell.
#
# name of strategy: xtx_signal
#
# **configs**
# adapt rolling: 10000
# trend lookback: 2000
# cwm lookback: 50
# ofi rolling: 20
# ofi resample ms: 10ms
#
# **time in signal/trade**
# Mean units of time in trade 115.23
# 25 percentile time in trade 4.0
# Median time in trade 15.0
# 75 percentile time in trade 103.0
#
# **performance stats**
# Number of signals/trades: 18745
# Cumulative PnL %: 12.75
# Average Trade PnL $ per million 6.80

### Test our mid

In [None]:
# test our mid 
# NOTE: our_price is only created by the S3 read that is commented out in the
# load cell; enable that read before running this cell.
inst_price_london = our_price.resample(ofi_resample).last().ffill()
df_all["inst_ldn_bid"] = inst_price_london["Bid0"]
df_all["inst_ldn_offer"] = inst_price_london["Offer0"]
df_all["inst_ldn_mid"] = (df_all["inst_ldn_bid"] + df_all["inst_ldn_offer"]) / 2

# +1 while our own mid is above the reference mid, else -1; lagged one row.
df_all["inst_ldn_signal"] = np.where(df_all["inst_ldn_mid"] > df_all["mid"], 1, -1)
df_all["inst_ldn_signal"] = df_all["inst_ldn_signal"].shift(1)
df_all["inst_ldn_pnl"] = df_all["inst_ldn_signal"] * df_all["mid_change"]

# "min" replaces the "T" minute alias, which is deprecated since pandas 2.2.
df_all["inst_ldn_pnl"].cumsum().resample("1min").last().plot()
plt.legend();

In [None]:
# test XTX mids
# NOTE: xtx_prices is only created by the S3 read that is commented out in the
# load cell; enable that read before running this cell.
xtx_prices = xtx_prices.resample(ofi_resample).last().ffill()
df_all["xtx_bid"] = xtx_prices["Bid0"]
df_all["xtx_offer"] = xtx_prices["Offer0"]
df_all["xtx_mid"] = (df_all["xtx_bid"] + df_all["xtx_offer"]) / 2

# +1 while the XTX mid is above the reference mid, else -1; lagged one row.
df_all["xtx_signal"] = np.where(df_all["xtx_mid"] > df_all["mid"], 1, -1)
df_all["xtx_signal"] = df_all["xtx_signal"].shift(1)
df_all["xtx_pnl"] = df_all["xtx_signal"] * df_all["mid_change"]

# "min" replaces the "T" minute alias, which is deprecated since pandas 2.2.
df_all[["xtx_pnl"]].cumsum().resample("1min").last().plot()
plt.legend();

In [None]:
# run correlations between XTX and ourselves
# xtx_pnl is listed first, so column 0 of the correlation matrix holds the
# correlation of every strategy's PnL with the XTX PnL.
corr_columns = [
    "xtx_pnl",
    "adapt_pnl",
    "inst_ldn_pnl",
    "trend_pnl",
    "ofi_pnl",
    "cwm_pnl",
    "combined_pnl",
    "skewness_pnl",
]
df_corr = df_all[corr_columns]
xtx_correlations = df_corr.corr().iloc[:, 0]
print("XTX correlation with: "f'{xtx_correlations}')

In [None]:
# Correlation matrix of our own mid's PnL and the XTX mid's PnL.
# Note: this rebinds df_corr to a two-column frame.
df_corr = df_all[["inst_ldn_pnl", "xtx_pnl"]]
df_corr.corr()

In [None]:
# run a regression to investigate XTX prices
import statsmodels.api as sm

# Select the regression inputs straight from df_all. The previous version
# started from df_corr, which at this point only holds "inst_ldn_pnl" and
# "xtx_pnl", so picking the regressors raised a KeyError.
xtx_regression = df_all[["xtx_pnl", "trend_pnl", "ofi_pnl", "cwm_pnl"]].fillna(0)
X = xtx_regression[["trend_pnl", "ofi_pnl", "cwm_pnl"]]
y = xtx_regression['xtx_pnl']

# OLS of the XTX PnL on the three strategy PnLs (no intercept term is added).
est = sm.OLS(y, X).fit()
print(est.summary())

In [None]:
# Rolling skewness and mid over a five-minute window, one subplot each.
# NOTE(review): the window date differs from the dates in the data file names
# in the definitions cell - confirm the window falls inside the loaded data.
skew_window = df_all[["skewness", "mid"]].loc["2020-11-18 12:00:00":"2020-11-18 12:05:00"]
skew_window.plot(subplots=True)

In [None]:
# Signal times the mid return 100000 rows later.
# NOTE(review): the column names say "ofi ... 1" / "ofi ... 10", but the inputs
# are the breakout and adapt signals and both use the same 100000-row offset -
# confirm the intended signals and horizons.
forward_mid_change = df_all["mid_change"].shift(-100000)
df_all["ofi_sig_yield_1"] = df_all["breakout_signal"] * forward_mid_change
df_all["ofi_sig_yield_10"] = df_all["adapt_signal"] * forward_mid_change

In [None]:
# Average forward yield of each of the two signal columns.
ofi_sig_1_seconds_yield = df_all["ofi_sig_yield_1"].mean()
ofi_sig_10_seconds_yield = df_all["ofi_sig_yield_10"].mean()


In [None]:
# Display the mean of the "ofi_sig_yield_1" column.
ofi_sig_1_seconds_yield 

In [None]:
# old results
#
# Recorded output of earlier runs, kept for reference. Everything is commented
# out because the raw text is not valid Python and stopped "Run All" here.

# XTX mid as a predictor 
# **time in signal/trade**
# Mean units of time in trade 115.23
# 25 percentile time in trade 4.0
# Median time in trade 15.0
# 75 percentile time in trade 103.0
#
# **performance stats**
# Number of signals/trades: 18745
# Cumulative PnL %: 12.75
# Average Trade PnL $ per million 6.80

# our INST mid as a predictor 
# **time in signal/trade**
# Mean units of time in trade 69.44
# 25 percentile time in trade 3.0
# Median time in trade 7.0
# 75 percentile time in trade 45.0
#
# **performance stats**
# Number of signals/trades: 31109
# Cumulative PnL %: 4.14
# Average Trade PnL $ per million 1.33

# models - slow
# **time in signal/trade**
# Mean units of time in trade 255.59
# 25 percentile time in trade 5.0
# Median time in trade 47.0
# 75 percentile time in trade 360.0
#
# **performance stats**
# Number of signals/trades: 8452
# Cumulative PnL %: 4.69
# Average Trade PnL $ per million 5.55


# models - faster
# **configs**
# adapt rolling: 10000
# trend lookback: 200
# cwm lookback: 5
# ofi rolling: 25
# ofi resample ms: 10ms
#
# **time in signal/trade**
# Mean units of time in trade 36.76
# 25 percentile time in trade 7.0
# Median time in trade 25.0
# 75 percentile time in trade 39.0
#
# **performance stats**
# Number of signals/trades: 58766
# Cumulative PnL %: 9.78
# Average Trade PnL $ per million 1.66

In [None]:
# XTX and adaptive signals over a five-minute window, one subplot each.
signal_window = df_all[["xtx_signal", "adapt_signal"]].loc["2020-11-18 12:00:00":"2020-11-18 12:05:00"]
signal_window.plot(subplots=True)

In [None]:
# Cumulative PnL of the XTX-mid and own-mid signals, one point per minute.
# The two column names were previously written as the single string
# "xtx_pnl, inst_ldn_pnl", which is not a column and raised a KeyError.
# "min" replaces the "T" minute alias, which is deprecated since pandas 2.2.
df_all[["xtx_pnl", "inst_ldn_pnl"]].cumsum().resample("1min").last().plot()
plt.legend();