# microprice_calculator.py
import datetime
import os
from decimal import Decimal
from operator import itemgetter
import numpy as np
import pandas as pd
from scipy.linalg import block_diag
from hummingbot.strategy.script_strategy_base import ScriptStrategyBase
class MicropricePMM(ScriptStrategyBase):
    """Record top-of-book quotes and derive an imbalance-based mid-price adjustment.

    On every tick the best bid/ask and their sizes are appended to an
    in-memory dataframe, which is periodically flushed to a per-day CSV file
    under ``path_to_data``.  Once at least ``data_size_min`` rows are
    available, the history is used to estimate ``g_star``: a table of
    mid-price adjustments with one row per spread (in ticks) and one column
    per order-book imbalance level.  The status screen shows the resulting
    adjusted mid price next to the plain mid price.
    """

    # ! Configuration
    trading_pair = "ETH-USDT"
    exchange = "kucoin_paper_trade"
    range_of_imbalance = 1  # ? Compute imbalance from [best bid/ask, +/- ticksize*range_of_imbalance)

    # ! Microprice configuration
    dt = 1  # ? Number of recorded rows to look ahead when building state transitions
    n_imb = 6  # ? Needs to be large enough to capture shape of imbalance adjustments without being too large to capture noise

    # ! Advanced configuration variables
    show_data = False  # ? Controls whether current df is shown in status
    path_to_data = './data'  # ? Default file format './data/microprice_{trading_pair}_{exchange}_{date}.csv'
    interval_to_write = 60  # ? Added to current_timestamp to schedule the next csv dump
    price_line_width = 60  # ? Width (in characters) of the lines drawn in the status output
    precision = 4  # ? should be the length of the ticksize
    data_size_min = 10000  # ? Seems to be the ideal value to get microprice adjustment values for other spreads
    day_offset = 1  # ? How many days back to start looking for csv files to load data from

    # ! Script variables
    columns = ['date', 'time', 'bid', 'bs', 'ask', 'as']
    current_dataframe = pd.DataFrame(columns=columns)
    time_to_write = 0
    markets = {exchange: {trading_pair}}
    g_star = None
    recording_data = True
    ticksize = None
    n_spread = None

    # ! System methods
    def on_tick(self):
        """Record the current top of book and flush to disk when the write timer elapses."""
        self.record_data()
        if self.time_to_write < self.current_timestamp:
            self.time_to_write = self.interval_to_write + self.current_timestamp
            self.dump_data()

    def format_status(self) -> str:
        """Build the multi-line status text (prices, imbalance, g_star table, data path)."""
        bid, ask = itemgetter('bid', 'ask')(self.get_bid_ask())
        mid = (bid + ask) / 2
        # Computed once: every call re-reads the csv history from disk.
        adjusted = self.compute_adjusted_midprice()
        bar = '=' * self.price_line_width + '\n'
        header = f'Trading pair: {self.trading_pair}\nExchange: {self.exchange}\n'
        price_line = f'Adjusted Midprice: {adjusted}\n Midprice: {round(mid, 8)}\n = {round(adjusted - mid, 20)}\n\n{self.get_price_line()}\n'
        imbalance_line = f'Imbalance: {self.compute_imbalance()}\n{self.get_imbalance_line()}\n'
        data = f'Data path: {self.get_csv_path()}\n'
        g_star = f'g_star:\n{self.g_star}' if self.g_star is not None else ''
        return f"\n\n\n{bar}\n\n{header}\n{price_line}\n\n{imbalance_line}\nn_spread: {self.n_spread} {'tick' if self.n_spread == 1 else 'ticks'}\n\n\n{g_star}\n\n{data}\n\n{bar}\n\n\n"

    # ! Data recording methods
    @staticmethod
    def _concat_frames(first, second):
        """Stack two dataframes row-wise with a fresh index, ignoring empty inputs.

        Replaces ``DataFrame.append`` (removed in pandas 2.0).  Empty frames
        are skipped so their placeholder dtypes do not leak into the result.
        """
        frames = [frame for frame in (first, second) if not frame.empty]
        if not frames:
            return first.copy()
        if len(frames) == 1:
            return frames[0].reset_index(drop=True)
        return pd.concat(frames, ignore_index=True)

    def _todays_csv_path(self):
        """Return the path of the csv file that holds today's recordings."""
        today = datetime.datetime.now().strftime("%Y-%m-%d")
        return f'{self.path_to_data}/microprice_{self.trading_pair}_{self.exchange}_{today}.csv'

    def record_data(self):
        """Append one row (date, timestamp, best bid/ask and their sizes) to the buffer."""
        book = self.get_bid_ask()
        date = datetime.datetime.now().strftime("%Y-%m-%d")
        row = pd.DataFrame(
            [[date, self.current_timestamp, book['bid'], book['bs'], book['ask'], book['as']]],
            columns=self.columns,
        )
        self.current_dataframe = self._concat_frames(self.current_dataframe, row)

    def dump_data(self):
        """Write the buffered rows to today's csv file.

        All rows except the last ``range_of_imbalance`` ones are appended to
        the file; those trailing rows stay in the in-memory buffer.  Nothing
        is written while the buffer holds fewer than
        ``2 * range_of_imbalance`` rows.
        """
        if len(self.current_dataframe) < 2 * self.range_of_imbalance:
            return
        csv_path = self._todays_csv_path()
        try:
            data = pd.read_csv(csv_path, index_col=[0])
        except (OSError, ValueError) as e:
            # Missing, empty or unparsable file: start a new one.
            # (pandas' EmptyDataError and ParserError are ValueError subclasses.)
            self.logger().info(e)
            self.logger().info(f'Creating new csv file at {csv_path}')
            data = pd.DataFrame(columns=self.columns)
        if self.path_to_data:
            # to_csv does not create missing directories.
            os.makedirs(self.path_to_data, exist_ok=True)
        data = self._concat_frames(data, self.current_dataframe.iloc[:-self.range_of_imbalance])
        data.to_csv(csv_path)
        self.current_dataframe = self.current_dataframe.iloc[-self.range_of_imbalance:]

    # ! Data methods
    def get_csv_path(self):
        """Return the csv file to load history from.

        The first file in ``path_to_data`` (in directory-listing order) that
        belongs to this pair/exchange and holds more than ``data_size_min``
        rows wins; otherwise today's file path is returned, whether or not
        that file exists yet.
        """
        prefix = f'microprice_{self.trading_pair}_{self.exchange}'
        try:
            files = os.listdir(self.path_to_data)
        except FileNotFoundError:
            # Nothing has been dumped yet, so the data directory may not exist.
            files = []
        for name in files:
            if name.startswith(prefix):
                candidate = f'{self.path_to_data}/{name}'
                if len(pd.read_csv(candidate, index_col=[0])) > self.data_size_min:
                    return candidate
        # Otherwise just return today's file
        return self._todays_csv_path()

    def get_bid_ask(self):
        """Return best bid/ask prices ('bid', 'ask') and sizes ('bs', 'as') from the order book.

        Reads the first row of each side of the connector's order-book
        snapshot; an empty side raises ``IndexError``.
        """
        bids, asks = self.connectors[self.exchange].get_order_book(self.trading_pair).snapshot
        top_bid = bids.iloc[0]
        top_ask = asks.iloc[0]
        return {'bid': top_bid.price, 'ask': top_ask.price, 'bs': top_bid.amount, 'as': top_ask.amount}

    # ! Microprice methods
    def compute_adjusted_midprice(self):
        """Return the mid price shifted by the ``g_star`` entry for the current state.

        The state is taken from the last recorded row: its imbalance selects
        the nearest ``g_star`` column and its spread (in ticks) selects the
        row.  ``n_spread``, ``ticksize`` and ``g_star`` are estimated on first
        use and cached on the instance.

        Returns:
            The adjusted mid price rounded to ``precision * 2`` decimals, or
            ``-1`` while there is not enough recorded data.
        """
        data = self.get_df()
        if len(data) < self.data_size_min or self.current_dataframe.empty:
            self.recording_data = True
            return -1
        if self.n_spread is None:
            n_spread = self.compute_n_spread(data)
            if n_spread < 1:
                # No spread level is frequent enough yet; do not cache 0,
                # so the estimate is retried once more data is available.
                self.recording_data = True
                return -1
            self.n_spread = n_spread
        if self.g_star is None:
            self.ticksize, self.g_star = self.compute_G_star(data)

        # Compute adjusted midprice from G_star and mid
        bid, ask = itemgetter('bid', 'ask')(self.get_bid_ask())
        mid = (bid + ask) / 2
        last_row = self.current_dataframe.iloc[-1]
        bid_size = float(last_row['bs'])
        ask_size = float(last_row['as'])
        total_size = bid_size + ask_size
        if total_size <= 0:
            # Imbalance is undefined without any resting size: no adjustment.
            return round(mid, self.precision * 2)
        imb = bid_size / total_size

        # Column of g_star whose imbalance level is closest to the current one
        levels = np.asarray(self.g_star.columns, dtype=float)
        imb_bucket = int(np.argmin(np.abs(levels - imb)))
        spreads = self.g_star.iloc[:, imb_bucket].to_numpy()

        # Spread in whole ticks -> zero-based row index.  Computed with
        # round() only: float floor division undercounts (0.03 // 0.01 == 2.0).
        spread = float(last_row['ask']) - float(last_row['bid'])
        spread_bucket = int(round(spread / self.ticksize)) - 1
        # ? Spreads wider than n_spread ticks use the widest estimated row;
        # ? a zero spread uses the first row instead of wrapping to index -1.
        spread_bucket = min(max(spread_bucket, 0), self.n_spread - 1)

        adj_midprice = mid + spreads[spread_bucket]
        return round(adj_midprice, self.precision * 2)

    def compute_G_star(self, data, n_terms=6):
        """Estimate the adjustment table from recorded data.

        Computes ``G1 + B.G1 + B^2.G1 + ... + B^(n_terms-1).G1`` from the
        ``G1`` and ``B`` returned by :meth:`estimate` and reshapes it into a
        dataframe with one row per spread ('1'..'n_spread') and one column
        per imbalance level (``n_imb`` evenly spaced values in [0, 1]).

        Args:
            data: Recorded history as returned by :meth:`get_df`.
            n_terms: Number of terms of the series to sum (default 6, the
                value previously hard-coded).  Values below 1 behave like 1.

        Returns:
            Tuple ``(ticksize, G_star)``.
        """
        n_spread = self.n_spread
        T, ticksize = self.prep_data_sym(data, self.n_imb, self.dt, n_spread)
        imb = np.linspace(0, 1, self.n_imb)
        G1, B = self.estimate(T, n_spread, self.n_imb, ticksize)
        # Accumulate the series one term at a time (term_k = B . term_{k-1})
        term = G1
        G_sum = G1
        for _ in range(n_terms - 1):
            term = np.dot(B, term)
            G_sum = G_sum + term
        # Reorganize the summed vector into spread x imbalance buckets
        index = [str(i + 1) for i in range(n_spread)]
        G_star = pd.DataFrame(G_sum.reshape(n_spread, self.n_imb), index=index, columns=imb)
        return ticksize, G_star

    def G_star_invalid(self, G_star, ticksize):
        """Return True when ``G_star`` fails basic sanity checks.

        The table is rejected when any entry lies outside
        ``[-ticksize / 2, ticksize / 2]`` or when any value in the row at
        position ``int(n_imb / 2)`` rounds to 0 at ``precision`` decimals.
        NOTE(review): that position is taken along the row (spread) axis of
        ``G_star``, although it is derived from ``n_imb`` - confirm intent.
        """
        half_tick = ticksize / 2
        values = np.asarray(G_star)
        if np.any(values > half_tick):
            return True
        if np.any(values < -half_tick):
            return True
        middle = G_star.iloc[int(self.n_imb / 2)]
        if np.any(np.round(middle, self.precision) == 0):
            return True
        return False

    def estimate(self, T, n_spread, n_imb, ticksize=0.01):
        """Estimate ``G1`` and ``B`` from the prepared transition table.

        States are (spread, imbalance bucket) pairs, ``n_imb * n_spread`` in
        total.  From row counts of ``T`` this builds:

        * ``Q``  - transitions where the mid does not move (``dM == 0``),
          block diagonal with one ``n_imb x n_imb`` block per spread;
        * ``R1`` - transitions into each of the four non-zero mid moves;
        * ``R2`` - transitions where the mid moves, by destination state.

        ``[Q, R1]`` and ``[Q, R2]`` are each row-normalised.  Then
        ``G1 = (I - Q)^-1 . R1 . K`` and ``B = (I - Q)^-1 . R2``, where ``K``
        holds the mid moves ``ticksize * (-1, -0.5, 0.5, 1)``.

        Args:
            T: Dataframe produced by :meth:`prep_data_sym`.
            n_spread: Number of spread levels.
            n_imb: Number of imbalance buckets.
            ticksize: Tick size used to scale ``K``; the default 0.01 matches
                the previously hard-coded values.

        Returns:
            Tuple ``(G1, B)`` of numpy arrays.
        """
        n_states = n_imb * n_spread
        block = n_imb * n_imb

        no_move = T[T['dM'] == 0]
        no_move_counts = no_move.pivot_table(index=['next_imb_bucket'],
                                             columns=['spread', 'imb_bucket'],
                                             values='time',
                                             fill_value=0,
                                             aggfunc='count').unstack()
        # NOTE: np.resize silently repeats or truncates when the pivot does
        # not hold exactly the expected number of cells.
        Q_counts = np.resize(np.array(no_move_counts[0:block]), (n_imb, n_imb))
        # loop over all spreads and add block matrices
        for i in range(1, n_spread):
            Qi = np.resize(np.array(no_move_counts[(i * block):(i + 1) * block]), (n_imb, n_imb))
            Q_counts = block_diag(Q_counts, Qi)

        moves = T[T['dM'] != 0]
        move_counts = moves.pivot_table(index=['dM'],
                                        columns=['spread', 'imb_bucket'],
                                        values='time',
                                        fill_value=0,
                                        aggfunc='count').unstack()
        R_counts = np.resize(np.array(move_counts), (n_states, 4))
        T1 = np.concatenate((Q_counts, R_counts), axis=1).astype(float)
        # Row-normalise counts into probabilities (rows with no observations become NaN)
        T1 = T1 / T1.sum(axis=1, keepdims=True)
        Q = T1[:, 0:n_states]
        R1 = T1[:, n_states:]

        # Possible non-zero mid moves: one or half a tick in either direction
        K = ticksize * np.array([-1.0, -0.5, 0.5, 1.0])

        move_counts = moves.pivot_table(index=['spread', 'imb_bucket'],
                                        columns=['next_spread', 'next_imb_bucket'],
                                        values='time',
                                        fill_value=0,
                                        aggfunc='count')
        R2_counts = np.resize(np.array(move_counts), (n_states, n_states))
        T2 = np.concatenate((Q_counts, R2_counts), axis=1).astype(float)
        T2 = T2 / T2.sum(axis=1, keepdims=True)
        R2 = T2[:, n_states:]

        inv_I_minus_Q = np.linalg.inv(np.eye(n_states) - Q)
        G1 = np.dot(np.dot(inv_I_minus_Q, R1), K)
        B = np.dot(inv_I_minus_Q, R2)
        return G1, B

    def compute_n_spread(self, T=None):
        """Return how many distinct spread values occur more than ``data_size_min`` times.

        Args:
            T: Dataframe with ``ask`` and ``bid`` columns; defaults to
                :meth:`get_df` when omitted.
        """
        if T is None:
            T = self.get_df()
        spread = T.ask - T.bid
        spread_counts = spread.value_counts()
        return len(spread_counts[spread_counts > self.data_size_min])

    def prep_data_sym(self, T, n_imb, dt, n_spread):
        """Build the symmetrised state-transition table used by :meth:`estimate`.

        Works on a copy of ``T``; the caller's dataframe is left untouched.

        Args:
            T: Recorded history with ``time``, ``bid``, ``ask``, ``bs`` and
                ``as`` columns.
            n_imb: Number of imbalance quantile buckets.
            dt: Number of rows to look ahead for the "next" state.
            n_spread: Largest spread, in ticks, to keep.

        Returns:
            Tuple ``(table, ticksize)`` where ``table`` holds the original
            rows followed by their mirrored counterparts.

        Raises:
            ValueError: If no positive spread exists or the inferred tick
                size is 0.
        """
        T = T.copy()
        raw_spread = T['ask'] - T['bid']
        positive_spread = raw_spread[raw_spread > 0]
        if positive_spread.empty:
            raise ValueError('Cannot infer ticksize: no row has a positive spread')
        # Tick size = smallest positive spread, rounded to 2 decimals
        ticksize = np.round(positive_spread.min() * 100) / 100
        if ticksize == 0:
            raise ValueError('Inferred ticksize is 0: smallest spread is below the 2-decimal rounding')

        # adds the spread (snapped to whole ticks) and mid prices
        T['spread'] = np.round(raw_spread / ticksize) * ticksize
        T['mid'] = (T['bid'] + T['ask']) / 2
        # keep only positive spreads of at most n_spread ticks
        T = T.loc[(T['spread'] <= n_spread * ticksize) & (T['spread'] > 0)].copy()
        T['imb'] = T['bs'] / (T['bs'] + T['as'])
        # discretize imbalance into percentiles
        T['imb_bucket'] = pd.qcut(T['imb'], n_imb, labels=False, duplicates='drop')
        T['next_mid'] = T['mid'].shift(-dt)
        # step ahead state variables
        T['next_spread'] = T['spread'].shift(-dt)
        T['next_time'] = T['time'].shift(-dt)
        T['next_imb_bucket'] = T['imb_bucket'].shift(-dt)
        # step ahead change in price, snapped to half ticks
        T['dM'] = np.round((T['next_mid'] - T['mid']) / ticksize * 2) * ticksize / 2
        # keep moves of at most one tick (this also drops rows without a next state)
        T = T.loc[(T['dM'] <= ticksize * 1.1) & (T['dM'] >= -ticksize * 1.1)]

        # symmetrize data: mirror imbalance buckets and flip the sign of the moves
        mirrored = T.copy(deep=True)
        mirrored['imb_bucket'] = n_imb - 1 - mirrored['imb_bucket']
        mirrored['next_imb_bucket'] = n_imb - 1 - mirrored['next_imb_bucket']
        mirrored['dM'] = -mirrored['dM']
        mirrored['mid'] = -mirrored['mid']
        combined = pd.concat([T, mirrored])
        combined.index = pd.RangeIndex(len(combined.index))
        return combined, ticksize

    def get_df(self):
        """Return stored csv history plus the in-memory buffer as a new dataframe.

        Numeric columns are cast to float and ``mid`` / ``imb`` columns are
        added.  If the csv cannot be read, only the buffer is used.  The
        result is always a fresh frame; ``current_dataframe`` is never
        modified.
        """
        csv_path = self.get_csv_path()
        try:
            stored = pd.read_csv(csv_path, index_col=[0])
        except (OSError, ValueError) as e:
            self.logger().info(e)
            # Copy so the derived columns below are not added to the buffer.
            df = self.current_dataframe.copy()
        else:
            df = self._concat_frames(stored, self.current_dataframe)
        for column in ('time', 'bid', 'ask', 'bs', 'as'):
            df[column] = df[column].astype(float)
        df['mid'] = (df['bid'] + df['ask']) / float(2)
        df['imb'] = df['bs'] / (df['bs'] + df['as'])
        return df

    def compute_imbalance(self) -> Decimal:
        """Return bid size / (bid size + ask size) summed over the in-memory buffer.

        Returns:
            The imbalance rounded to ``precision * 2`` decimals, or
            ``Decimal(-1)`` when the buffer is empty or holds no size.
        """
        # The buffer alone decides: whenever it is non-empty, the combined
        # history from get_df() is non-empty too, so no disk read is needed.
        if self.current_dataframe.empty:
            self.logger().info('No data to compute imbalance, recording data')
            self.recording_data = True
            return Decimal(-1)
        bid_size = self.current_dataframe['bs'].sum()
        ask_size = self.current_dataframe['as'].sum()
        total_size = bid_size + ask_size
        if total_size == 0:
            return Decimal(-1)
        return round(Decimal(bid_size) / Decimal(total_size), self.precision * 2)

    # ! Format status methods
    def get_price_line(self) -> str:
        """Draw a text line from best bid to best ask with 'm' at the adjusted mid price.

        The first output line labels bid, adjusted mid price and ask; the
        second is the line itself, with '|' marking the plain mid price.
        """
        # Get best bid and ask
        bid, ask = itemgetter('bid', 'ask')(self.get_bid_ask())
        adjusted = self.compute_adjusted_midprice()
        half_width = int(self.price_line_width / 2)
        # Mid price is center of line
        price_line = half_width * '-' + '|' + half_width * '-'
        # Labels: bid, adjusted midprice, ask
        adjusted_label = str(adjusted)
        bid_offset = int(self.price_line_width / 2 - len(str(bid)) - (len(adjusted_label) / 2))
        ask_offset = int(self.price_line_width / 2 - len(str(ask)) - (len(adjusted_label) / 2))
        labels = str(bid) + bid_offset * ' ' + adjusted_label + ask_offset * ' ' + str(ask) + '\n'
        # Position of the adjusted midprice on a line whose ends are best bid and ask
        mid = (bid + ask) / 2
        spread = ask - bid
        if spread > 0:
            microprice_adjustment = adjusted - mid + (spread / 2)
            adjusted_midprice_i = int(microprice_adjustment / spread * self.price_line_width) + 1
        else:
            # Locked or crossed book: no width to scale against, use the center.
            adjusted_midprice_i = half_width
        # Keep the marker on the line (e.g. while the adjusted price is still -1)
        adjusted_midprice_i = min(max(adjusted_midprice_i, 0), len(price_line))
        price_line = price_line[:adjusted_midprice_i] + 'm' + price_line[adjusted_midprice_i:]
        return labels + price_line

    def get_imbalance_line(self) -> str:
        """Draw a text line with 'i' placed proportionally to the current imbalance."""
        half_width = int(self.price_line_width / 2)
        imb_line = half_width * '-' + '|' + half_width * '-'
        # max() keeps the marker at the start while the imbalance is still -1
        marker = max(int(self.compute_imbalance() * self.price_line_width), 0)
        return imb_line[:marker] + 'i' + imb_line[marker:]