## Time between Futures Trades

Here we examine the distribution of times between consecutive trades on bitcoin futures, i.e. the time of the $n$-th trade minus the time of the $(n-1)$-th trade.

In [11]:
import pandas as pd
import numpy as np
import calendar
from datetime import datetime
import gc
import matplotlib.pyplot as plt
import mpld3

# Flush IPython's output cache (`Out`); without -f this asks for confirmation.
%reset Out

Once deleted, variables cannot be recovered. Proceed (y/[n])? y
Flushing output cache (0 entries)


In [12]:
def df_with_interstitial_time(ms_offset_file, curncy_file, max_time_travelers=5):
    """Load one contract's trades and compute the time between consecutive trades.

    The tick file is filtered to rows with ``type == "TRADE"`` and
    ``conditionCodes == "TSUM"``, joined to the offset file on the tick file's
    row number (column ``index``), and each trade's ``time`` is shifted by its
    ``time_offset`` to give ``time_ms``.  Trades without an offset are dropped.

    Rows whose ``time_ms`` is earlier than the preceding row's ("time
    travelers", as flagged by ``check_time_travel``) are removed before the
    differences are taken.

    Parameters
    ----------
    ms_offset_file : path to a CSV with at least ``index`` and ``time_offset``.
    curncy_file : path to a CSV with at least ``time``, ``type`` and
        ``conditionCodes``.
    max_time_travelers : int, default 5
        Largest number of out-of-order rows that is silently dropped.

    Returns
    -------
    pandas.DataFrame
        The merged trades with ``time_ms`` and ``trade_dt`` (difference of
        consecutive ``time_ms`` values; ``NaT`` for the first row).
    tuple
        ``(0, 0)`` when more than ``max_time_travelers`` rows are out of
        order.  Callers must check for this sentinel before using the result
        as a DataFrame.
    """
    print(f"parsing {ms_offset_file}")
    # NOTE(review): `time_offset` is requested as a date column here and then
    # converted with `pd.to_timedelta` below -- confirm the on-disk format.
    offsets = pd.read_csv(ms_offset_file, parse_dates=["time_offset"])
    ticks = pd.read_csv(curncy_file, parse_dates=["time"])
    # The offset file is keyed by the tick file's row number.
    ticks["index"] = ticks.index
    trades = ticks[(ticks["type"] == "TRADE") & (ticks["conditionCodes"] == "TSUM")]

    merged = pd.merge(trades, offsets, how="left", on="index", validate="one_to_one")
    merged = merged.dropna(subset=['time_offset'])
    merged["time_offset"] = pd.to_timedelta(merged['time_offset'])
    merged["time_ms"] = merged["time"] + merged["time_offset"]

    time_travelers_mask = check_time_travel(merged)
    # Count the flagged rows with .sum(); .size would be the total row count.
    n_time_travelers = int(time_travelers_mask.sum())
    if n_time_travelers > max_time_travelers:
        print(f"lots of time travelers ({n_time_travelers}), time to come up with a better solution")
        return 0,0
    merged = merged.drop(merged.loc[time_travelers_mask].index)

    # Computed after the drop, so a removed row's neighbours are differenced
    # against each other.
    merged["trade_dt"] = merged["time_ms"].diff()
    print(np.shape(merged))
    return merged

def interstitial_histo(df):
    """Histogram the strictly positive inter-trade times on logarithmic bins.

    Reads ``df["trade_dt"]`` (timedeltas), discards zero, negative and missing
    gaps, converts the rest to milliseconds and bins them on 250 log-spaced
    edges running from 10**1.5 ms to 10**8 ms.

    Returns
    -------
    (counts, edges) exactly as produced by ``np.histogram``: 249 counts and
    the 250 bin edges.
    """
    gaps = df["trade_dt"]
    is_positive = gaps > pd.Timedelta('0')
    positive_gaps = gaps.loc[is_positive].dropna()
    gaps_ms = positive_gaps.dt.total_seconds() * 1e3

    log_edges = np.logspace(1.5, 8, num=250)
    counts, edges = np.histogram(gaps_ms, bins=log_edges)
    return counts, edges

def check_time_travel(df):
    # check for multiple trades within a time bucket that don't get an offset added
    # push forward last offset
    mask = df["time_offset"].apply(lambda x: isinstance(x,str))
    diff_mask = df["time_ms"].diff() < pd.Timedelta('0')
    print(f"travelers: {df.loc[diff_mask]}")
    #j.iloc[12260:12266,:]
    return diff_mask

In [3]:
# Build one inter-trade-time histogram per contract, keyed by ticker.
futures = "BTCH0,BTCJ0,BTCK0,BTCM0,BTCN0,BTCQ0,BTCU0,BTCV0,BTCX0".split(",")
futures_histo = {}

for ticker in futures:
    offsets_path = f"../data_dumps/{ticker}_ms-offset.csv"
    ticks_path = f"../data_dumps/{ticker}_Curncy.csv"
    # `df` is overwritten on every pass; only the histogram is retained.
    df = df_with_interstitial_time(offsets_path, ticks_path)
    futures_histo[ticker] = interstitial_histo(df)
    print()

parsing ../data_dumps/BTCH0_ms-offset.csv
travelers: Empty DataFrame
Columns: [time, type, value, size, conditionCodes, index, time_offset, time_ms]
Index: []

parsing ../data_dumps/BTCJ0_ms-offset.csv
travelers: Empty DataFrame
Columns: [time, type, value, size, conditionCodes, index, time_offset, time_ms]
Index: []

parsing ../data_dumps/BTCK0_ms-offset.csv
travelers: Empty DataFrame
Columns: [time, type, value, size, conditionCodes, index, time_offset, time_ms]
Index: []

parsing ../data_dumps/BTCM0_ms-offset.csv
travelers: Empty DataFrame
Columns: [time, type, value, size, conditionCodes, index, time_offset, time_ms]
Index: []

parsing ../data_dumps/BTCN0_ms-offset.csv
travelers: Empty DataFrame
Columns: [time, type, value, size, conditionCodes, index, time_offset, time_ms]
Index: []

parsing ../data_dumps/BTCQ0_ms-offset.csv
travelers: Empty DataFrame
Columns: [time, type, value, size, conditionCodes, index, time_offset, time_ms]
Index: []

parsing ../data_dumps/BTCU0_ms-offset.cs

In [8]:
%reset_selective -f df

In [9]:
# Display IPython's `Out` dict (the cache of previous cell outputs).
Out

{}

In [None]:
# Stack the per-contract trade frames into a single frame.
# Only the first four contracts are used here; the full list is
# BTCH0,BTCJ0,BTCK0,BTCM0,BTCN0,BTCQ0,BTCU0,BTCV0,BTCX0.
futures = "BTCH0,BTCJ0,BTCK0,BTCM0".split(",")

dfs = [
    df_with_interstitial_time(
        f"../data_dumps/{ticker}_ms-offset.csv",
        f"../data_dumps/{ticker}_Curncy.csv",
    )
    for ticker in futures
]
df = pd.concat(dfs, axis=0)

df

parsing ../data_dumps/BTCH0_ms-offset.csv


In [20]:
# Visualize the per-contract histograms of time between trades, one panel per
# contract, on a shared logarithmic x axis.
futures = '''BTCH0,BTCJ0,BTCK0,BTCM0,BTCN0,BTCQ0,BTCU0,BTCV0,BTCX0'''.split(',')
colors = ["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd", "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf"]

n_rows, n_cols = 3, 3
fig, axs = plt.subplots(n_rows, n_cols, sharex='all', figsize=(14,12))

# Pair each contract with one panel (row-major, in list order) and one colour.
# Iterating with zip leaves `futures` and `colors` intact and simply stops
# when the shortest of the three runs out.
for panel, (ax, contract, color) in enumerate(zip(axs.flat, futures, colors)):
    row, col = divmod(panel, n_cols)
    val_bins, bins = futures_histo[contract]

    # `bins` holds the bin edges: anchor every bar at its left edge so that it
    # spans exactly [bins[k], bins[k+1]].  The default align='center' would
    # centre it on the left edge instead.
    ax.bar(x=bins[:-1], height=val_bins, width=bins[1:] - bins[:-1], align='edge', label=contract, color=color)
    ax.set_xscale('log')

    # Axis labels only on the bottom row / left column.
    if row == n_rows - 1:
        ax.set_xlabel("log scale time between trades (ms)")
    if col == 0:
        ax.set_ylabel("counts")

    # A single title for the whole figure, placed on the first panel.
    if panel == 0:
        ax.set_title("Time between trades on futures (ms)")

    ax.legend(loc="upper right")

mpld3.display(fig)
#plt.savefig("interstitial_time_multiple_futures.png")