In [71]:
%reset -f

import numpy as np
import numba as  nb
import pandas as pd

# Load the tick data from the local parquet file and order it by the
# timestamp column 't' (the binning code below walks the ticks in time order).
# NOTE(review): the name `all` shadows Python's builtin all(); it is kept here
# because a later cell passes `all` to bin_ticks.
all = pd.read_parquet('tmp.parq').sort_values(by=['t'])
# Disabled exploratory plot of bid / ask / mid after 2023-06-22 10:00.
# It references names that are not defined in the visible cells
# (bid_ask, Figure, go, Scatter), so it cannot run as-is.
# df_plt = bid_ask.copy()[bid_ask.t > np.datetime64('2023-06-22 10:00:00')]
# m = 15
# fig = Figure(layout=go.Layout(height=500, margin={'t':m,'b':m,'l':m,'r':m}))
# fig.add_traces(Scatter(x=df_plt['t'], y=df_plt['ask'], name='ask', mode="markers", marker={'size':2},showlegend=False))
# fig.add_traces(Scatter(x=df_plt['t'], y=df_plt['bid'], name='bid', mode="markers", marker={'size':2},showlegend=False))
# fig.add_traces(Scatter(x=df_plt['t'], y=(df_plt['bid'] + df_plt['ask'])/2, name='mid', mode="markers", marker={'size':2, 'color':'black'},showlegend=False))
# fig.show()
# Keep only the last 100,000 rows of the time-sorted frame (i.e. the most
# recent ticks) as a smaller working set.
subset = all[-100000:]

In [72]:
@nb.jit(nopython=True)
def insert(a:np.ndarray, idx:np.int64, v) -> np.ndarray:
    """Return a new 1-D array equal to ``a`` with ``v`` inserted at ``idx``.

    The input array is not modified. The result has ``len(a) + 1`` elements
    and the same dtype as ``a`` (like ``np.insert``), so ``v`` is cast to
    that dtype rather than the array being cast to the type of ``v``.

    Parameters
    ----------
    a : 1-D array to copy from.
    idx : insertion position in the *result*. Valid values are
        ``-(len(a) + 1) .. len(a)`` inclusive. Negative positions are taken
        modulo ``len(a) + 1``, so ``-1`` appends ``v`` after the last element
        (note: this differs from ``list.insert``, where ``-1`` inserts before
        the last element).
    v : scalar value to insert.

    Raises
    ------
    Exception
        If ``idx`` is outside the valid range.
    """
    N = len(a)
    M = N + 1

    # Reject positions that would wrap around more than once.
    if idx < -M or idx > N:
        raise Exception(f"Insertion index out of range {idx}.")

    # Map negative positions onto 0..N (position N means "append").
    insertion_pt = idx % M

    # Allocate with the dtype of `a` so neither the data nor the result type
    # silently follows the type of `v`.
    tgt = np.empty(M, dtype=a.dtype)
    tgt[:insertion_pt] = a[:insertion_pt]
    tgt[insertion_pt] = v
    tgt[insertion_pt + 1:] = a[insertion_pt:]

    return tgt


In [82]:

@nb.jit(nopython=True)
def numba_binner(t:np.ndarray, val:np.ndarray, bin_start:np.ndarray, bin_end:np.ndarray, ) -> tuple:
    """Aggregate ticks into per-bin open/high/low/close and time-weighted average.

    All four inputs are 1-D arrays of equal length, one entry per tick, with
    ticks belonging to the same bin stored contiguously (the caller sorts by
    time). One output row is produced per run of equal ``bin_start`` values.

    Parameters
    ----------
    t : tick timestamps as integers.
    val : tick values to aggregate.
    bin_start : start of the bin each tick falls in (same units as ``t``).
    bin_end : end of the bin each tick falls in (same units as ``t``).

    Returns
    -------
    tuple of ten arrays, one element per bin:
        ``bin_start_index, bin_end_index`` - bin boundaries,
        ``t0, t1`` - first and last tick time inside the bin,
        ``n_samples`` - number of ticks in the bin,
        ``o, h, l, c`` - first, max, min and last tick value,
        ``twap`` - time-weighted average of ``val``: every tick is weighted by
        the time until the next tick (the last one until the bin end). When
        the previous output row is the directly preceding bin, the previous
        bin's last value is carried forward from the bin start to the first
        tick; otherwise the average starts at the first tick.
    """
    n_ticks = len(t)

    # One output row per run of identical bin_start values. Counting runs
    # (rather than unique values) guarantees the arrays below are exactly as
    # long as the number of rows the main loop writes.
    n_bins = 0
    if n_ticks > 0:
        n_bins = 1
        for k in range(1, n_ticks):
            if bin_start[k] != bin_start[k - 1]:
                n_bins += 1

    #
    # create target arrays
    #
    bin_start_index = np.zeros(n_bins, dtype=t.dtype)
    bin_end_index = np.zeros(n_bins, dtype=t.dtype)
    n_samples = np.zeros(n_bins, dtype=np.int64)
    o = np.zeros(n_bins, dtype=np.float64)
    h = np.zeros(n_bins, dtype=np.float64)
    l = np.zeros(n_bins, dtype=np.float64)
    c = np.zeros(n_bins, dtype=np.float64)
    twap = np.zeros(n_bins, dtype=np.float64)
    t0 = np.zeros(n_bins, dtype=t.dtype)
    t1 = np.zeros(n_bins, dtype=t.dtype)

    # Nothing to bin: return the (empty) arrays without touching element 0.
    if n_ticks == 0:
        return bin_start_index, bin_end_index, t0, t1, n_samples, o, h, l, c, twap

    # Bin width, taken from the first tick (all bins are assumed equally wide).
    bin_size = bin_end[0] - bin_start[0]

    # do the binning. j is leading index, i is trailing index, bi is the bin index
    i, j, bi = 0, 0, 0
    while j < n_ticks:
        # `and` short-circuits, so bin_start[j] is never read at j == n_ticks.
        while j < n_ticks and bin_start[i] == bin_start[j]:
            j += 1

        # j has overshot the bin, so the bin's last tick is at j - 1
        bin_start_index[bi] = bin_start[j - 1]
        bin_end_index[bi]   = bin_end[j - 1]

        n_samples[bi] = j - i

        times_bin = t[i:j]
        ticks_bin = val[i:j]

        o[bi] = ticks_bin[0]
        h[bi] = np.max(ticks_bin)
        l[bi] = np.min(ticks_bin)
        c[bi] = ticks_bin[-1]
        t0[bi] = times_bin[0]
        t1[bi] = times_bin[-1]

        # #####
        # for the twap, forward fill from prev bin and to end of bin
        # #####
        if bi > 0:
            # if the previous row is the bin exactly preceding this one in time,
            # carry its last tick forward to this bin's start boundary
            if bin_start_index[bi] - bin_start_index[bi - 1] == bin_size:
                times_bin = insert(times_bin, 0, bin_start_index[bi])
                ticks_bin = insert(ticks_bin, 0, val[i-1])

        # extend the last tick to the bin's end boundary (idx -1 appends)
        times_bin = insert(times_bin, -1, bin_end_index[bi])

        # dt[k] is how long ticks_bin[k] was the current value
        dt = np.diff(times_bin)
        twap[bi] = np.sum(dt * ticks_bin)/np.sum(dt)

        bi += 1
        i = j

    return bin_start_index, bin_end_index, t0, t1, n_samples, o, h, l, c, twap

# pad_on_bin_boundary
def bin_ticks(ticks:pd.DataFrame,
        bin_size_sec:int,
        time_col:str = "t",
        ) -> pd.DataFrame:
    """Bin bid/ask ticks into fixed-width time bars of the mid price.

    Parameters
    ----------
    ticks : frame with a datetime column named by ``time_col`` plus numeric
        ``bid`` and ``ask`` columns. Rows are expected to be in time order.
    bin_size_sec : bin width in seconds.
    time_col : name of the timestamp column (default ``"t"``).

    Returns
    -------
    DataFrame with one row per non-empty bin and columns
    ``bin_start, bin_end, t0, t1, n_samples, o, h, l, c, twap`` where the
    price columns are computed from the mid price ``(bid + ask) / 2``.
    An empty input yields an empty frame with the same columns.
    """
    if len(ticks) == 0:
        # The compiled binner indexes element 0, so short-circuit here.
        no_time = np.array([], dtype='datetime64[ns]')
        no_value = np.array([], dtype=np.float64)
        return pd.DataFrame({'bin_start':no_time,
                             'bin_end':no_time,
                             't0':no_time,
                             't1':no_time,
                             'n_samples':np.array([], dtype=np.int64),
                             'o':no_value, 'h':no_value, 'l':no_value,
                             'c':no_value, 'twap':no_value})

    # Normalise to nanoseconds first so the integer views of the tick times
    # and of the bin boundaries below are guaranteed to share one unit.
    t_ns = ticks[time_col].values.astype('datetime64[ns]')

    # Casting to a coarser datetime64 unit truncates each timestamp to the
    # start of its bin; casting back gives that boundary in nanoseconds.
    bin_start = t_ns.astype(f'datetime64[{bin_size_sec}s]').astype('datetime64[ns]')
    bin_end = (bin_start + np.timedelta64(bin_size_sec, 's')).astype(np.int64)
    bin_start  = bin_start.astype(np.int64)

    # Use the same column for tick times as for the bin boundaries.
    tt = t_ns.astype(np.int64)
    mid = (ticks.bid.values + ticks.ask.values) / 2
    bin_start_index, bin_end_index, t0, t1, n_samples, o, h, l, c, twap =  \
        numba_binner(tt, mid, bin_start, bin_end)
    return pd.DataFrame({'bin_start':bin_start_index.astype('datetime64[ns]'),
                         'bin_end':bin_end_index.astype('datetime64[ns]'),
                         't0':t0.astype('datetime64[ns]'),
                         't1':t1.astype('datetime64[ns]'),
                         'n_samples':n_samples,
                         'o':o, 'h':h, 'l':l, 'c':c, 'twap':twap})

# Bin the 100,000-row `subset` into 60-second bars and display the result.
res = bin_ticks(subset, 60)
res

Unnamed: 0,bin_start,bin_end,t0,t1,n_samples,o,h,l,c,twap
0,2023-06-29 09:44:00,2023-06-29 09:45:00,2023-06-29 09:44:31.438,2023-06-29 09:44:59.725,21,1.158185,1.158275,1.158185,1.158235,1.158239
1,2023-06-29 09:45:00,2023-06-29 09:46:00,2023-06-29 09:45:00.558,2023-06-29 09:45:59.164,33,1.158235,1.158275,1.158215,1.158255,1.158244
2,2023-06-29 09:46:00,2023-06-29 09:47:00,2023-06-29 09:46:00.231,2023-06-29 09:46:59.514,33,1.158295,1.158295,1.158225,1.158255,1.158259
3,2023-06-29 09:47:00,2023-06-29 09:48:00,2023-06-29 09:47:02.347,2023-06-29 09:47:59.637,30,1.158265,1.158395,1.158225,1.158345,1.158289
4,2023-06-29 09:48:00,2023-06-29 09:49:00,2023-06-29 09:48:00.994,2023-06-29 09:48:57.027,36,1.158345,1.158415,1.158275,1.158415,1.158338
...,...,...,...,...,...,...,...,...,...,...
3179,2023-07-03 18:07:00,2023-07-03 18:08:00,2023-07-03 18:07:04.946,2023-07-03 18:07:26.835,4,1.162575,1.162585,1.162565,1.162585,1.162578
3180,2023-07-03 18:08:00,2023-07-03 18:09:00,2023-07-03 18:08:03.893,2023-07-03 18:08:58.570,13,1.162605,1.162605,1.162565,1.162575,1.162578
3181,2023-07-03 18:09:00,2023-07-03 18:10:00,2023-07-03 18:09:00.475,2023-07-03 18:09:53.762,16,1.162585,1.162595,1.162565,1.162565,1.162579
3182,2023-07-03 18:10:00,2023-07-03 18:11:00,2023-07-03 18:10:09.203,2023-07-03 18:10:58.372,12,1.162575,1.162625,1.162575,1.162625,1.162582


In [80]:
# Re-run the 60-second binning on the full tick set; this overwrites `res`.
res = bin_ticks(all, 60)
res

Unnamed: 0,bin_start,bin_end,t0,t1,n_samples,o,h,l,c,twap
0,2023-06-06 19:22:00,2023-06-06 19:23:00,2023-06-06 19:22:24.585,2023-06-06 19:22:59.563,18,1.162185,1.162205,1.162185,1.162195,1.162195
1,2023-06-06 19:23:00,2023-06-06 19:24:00,2023-06-06 19:23:01.584,2023-06-06 19:23:20.241,7,1.162195,1.162205,1.162165,1.162165,1.162176
2,2023-06-06 19:27:00,2023-06-06 19:28:00,2023-06-06 19:27:35.485,2023-06-06 19:27:53.589,4,1.162165,1.162175,1.162165,1.162165,1.162166
3,2023-06-06 19:29:00,2023-06-06 19:30:00,2023-06-06 19:29:01.237,2023-06-06 19:29:05.443,3,1.162165,1.162185,1.162165,1.162185,1.162184
4,2023-06-06 20:27:00,2023-06-06 20:28:00,2023-06-06 20:27:22.878,2023-06-06 20:27:37.094,6,1.161995,1.162005,1.161995,1.161995,1.161996
...,...,...,...,...,...,...,...,...,...,...
17079,2023-07-03 18:07:00,2023-07-03 18:08:00,2023-07-03 18:07:04.946,2023-07-03 18:07:26.835,4,1.162575,1.162585,1.162565,1.162585,1.162578
17080,2023-07-03 18:08:00,2023-07-03 18:09:00,2023-07-03 18:08:03.893,2023-07-03 18:08:58.570,13,1.162605,1.162605,1.162565,1.162575,1.162578
17081,2023-07-03 18:09:00,2023-07-03 18:10:00,2023-07-03 18:09:00.475,2023-07-03 18:09:53.762,16,1.162585,1.162595,1.162565,1.162565,1.162579
17082,2023-07-03 18:10:00,2023-07-03 18:11:00,2023-07-03 18:10:09.203,2023-07-03 18:10:58.372,12,1.162575,1.162625,1.162575,1.162625,1.162582


In [81]:
# NOTE(review): `l` is the per-bin low price, so `l < 60` appears to keep every
# row (the displayed result spans the same index range as `res`). Presumably a
# different column, e.g. n_samples, was intended - confirm.
res[res.l < 60]

Unnamed: 0,bin_start,bin_end,t0,t1,n_samples,o,h,l,c,twap
0,2023-06-06 19:22:00,2023-06-06 19:23:00,2023-06-06 19:22:24.585,2023-06-06 19:22:59.563,18,1.162185,1.162205,1.162185,1.162195,1.162195
1,2023-06-06 19:23:00,2023-06-06 19:24:00,2023-06-06 19:23:01.584,2023-06-06 19:23:20.241,7,1.162195,1.162205,1.162165,1.162165,1.162176
2,2023-06-06 19:27:00,2023-06-06 19:28:00,2023-06-06 19:27:35.485,2023-06-06 19:27:53.589,4,1.162165,1.162175,1.162165,1.162165,1.162166
3,2023-06-06 19:29:00,2023-06-06 19:30:00,2023-06-06 19:29:01.237,2023-06-06 19:29:05.443,3,1.162165,1.162185,1.162165,1.162185,1.162184
4,2023-06-06 20:27:00,2023-06-06 20:28:00,2023-06-06 20:27:22.878,2023-06-06 20:27:37.094,6,1.161995,1.162005,1.161995,1.161995,1.161996
...,...,...,...,...,...,...,...,...,...,...
17079,2023-07-03 18:07:00,2023-07-03 18:08:00,2023-07-03 18:07:04.946,2023-07-03 18:07:26.835,4,1.162575,1.162585,1.162565,1.162585,1.162578
17080,2023-07-03 18:08:00,2023-07-03 18:09:00,2023-07-03 18:08:03.893,2023-07-03 18:08:58.570,13,1.162605,1.162605,1.162565,1.162575,1.162578
17081,2023-07-03 18:09:00,2023-07-03 18:10:00,2023-07-03 18:09:00.475,2023-07-03 18:09:53.762,16,1.162585,1.162595,1.162565,1.162565,1.162579
17082,2023-07-03 18:10:00,2023-07-03 18:11:00,2023-07-03 18:10:09.203,2023-07-03 18:10:58.372,12,1.162575,1.162625,1.162575,1.162625,1.162582
