In [1]:
import numpy as np
from numba import njit

@njit
def CBS(row):
    """Consolidated bid size of a 1-D series of signed sizes.

    The series is negated so that originally negative entries become
    positive; every entry that is then zero or negative is clamped to 0.

    Returns the first clamped value plus the sum of the absolute
    differences between consecutive clamped values.
    """
    # The multiplication allocates a fresh array, so `row` is never mutated.
    bid_sizes = row * -1
    bid_sizes[bid_sizes <= 0] = 0
    step_changes = np.abs(np.diff(bid_sizes))
    return bid_sizes[0] + np.sum(step_changes)
@njit
def CAS(row):
    """Consolidated ask size of a 1-D series of signed sizes.

    Works on a copy of `row` in which every entry that is zero or
    negative is clamped to 0, leaving only the positive entries.

    Returns the first clamped value plus the sum of the absolute
    differences between consecutive clamped values.
    """
    ask_sizes = row.copy()  # copy so the caller's array is not modified
    ask_sizes[ask_sizes <= 0] = 0
    step_changes = np.abs(np.diff(ask_sizes))
    return ask_sizes[0] + np.sum(step_changes)
@njit
def TCBS(cbs):
    """Total consolidated bid size: the sum of all entries of `cbs`."""
    total = np.sum(cbs)
    return total
@njit
def TCAS(cas):
    """Total consolidated ask size: the sum of all entries of `cas`."""
    total = np.sum(cas)
    return total
@njit
def WBP(cbs):
    """Weighted bid price.

    Each entry of `cbs` is divided by the total of `cbs` (via `TCBS`) and
    multiplied by its 1-based position; the products are summed.

    The total is not checked for zero before dividing.
    """
    total = TCBS(cbs)
    weighted = 0
    for idx in range(len(cbs)):
        share = cbs[idx] / total
        weighted += (idx + 1) * share  # position is 1-based
    return weighted
@njit
def WAP(cas):
    """Weighted ask price.

    Each entry of `cas` is divided by the total of `cas` (via `TCAS`) and
    multiplied by its 1-based position; the products are summed.

    The total is not checked for zero before dividing.
    """
    tcas = TCAS(cas)
    wap = 0
    for p, casi in enumerate(cas):
        wap += (p + 1) * (casi / tcas)  # position is 1-based
    return wap
@njit
def AWS(row):
    """Activity weighted spread: `WAP(row)` minus `WBP(row)`."""
    # NOTE(review): both helpers receive the same `row` and, as written in
    # this file, apply the same formula, so the difference looks degenerate.
    # Presumably the ask and bid sides should be passed separately — confirm
    # the intended inputs before relying on this value.
    ask_price = WAP(row)
    bid_price = WBP(row)
    return ask_price - bid_price

def AP(tapes_segment):
    """Average price.

    Placeholder: not implemented yet. `tapes_segment` is ignored and the
    function returns None.
    """
    return None

def AWR(tapes):
    """Activity weighted return.

    Placeholder: not implemented yet. `tapes` is ignored and the function
    returns None.
    """
    return None

@njit
def calculate_high_bid(sliced_arr):
    """Median, over rows, of the highest column holding a negative value.

    For each row of the 2-D `sliced_arr` the largest column index whose
    entry is negative is taken (-1 when the row has no negative entry).
    The per-row indices are sorted and the element at position
    ``len // 2`` is returned, shifted by +1 to make it 1-based.
    """
    column_ids = np.arange(sliced_arr.shape[1])
    # Non-negative cells are replaced by -1 so they never win the max.
    bid_columns = np.where(sliced_arr < 0, column_ids, -1)
    per_row_high = []
    for r in range(bid_columns.shape[0]):
        per_row_high.append(max(bid_columns[r]))
    per_row_high.sort()
    middle = len(per_row_high) // 2  # upper median for even counts
    return per_row_high[middle] + 1

@njit
def calculate_low_ask(sliced_arr):
    """Median, over rows, of the lowest column holding a positive value.

    For each row of the 2-D `sliced_arr` the smallest column index whose
    entry is positive is taken; rows with no positive entry contribute a
    sentinel instead. The per-row indices are sorted and the element at
    position ``len // 2`` is returned, shifted by +1 to make it 1-based.

    The sentinel is ``max(800, number_of_columns)``. The original fixed
    value of 800 is kept for inputs of up to 800 columns; for wider inputs
    it would be smaller than valid column indices and would wrongly pull
    the median down, so it is raised to the column count.
    """
    n_cols = sliced_arr.shape[1]
    no_ask_sentinel = max(800, n_cols)
    positive_indices = np.where(sliced_arr > 0, np.arange(n_cols), no_ask_sentinel)
    min_positive_index = []
    for row in positive_indices:
        min_positive_index.append(min(row))
    min_positive_index.sort()
    # Element at len // 2 is the upper median for even counts.
    low_ask = min_positive_index[len(min_positive_index) // 2] + 1
    return low_ask

In [13]:
from numba import njit, prange
import os
from tools import read_csr, get_Tapes


#@njit(parallel=True)
def get_times_s(raw_lob: list[str]):
    """Parse the leading timestamp of every raw LOB line.

    For each line, the text before the first comma is taken, its first
    character is dropped, and the remainder is converted with `float`.

    Returns a 1-D float64 array with one entry per input line.
    """
    # partition(",")[0] is the text before the first comma (the whole line
    # when there is no comma); [1:] strips the single leading character.
    stamps = [float(line.partition(",")[0][1:]) for line in raw_lob]
    return np.array(stamps, dtype=np.float64)

# Extract the timestamp of every line in each raw LOB file and store the
# resulting array as CSR_Data/TIM_LOB_<tag>.npy.
# Paths are built with os.path.join instead of backslash literals: sequences
# such as "\L", "\T" and "\{" are invalid string escapes and only worked as
# Windows paths by accident.
LOB_DIR = os.path.join("Data", "LOBs")
CSR_DIR = "CSR_Data"
os.makedirs(CSR_DIR, exist_ok=True)  # np.save does not create directories

LOB_filename = sorted(os.listdir(LOB_DIR))  # sorted for a deterministic order
for filename in LOB_filename:
    with open(os.path.join(LOB_DIR, filename), 'r') as f:
        raw_lob = f.readlines()

    times = get_times_s(raw_lob)
    # Tag = third "_"-separated field of the text before the first "L".
    file_tag = filename.split('L')[0].split('_')[2]
    save_name = os.path.join(CSR_DIR, f"TIM_LOB_{file_tag}.npy")
    print(save_name)
    np.save(save_name, times)

CSR_Data\TIM_LOB_2025-01-02.npy
CSR_Data\TIM_LOB_2025-01-03.npy
CSR_Data\TIM_LOB_2025-01-06.npy
CSR_Data\TIM_LOB_2025-01-07.npy
CSR_Data\TIM_LOB_2025-01-08.npy
CSR_Data\TIM_LOB_2025-01-09.npy
CSR_Data\TIM_LOB_2025-01-10.npy
CSR_Data\TIM_LOB_2025-01-13.npy
CSR_Data\TIM_LOB_2025-01-14.npy
CSR_Data\TIM_LOB_2025-01-15.npy
CSR_Data\TIM_LOB_2025-01-16.npy
CSR_Data\TIM_LOB_2025-01-17.npy
CSR_Data\TIM_LOB_2025-01-20.npy
CSR_Data\TIM_LOB_2025-01-21.npy
CSR_Data\TIM_LOB_2025-01-22.npy
CSR_Data\TIM_LOB_2025-01-23.npy
CSR_Data\TIM_LOB_2025-01-24.npy
CSR_Data\TIM_LOB_2025-01-27.npy
CSR_Data\TIM_LOB_2025-01-28.npy
CSR_Data\TIM_LOB_2025-01-29.npy
CSR_Data\TIM_LOB_2025-01-30.npy
CSR_Data\TIM_LOB_2025-01-31.npy
CSR_Data\TIM_LOB_2025-02-03.npy
CSR_Data\TIM_LOB_2025-02-04.npy
CSR_Data\TIM_LOB_2025-02-05.npy
CSR_Data\TIM_LOB_2025-02-06.npy
CSR_Data\TIM_LOB_2025-02-07.npy
CSR_Data\TIM_LOB_2025-02-10.npy
CSR_Data\TIM_LOB_2025-02-11.npy
CSR_Data\TIM_LOB_2025-02-12.npy
CSR_Data\TIM_LOB_2025-02-13.npy
CSR_Data

In [22]:
import pandas as pd

# Convert every tapes CSV into a NumPy array stored as CSR_Data/TAP_<tag>.npy.
# Paths are built with os.path.join instead of backslash literals: sequences
# such as "\T" and "\{" are invalid string escapes and only worked as
# Windows paths by accident.
TAPES_DIR = os.path.join("Data", "Tapes")
CSR_DIR = "CSR_Data"
os.makedirs(CSR_DIR, exist_ok=True)  # np.save does not create directories

Tapes_filenames = sorted(os.listdir(TAPES_DIR))  # sorted for a deterministic order
for filename in Tapes_filenames:
    # NOTE(review): read_csv treats the first row as a header by default —
    # confirm the tapes files actually have one.
    tapes = pd.read_csv(os.path.join(TAPES_DIR, filename))
    np_tapes = tapes.to_numpy()
    print(filename)
    # Tag = third "_"-separated field of the text before the first "ta".
    file_tag = filename.split('ta')[0].split('_')[2]
    np.save(os.path.join(CSR_DIR, f"TAP_{file_tag}.npy"), np_tapes)

UoB_Set01_2025-01-02tapes.csv
UoB_Set01_2025-01-03tapes.csv
UoB_Set01_2025-01-06tapes.csv
UoB_Set01_2025-01-07tapes.csv
UoB_Set01_2025-01-08tapes.csv
UoB_Set01_2025-01-09tapes.csv
UoB_Set01_2025-01-10tapes.csv
UoB_Set01_2025-01-13tapes.csv
UoB_Set01_2025-01-14tapes.csv
UoB_Set01_2025-01-15tapes.csv
UoB_Set01_2025-01-16tapes.csv
UoB_Set01_2025-01-17tapes.csv
UoB_Set01_2025-01-20tapes.csv
UoB_Set01_2025-01-21tapes.csv
UoB_Set01_2025-01-22tapes.csv
UoB_Set01_2025-01-23tapes.csv
UoB_Set01_2025-01-24tapes.csv
UoB_Set01_2025-01-27tapes.csv
UoB_Set01_2025-01-28tapes.csv
UoB_Set01_2025-01-29tapes.csv
UoB_Set01_2025-01-30tapes.csv
UoB_Set01_2025-01-31tapes.csv
UoB_Set01_2025-02-03tapes.csv
UoB_Set01_2025-02-04tapes.csv
UoB_Set01_2025-02-05tapes.csv
UoB_Set01_2025-02-06tapes.csv
UoB_Set01_2025-02-07tapes.csv
UoB_Set01_2025-02-10tapes.csv
UoB_Set01_2025-02-11tapes.csv
UoB_Set01_2025-02-12tapes.csv
UoB_Set01_2025-02-13tapes.csv
UoB_Set01_2025-02-14tapes.csv
UoB_Set01_2025-02-17tapes.csv
UoB_Set01_