In [2]:
import os

import pandas as pd
# Identifiers of the recorded run; both are embedded in the input file names below.
TIMESTAMP_ORDERBOOK_FILE = '1705074061033'
SIMULATION_START_TIMESTAMP_SECONDS = 1705072791

# Directory holding the order-book dumps. The default is the original absolute
# location; set the ORDERBOOK_CHANGES_DIR environment variable to run the
# notebook on another machine without editing this cell.
ORDERBOOK_CHANGES_DIR = os.environ.get(
    'ORDERBOOK_CHANGES_DIR',
    '/home/davide/Desktop/phd/bitfinex-api-py/data/orderbook_changes',
)

# Gap between the COE training start and the simulation start (30 minutes).
COE_TRAINING_WINDOW_SECONDS = 30 * 60

DF_COE_PATH = f'data/oracle/{TIMESTAMP_ORDERBOOK_FILE}_{SIMULATION_START_TIMESTAMP_SECONDS}_BI8.csv'
DF_ORDERBOOK_PATH = os.path.join(
    ORDERBOOK_CHANGES_DIR, f'orderbook_changes_{TIMESTAMP_ORDERBOOK_FILE}.tsv'
)
COE_TRAINING_START_TIMESTAMP_SECONDS = (
    SIMULATION_START_TIMESTAMP_SECONDS - COE_TRAINING_WINDOW_SECONDS
)


def get_orderbook_df(path: str) -> pd.DataFrame:
    """Read the tab-separated order-book file at ``path`` into a DataFrame.

    Args:
        path: Location of the ``.tsv`` file, passed straight to ``pd.read_csv``.

    Returns:
        The parsed frame, with no further processing applied.
    """
    orderbook_frame = pd.read_csv(path, delimiter='\t')
    return orderbook_frame

def get_coe_df(path: str) -> pd.DataFrame:
    """Read the comma-separated COE file at ``path`` into a DataFrame.

    Args:
        path: Location of the ``.csv`` file, passed straight to ``pd.read_csv``.

    Returns:
        The parsed frame, with no further processing applied.
    """
    coe_frame = pd.read_csv(path)
    return coe_frame

def get_processed_orderbook_df(
    df: pd.DataFrame, bi_level: int, training_start_timestamp_seconds: int
) -> pd.DataFrame:
    """Add mid-price, forward-return, imbalance and offset-time columns to ``df``.

    The new columns are written onto ``df`` itself and that same object is
    returned, so the caller's frame is modified.

    Columns added, in this order:
        MidPrice: mean of ``AskPrice1`` and ``BidPrice1``.
        Return: relative change from this row's ``MidPrice`` to the next
            row's; the last row is NaN because it has no successor.
        BaseImbalance: ``(pbid - pask) / (pbid + pask)`` where
            ``pbid = BidPrice1 - BidPrice{bi_level}`` and
            ``pask = AskPrice{bi_level} - AskPrice1``.
        OffsetTimestamp: ``Timestamp / 1000`` minus
            ``training_start_timestamp_seconds``.

    Args:
        df: Order-book frame with ``AskPrice1``, ``BidPrice1``,
            ``AskPrice{bi_level}``, ``BidPrice{bi_level}`` and ``Timestamp``
            columns.
        bi_level: Book level whose prices are compared against level 1.
        training_start_timestamp_seconds: Reference instant subtracted from
            the scaled timestamps (``Timestamp`` is presumably in
            milliseconds, given the division by 1000 — confirm against the
            data source).

    Returns:
        The same ``df`` object, now carrying the four extra columns.
    """
    best_ask = df["AskPrice1"]
    best_bid = df["BidPrice1"]

    mid_price = (best_ask + best_bid) / 2
    df['MidPrice'] = mid_price
    # Forward-looking return: next row's mid-price relative to the current one.
    df['Return'] = (mid_price.shift(-1) - mid_price) / mid_price

    # Price distance from the top of the book to the requested level, per side.
    bid_distance = best_bid - df[f"BidPrice{bi_level}"]
    ask_distance = df[f"AskPrice{bi_level}"] - best_ask
    df["BaseImbalance"] = (bid_distance - ask_distance) / (bid_distance + ask_distance)

    df['OffsetTimestamp'] = df['Timestamp'].div(1000).sub(training_start_timestamp_seconds)

    return df


In [3]:
# Load the raw order book, derive the analysis columns with book level 8, and load the COE file.
# get_processed_orderbook_df writes its columns onto the frame it is given and returns
# that same frame, so `processed_orderbook_df` and `orderbook_df` refer to one object here.
orderbook_df = get_orderbook_df(DF_ORDERBOOK_PATH)
processed_orderbook_df = get_processed_orderbook_df(orderbook_df, 8, COE_TRAINING_START_TIMESTAMP_SECONDS)
coe_df = get_coe_df(DF_COE_PATH)

In [8]:
# Last ten order-book rows before offset 1918 s.
# 'OffsetTimestamp' is a derived column, so read it from the processed frame rather
# than relying on the raw-named frame having been modified as a side effect.
processed_orderbook_df.loc[processed_orderbook_df['OffsetTimestamp'] < 1918].tail(10)

Unnamed: 0,AskPrice1,AskSize1,AskPrice2,AskSize2,AskPrice3,AskSize3,AskPrice4,AskSize4,AskPrice5,AskSize5,...,BidSize23,BidPrice24,BidSize24,BidPrice25,BidSize25,Timestamp,MidPrice,Return,BaseImbalance,OffsetTimestamp
12196,45011,0.355468,45016,0.006665,45017,0.02056,45019,0.018,45020,0.04452,...,0.355985,44944.0,0.0266,44942.0,6e-05,1705072907126,45010.5,3.3e-05,0.404255,1916.126
12197,45014,0.355446,45016,0.006665,45020,0.00256,45021,0.006625,45022,0.033852,...,0.114106,44945.0,0.355985,44944.0,0.0266,1705072907360,45012.0,0.0,0.5,1916.36
12198,45014,0.355446,45018,0.00256,45019,0.006665,45021,0.006625,45022,0.033852,...,0.355896,44954.0,0.00084,44952.0,7.5,1705072907611,45012.0,-2.2e-05,0.463415,1916.611
12199,45012,0.35546,45017,0.009225,45018,0.018,45020,0.018,45021,0.049705,...,0.0266,44956.0,0.355896,44953.0,0.00084,1705072907834,45011.0,0.0,0.315789,1916.834
12200,45012,0.35546,45017,0.009225,45018,0.018,45020,0.018,45021,0.049705,...,0.0266,44956.0,0.355896,44953.0,0.00084,1705072907835,45011.0,4.4e-05,0.315789,1916.835
12201,45016,0.3621,45018,0.018,45020,0.06324,45021,0.006625,45024,0.121424,...,0.00084,44956.0,0.355896,44953.0,0.286723,1705072908043,45013.0,0.0,0.435897,1917.043
12202,45016,0.006665,45020,0.04524,45021,0.006625,45024,0.121424,45025,0.2031,...,0.02744,44956.0,0.355896,44952.0,7.786723,1705072908255,45013.0,3.3e-05,0.4,1917.255
12203,45019,0.051425,45021,0.006625,45024,0.123984,45025,0.2031,45026,0.645632,...,6e-05,44958.0,0.0266,44957.0,0.00084,1705072908470,45014.5,-8.9e-05,0.473684,1917.47
12204,45011,0.391468,45015,0.627191,45017,0.2317,45018,0.36208,45020,0.00256,...,7.5,44951.0,0.286723,44947.0,6e-05,1705072908671,45010.5,0.0,0.409091,1917.671
12205,45011,0.605681,45015,0.355438,45017,0.23426,45018,0.434854,45021,0.117675,...,7.5,44950.0,0.286723,44946.0,6e-05,1705072908930,45010.5,0.0,0.404255,1917.93


In [12]:
# Keep only rows whose forward return is non-zero and where both 'Return' and
# 'BaseImbalance' are present. The derived columns are read from the processed
# frame instead of relying on the raw-named frame having been modified in place.
cutted_orderbook_df = (
    processed_orderbook_df
    .loc[processed_orderbook_df['Return'] != 0]
    .dropna(subset=['Return', 'BaseImbalance'])
)

# 1800 s = 30*60, the gap between COE training start and simulation start,
# so this shows the first filtered rows after the simulation start.
cutted_orderbook_df.loc[cutted_orderbook_df['OffsetTimestamp'] > 1800].head(20)

Unnamed: 0,AskPrice1,AskSize1,AskPrice2,AskSize2,AskPrice3,AskSize3,AskPrice4,AskSize4,AskPrice5,AskSize5,...,BidSize23,BidPrice24,BidSize24,BidPrice25,BidSize25,Timestamp,MidPrice,Return,BaseImbalance,OffsetTimestamp
11507,44991,3.416188,44994,0.11112,44996,0.48482,44997,0.366579,44998,0.035495,...,0.318189,44950.0,0.355944,44949.0,0.395745,1705072791021,44990.5,-3.3e-05,0.0,1800.021
11508,44991,3.416188,44994,0.11112,44996,0.48482,44997,0.366579,44998,0.035495,...,,,,,,1705072791268,44989.0,-1.1e-05,0.117647,1800.268
11509,44990,0.00267,44991,3.167108,44994,0.11112,44996,0.35,44997,0.366579,...,0.034744,44941.0,0.687128,44940.0,0.001015,1705072791269,44988.5,-0.000256,0.310345,1800.269
11510,44990,0.00267,44991,3.167108,44996,0.35,44997,0.366579,44998,0.032825,...,,,,,,1705072791498,44977.0,-2.2e-05,-0.357143,1800.498
11511,44988,1.271063,44989,0.790304,44990,0.069884,44991,1.342555,44996,0.35,...,0.32701,44928.0,0.02445,44926.0,0.540691,1705072791499,44976.0,-0.000211,-0.142857,1800.499
11512,44969,0.214213,44986,0.23437,44987,0.768304,44988,0.46683,44989,0.075063,...,0.02445,44927.0,0.252869,44926.0,0.2317,1705072791722,44966.5,1.1e-05,-0.459459,1800.722
11514,44969,0.214213,44978,0.2317,44979,0.355722,44986,0.00267,44987,0.607977,...,0.610684,44928.0,0.02445,44927.0,0.019857,1705072791943,44967.0,-1.1e-05,-0.375,1800.943
11515,44969,0.214213,44978,0.2317,44979,0.355722,44987,0.607977,44988,0.419884,...,,,,,,1705072792165,44966.5,-3.3e-05,-0.208333,1801.165
11517,44979,0.355722,44987,0.096132,44988,0.419884,44989,0.022,44990,0.17076,...,,,,,,1705072792407,44965.0,-0.000222,-0.235294,1801.407
11518,44959,0.214213,44964,0.00267,44965,1.124642,44976,0.142829,44977,0.011,...,0.308346,44905.0,0.001,44904.0,0.002874,1705072792408,44955.0,-7.8e-05,-0.380952,1801.408


In [14]:
# First ten COE rows whose column '1' exceeds 1800 (the 30*60 s training offset).
# NOTE(review): column '1' appears to hold offset times comparable to
# 'OffsetTimestamp' in the order-book frame — confirm against the COE writer.
coe_df.loc[coe_df['1'] > 1800].head(10)

Unnamed: 0,1,2,3,4
3171,1800.021,0.0,-3.3e-05,1800.021
3172,1801.165,-0.208333,-3.3e-05,1801.165
3173,1802.134,-0.5,-4.4e-05,1802.134
3174,1803.028,0.052632,7.8e-05,1803.028
3175,1804.007,0.16129,2.2e-05,1804.007
3176,1805.146,-0.317073,-2.2e-05,1805.146
3177,1806.076,-0.2,-1.1e-05,1806.076
3178,1807.453,-0.214286,-1.1e-05,1807.453
3179,1808.144,0.052632,1.1e-05,1808.144
3180,1809.083,0.030303,-3.3e-05,1809.083


# HAWKES CHECK

In [18]:
# Re-point the configuration at the Hawkes run. These assignments overwrite the
# constants of the same name defined in the first cell, so any earlier cell
# re-run after this one will use the Hawkes files.
TIMESTAMP_ORDERBOOK_FILE = '1705930483445'
SIMULATION_START_TIMESTAMP_SECONDS = 1705928979

# Directory holding the order-book dumps. The default is the original absolute
# location; set the ORDERBOOK_CHANGES_DIR environment variable to run the
# notebook on another machine without editing this cell.
ORDERBOOK_CHANGES_DIR = os.environ.get(
    'ORDERBOOK_CHANGES_DIR',
    '/home/davide/Desktop/phd/bitfinex-api-py/data/orderbook_changes',
)

# Gap between the COE training start and the simulation start (30 minutes).
COE_TRAINING_WINDOW_SECONDS = 30 * 60

DF_COE_PATH = f'data/hawkes_20/{TIMESTAMP_ORDERBOOK_FILE}_{SIMULATION_START_TIMESTAMP_SECONDS}_BI8.csv'
DF_ORDERBOOK_PATH = os.path.join(
    ORDERBOOK_CHANGES_DIR, f'orderbook_changes_{TIMESTAMP_ORDERBOOK_FILE}_interrupted.tsv'
)
COE_TRAINING_START_TIMESTAMP_SECONDS = (
    SIMULATION_START_TIMESTAMP_SECONDS - COE_TRAINING_WINDOW_SECONDS
)


In [21]:
# Reload the order book and COE frames from the paths currently configured,
# replacing the frames loaded earlier in the notebook.
orderbook_df = get_orderbook_df(DF_ORDERBOOK_PATH)
processed_orderbook_df = get_processed_orderbook_df(
    orderbook_df,
    bi_level=8,
    training_start_timestamp_seconds=COE_TRAINING_START_TIMESTAMP_SECONDS,
)
coe_df = get_coe_df(DF_COE_PATH)

# Absolute gap between columns '1' and '4' of the COE file.
# NOTE(review): both columns look like offset times in the saved output — confirm
# what each one represents before interpreting 'error'.
coe_df['error'] = coe_df['1'].sub(coe_df['4']).abs()

In [24]:
# Inspect the end of the COE frame together with the 'error' column.
# NOTE(review): in the saved output, rows 1676 and 1677 share the same values in
# columns '2', '3' and '4' and carry the largest errors (4.027 and 11.312), and
# column '1' drops from 1926.201 back to 1909.231 on the following row — worth
# checking how these rows were produced.
coe_df.tail(20)

Unnamed: 0,1,2,3,4,error
1670,1900.874,0.357143,0.000182,1901.047,0.173
1671,1901.369,0.35,0.000194,1901.515,0.146
1672,1902.45,0.272727,-1.2e-05,1902.423,0.027
1673,1903.233,0.217391,3.6e-05,1903.126,0.107
1674,1904.498,0.272727,-1.2e-05,1904.698,0.2
1675,1905.802,0.058824,7.3e-05,1907.515,1.713
1676,1918.916,-0.185185,2.4e-05,1914.889,4.027
1677,1926.201,-0.185185,2.4e-05,1914.889,11.312
1678,1909.231,0.363636,-6.1e-05,1909.225,0.006
1679,1909.513,0.3,2.4e-05,1909.463,0.05
