In [47]:
import os, sys
import numpy as np
import pandas as pd
from pathlib import Path
from datetime import datetime, timedelta
from typing import List, Dict

In [10]:
!pip install dolphindb

Collecting dolphindb
  Using cached dolphindb-1.30.17.2-cp38-cp38-win_amd64.whl (3.8 MB)
Installing collected packages: dolphindb
Successfully installed dolphindb-1.30.17.2


In [15]:
import dolphindb as ddb

# Connection settings: defaults reproduce the original hard-coded values, but
# each one can be overridden from the environment so the notebook is not tied
# to a single server and credentials never have to be written into a cell.
DDB_HOST = os.environ.get("DDB_HOST", "172.16.99.183")
DDB_PORT = int(os.environ.get("DDB_PORT", "38961"))
DDB_USER = os.environ.get("DDB_USER", "")
DDB_PASSWORD = os.environ.get("DDB_PASSWORD", "")

# `s` is the module-level session used by every query helper below.
s = ddb.session()
s.connect(DDB_HOST, DDB_PORT, DDB_USER, DDB_PASSWORD)

True

In [18]:

# Bind server-side handles `trade_table` / `depth_table` in the DolphinDB
# session; the query strings built later refer to these names.
q1 = 'trade_table = loadTable("dfs://tick_trade", "trades")'
q2 = 'depth_table = loadTable("dfs://tick_depth", "depths")'
for load_stmt in (q1, q2):
    s.run(load_stmt)

In [242]:
# Study period as ISO dates. Daily windows are generated from `start_date`
# up to, but not including, `end_date`.
start_date = "2022-05-01"
end_date = "2022-05-24"

In [243]:
# Delay between a trade and the depth snapshot it is matched against.
# The query helper formats this value as `<delay>s`, i.e. it is in seconds.
delay = 15

In [244]:
# Traded symbol -> list of hedge symbols to evaluate against it.
# The part after the '.' in the traded symbol is used as the key into `gas_fees`.
symbols = {"ETHUSDC.UNS": ["ETHUSDT.CUP"]}

In [245]:
# Gas fee per trade, keyed by the exchange suffix of the traded symbol.
gas_fees = {"UNS": 30}

In [246]:
# Build one [window_open, window_close) pair per calendar day between
# `start_date` (inclusive) and `end_date` (exclusive), formatted the way the
# query strings expect timestamps (YYYY.MM.DDTHH:MM:SS).
# Note: `start` is advanced by the loop and ends up at the last window's close.
start = datetime.strptime(f"{start_date} 00:00:00", "%Y-%m-%d %H:%M:%S")
end = datetime.strptime(f"{end_date} 00:00:00", "%Y-%m-%d %H:%M:%S")

starts, ends = [], []
while end > start:
    window_open = start.strftime("%Y.%m.%dT%H:%M:%S")
    start = start + timedelta(days=1)
    starts.append(window_open)
    ends.append(start.strftime("%Y.%m.%dT%H:%M:%S"))

In [247]:
def get_data_date(sym: str, hedge: str, side: str, delay: int, start: str, end: str, n_levels: int = 20) -> pd.DataFrame:
    """Fetch trades of `sym` in one time window, joined to a delayed depth snapshot of `hedge`.

    Three statements are run on the module-level DolphinDB session `s`:

    1. ``t1`` - trades of `sym` with the given `side` in ``[start, end)``,
       grouped by timestamp, with summed ``volume`` and volume-weighted
       average price ``vwap``.
    2. ``t2`` - the first `n_levels` price/volume columns of ``depth_table``
       for `hedge` in ``[start, end)``, keeping rows whose level-1 price is > 0.
    3. a window join ``wj(t1, t2, <delay>s:<delay+1>s, ...)`` on ``timestamp``
       that takes ``first(...)`` of every depth column inside the window.

    ``t1`` and ``t2`` are assigned as variables in the server session as a
    side effect.

    Args:
        sym: Symbol whose trades are selected from ``trade_table``.
        hedge: Symbol whose book is selected from ``depth_table``.
        side: Trade side filter. ``'1'`` reads the ask columns (``a*``/``av*``);
            any other value reads the bid columns (``b*``/``bv*``). Non-string
            values are converted with ``str`` so that ``1`` and ``'1'`` agree.
        delay: Lower bound of the join window, in seconds; the upper bound is
            ``delay + 1``.
        start: Inclusive window start, as a DolphinDB timestamp literal.
        end: Exclusive window end, as a DolphinDB timestamp literal.
        n_levels: Number of book levels to fetch (must be >= 1).

    Returns:
        The window-join result returned by ``s.run``.

    Raises:
        ValueError: If `n_levels` is smaller than 1. Previously this silently
            produced a malformed query.

    NOTE(review): ``t2`` is limited to ``timestamp < end`` while the join looks
    `delay` seconds ahead, so trades in the last `delay` seconds of the window
    presumably find no depth row - confirm whether that is intended.
    """
    if n_levels < 1:
        raise ValueError(f"n_levels must be >= 1, got {n_levels}")

    # Normalise so an int side (1 / -1) picks the same book as its string form.
    side = str(side)
    price_type = 'a' if side == '1' else 'b'
    volume_type = price_type + 'v'
    levels = range(1, n_levels + 1)

    t1 = (
        "t1 = select timestamp, sum(volume) as volume, wavg(price, volume) as vwap from trade_table"
        f" where timestamp>={start}, timestamp<{end}, symbol='{sym}', side={side}"
        " group by timestamp order by timestamp"
    )

    price_cols = ", ".join(f"{price_type}{level}" for level in levels)
    volume_cols = ", ".join(f"{volume_type}{level}" for level in levels)
    t2 = (
        f"t2 = select timestamp, {price_cols}, {volume_cols} from depth_table"
        f" where timestamp>={start}, timestamp<{end}, symbol='{hedge}', {price_type}1>0"
        " order by timestamp"
    )

    # One first(...) aggregate per price and per volume column.
    aggregates = ", ".join(
        f"first({price_type}{level}) as {price_type}{level}, "
        f"first({volume_type}{level}) as {volume_type}{level}"
        for level in levels
    )
    t3 = f"select * from wj(t1, t2, {delay}s:{delay + 1}s, <[{aggregates}]>, `timestamp)"

    s.run(t1)
    s.run(t2)
    df = s.run(t3)

    return df


def get_data(sym: str, hedge: str, side: str, delay: int, starts: List, ends: List) -> pd.DataFrame:
    """Collect `get_data_date` results over several time windows.

    `starts[i]` and `ends[i]` delimit the i-th window. Windows that return an
    empty frame are skipped. The remaining frames are concatenated in order;
    their original row indices are kept as-is.

    Returns an empty DataFrame when `starts` is empty or every window is empty.
    """
    if len(starts) == 0:
        return pd.DataFrame()

    frames = []
    for idx, window_start in enumerate(starts):
        window_df = get_data_date(sym, hedge, side, delay, window_start, ends[idx])
        if window_df.empty:
            continue
        frames.append(window_df)

    return pd.concat(frames) if frames else pd.DataFrame()

In [248]:
def buy_match(x, n_levels=20, gas_fee=30):
    """Estimate what could be matched against the ask side for one trade row.

    Walks ask levels 1..n_levels while the row's ``vwap`` is at least the
    level's price ``a{i}``, accumulating

    * notional: ``av{i} * a{i}``
    * edge:     ``av{i} * (vwap - a{i})``

    Args:
        x: Mapping-like row providing ``"vwap"`` and ``"a{i}"`` / ``"av{i}"``
            for every level that is visited.
        n_levels: Maximum number of levels to walk.
        gas_fee: Fixed cost charged once per row; rows whose accumulated edge
            does not cover it are rejected. Defaults to 30, the value that was
            previously hard-coded.

    Returns:
        ``(notional, edge - gas_fee)``, or ``(0, 0)`` when the edge is below
        `gas_fee`.

    Note: if a visited level is NaN, the comparison is False, so the walk
    continues and NaN propagates into both returned values.
    """
    vwap = x["vwap"]
    res, pnl = 0, 0
    for i in range(1, n_levels + 1):
        ask = x[f"a{i}"]
        # Stop at the first level priced above the trade's vwap.
        if vwap < ask:
            break

        ask_volume = x[f"av{i}"]
        res += ask_volume * ask
        pnl += ask_volume * (vwap - ask)

    # Not worth executing if the edge does not pay for the gas.
    if pnl < gas_fee:
        return 0, 0

    return res, pnl - gas_fee

def sell_match(x, n_levels=20, gas_fee=30):
    """Estimate what could be matched against the bid side for one trade row.

    Walks bid levels 1..n_levels while the row's ``vwap`` is at most the
    level's price ``b{i}``, accumulating

    * notional: ``bv{i} * b{i}``
    * edge:     ``bv{i} * (b{i} - vwap)``

    Args:
        x: Mapping-like row providing ``"vwap"`` and ``"b{i}"`` / ``"bv{i}"``
            for every level that is visited.
        n_levels: Maximum number of levels to walk.
        gas_fee: Fixed cost charged once per row; rows whose accumulated edge
            does not cover it are rejected. Defaults to 30, the value that was
            previously hard-coded.

    Returns:
        ``(notional, edge - gas_fee)``, or ``(0, 0)`` when the edge is below
        `gas_fee`.

    Note: if a visited level is NaN, the comparison is False, so the walk
    continues and NaN propagates into both returned values.
    """
    vwap = x["vwap"]
    res, pnl = 0, 0
    for i in range(1, n_levels + 1):
        bid = x[f"b{i}"]
        # Stop at the first level priced below the trade's vwap.
        if vwap > bid:
            break

        bid_volume = x[f"bv{i}"]
        res += bid_volume * bid
        pnl += bid_volume * (bid - vwap)

    # Not worth executing if the edge does not pay for the gas.
    if pnl < gas_fee:
        return 0, 0

    return res, pnl - gas_fee


In [249]:
def _daily_totals(trades: pd.DataFrame, match_fn) -> pd.DataFrame:
    """Apply `match_fn` to every row of `trades` and sum the results per day.

    `match_fn` must return a ``(match, pnl)`` pair for a row. The result has
    columns ``match`` and ``pnl`` and a daily ``timestamp`` index. An empty
    input yields an empty frame with the same columns instead of raising.
    """
    if trades.empty:
        return pd.DataFrame(
            columns=["match", "pnl"],
            index=pd.DatetimeIndex([], name="timestamp"),
            dtype="float64",
        )

    # Expand the returned pairs straight into two numeric columns.
    matched = trades.apply(match_fn, axis=1, result_type="expand")
    matched.columns = ["match", "pnl"]
    matched.index = pd.DatetimeIndex(pd.to_datetime(trades["timestamp"]), name="timestamp")
    return matched.resample("1D").sum()


results = []
# `delay_s` (not `delay`) so the sweep does not overwrite the config value.
for delay_s in [10, 15, 20]:
    for sym, hedges in symbols.items():
        _, exch = sym.split('.')
        # Fails fast with KeyError if the exchange has no configured gas fee.
        # NOTE(review): the value is not passed on; buy_match / sell_match are
        # called with their built-in fee.
        gas = gas_fees[exch]
        for hedge in hedges:
            long_daily = _daily_totals(get_data(sym, hedge, '1', delay_s, starts, ends), buy_match)
            short_daily = _daily_totals(get_data(sym, hedge, '-1', delay_s, starts, ends), sell_match)

            # Series are aligned on the daily index; a day present on only one
            # side shows NaN on the other.
            daily = pd.DataFrame({
                "LongMatch": long_daily["match"],
                "LongPnL": long_daily["pnl"],
                "ShortMatch": short_daily["match"],
                "ShortPnL": short_daily["pnl"],
            })

            daily["symbol"] = sym
            daily["hedge"] = hedge
            daily["delay"] = delay_s

            results.append(daily)

# One row per (day, symbol, hedge, delay); later cells read this as `df`.
df = pd.concat(results)

In [254]:
# Show the daily summary rows computed with a 20-second delay.
df.loc[df["delay"] == 20]

Unnamed: 0_level_0,LongMatch,LongPnL,ShortMatch,ShortPnL,symbol,hedge,delay
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-05-01,264706500.0,300416.33325,267032900.0,386769.5,ETHUSDC.UNS,ETHUSDT.CUP,20
2022-05-02,191056700.0,272224.369467,264629300.0,332843.3,ETHUSDC.UNS,ETHUSDT.CUP,20
2022-05-03,177668100.0,273271.357835,231017600.0,335210.8,ETHUSDC.UNS,ETHUSDT.CUP,20
2022-05-04,205304800.0,267728.102544,280469800.0,344289.1,ETHUSDC.UNS,ETHUSDT.CUP,20
2022-05-05,218897200.0,285020.595072,265356400.0,353671.6,ETHUSDC.UNS,ETHUSDT.CUP,20
2022-05-06,181902500.0,263215.22606,296097200.0,449654.2,ETHUSDC.UNS,ETHUSDT.CUP,20
2022-05-07,151028300.0,207049.835284,281411300.0,369393.3,ETHUSDC.UNS,ETHUSDT.CUP,20
2022-05-08,137895000.0,194004.111049,414875400.0,556866.8,ETHUSDC.UNS,ETHUSDT.CUP,20
2022-05-09,153772700.0,214559.143104,270506900.0,394002.5,ETHUSDC.UNS,ETHUSDT.CUP,20
2022-05-10,229700900.0,287478.227963,312016600.0,449665.3,ETHUSDC.UNS,ETHUSDT.CUP,20


In [228]:
# Persist the full summary (all delays), keeping the timestamp index as a column.
df.to_csv(path_or_buf="ETHUSDC.UNS.arb.csv", index=True)