In [18]:
import requests

import numpy as np
import pandas as pd

### Using TraderMade REST API for historical data

In [19]:
API_KEY = "api_key_here"

In [20]:
def request_pair(src:str, dst:str, start_date:str="2022-11-29 08:30", end_date:str="2022-11-29 09:00"):
    req = f"https://marketdata.tradermade.com/api/v1/tick_historical_sample/{src}{dst}/{start_date}/{end_date}?api_key={API_KEY}&format=json"
    resp = requests.get(req)
    return resp         

def parse_json(resp) -> pd.DataFrame:
    json_df = pd.DataFrame(resp.json())
    df = pd.DataFrame(json_df["quotes"].to_list())
    df[["from","to"]] = df["inst"].str.extract('(.{3,3})' * 2)
    df = df.drop(columns="inst")
    
    df["start_date"] = json_df["start_date"]
    df["end_date"] = json_df["end_date"]
    return df

def save_frame(df:pd.DataFrame, name:str) -> None:
    df.to_pickle(f"{name}.pickle")

def download(src, dst):
    try:
        resp = request_pair(src, dst)
        df = parse_json(resp)
        save_frame(df, f"{src}{dst}")
    except Exception as e:
        print("error")
        print(resp.json())

#download("GBP", "USD")

### Setup Data

In [21]:
pair_list = [ 
    ("BTC", "JPY"),
    ("BTC", "USD"),
    ("USD", "JPY"),
    ("ETH", "JPY"),
    ("ETH", "USD"),
    ("ETH", "BTC"),
]

nodes = list(set([x[0] for x in pair_list] + [x[1] for x in pair_list]))

dfs = []
for src, dst in pair_list:
    df = pd.read_pickle(f"{src}{dst}.pickle")
    dfs.append(df)

In [22]:
combined = pd.concat(dfs)
combined = combined.sort_values(by="time")

combined

Unnamed: 0,ask,bid,time,from,to,start_date,end_date
0,138.363,138.361,1669710600723,USD,JPY,2022-11-29 08:30,2022-11-29 09:00
1,138.363,138.360,1669710600816,USD,JPY,2022-11-29 08:30,2022-11-29 09:00
2,138.362,138.360,1669710601129,USD,JPY,2022-11-29 08:30,2022-11-29 09:00
0,2281130.000,2278454.000,1669710601136,BTC,JPY,2022-11-29 08:30,2022-11-29 09:00
1,2281130.000,2278454.000,1669710601683,BTC,JPY,2022-11-29 08:30,2022-11-29 09:00
...,...,...,...,...,...,...,...
6335,138.094,138.092,1669712399648,USD,JPY,2022-11-29 08:30,2022-11-29 09:00
6336,138.095,138.092,1669712399660,USD,JPY,2022-11-29 08:30,2022-11-29 09:00
6337,138.096,138.092,1669712399678,USD,JPY,2022-11-29 08:30,2022-11-29 09:00
4366,1214.850,1214.350,1669712399737,ETH,USD,2022-11-29 08:30,2022-11-29 09:00


In [23]:
## Bin ticks into timestamps, (the backtest will lose accuracy)
combined["bin time"] = pd.cut(combined["time"], int(combined.shape[0]/2))
combined_pt = combined.pivot_table(index="bin time", columns=["from", "to"], values=["bid", "ask"], aggfunc="mean")

### No binning done, (likely no arbs will be found)
#combined_pt = combined.pivot_table(index="time", columns=["from", "to"], values=["bid", "ask", "exch_rate_out", "exch_rate_in"], aggfunc="mean") # no timestamp bins

valid_periods = combined_pt[combined_pt.loc[:, (["ask","bid"], slice(None))].count(axis=1) > 1] # filter rows with <=1 nodes in graph, arb is obviously impossible 
valid_periods

Unnamed: 0_level_0,ask,ask,ask,ask,ask,ask,bid,bid,bid,bid,bid,bid
from,BTC,BTC,ETH,ETH,ETH,USD,BTC,BTC,ETH,ETH,ETH,USD
to,JPY,USD,BTC,JPY,USD,JPY,JPY,USD,BTC,JPY,USD,JPY
bin time,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3
"(1669710598923.949, 1669710600904.484]",,,,,,138.363000,,,,,,138.360500
"(1669710601085.968, 1669710601267.452]",2281130.0,,,,,138.362000,2278454.0,,,,,138.360000
"(1669710601630.42, 1669710601811.904]",2281130.0,,,,,,2278454.0,,,,,
"(1669710601811.904, 1669710601993.388]",2281724.0,,,,,138.363000,2278454.0,,,,,138.360000
"(1669710602537.84, 1669710602719.324]",,,,,,138.364000,,,,,,138.361000
...,...,...,...,...,...,...,...,...,...,...,...,...
"(1669712398685.096, 1669712398866.58]",,,,,1214.870,138.099000,,,,,1214.36,138.096000
"(1669712398866.58, 1669712399048.064]",,,,,1214.860,,,,,,1214.36,
"(1669712399229.548, 1669712399411.032]",2282299.0,,,,,138.096500,2278550.0,,,,,138.094500
"(1669712399411.032, 1669712399592.516]",2282300.0,,,,,138.096667,2278550.0,,,,,138.089333


### Bellman-Ford for cycle detection

Idea:
  - Model each tick as a graph of n nodes (assets), where each edge cost represents the current exchange rate
    - Ideally, we have a complete graph with nC2 edges, but there might be some lacking data
  - We define the cost of a path as multiplicative and not additive:
    - As such, we take the log(exchange rate) as our edge cost instead, since log(a) + log(b) = log(ab)
  - If there exists arbitrage in the market, then there exists a negative weighted cycle 
    - We use n passes of the Bellman-Ford algorithm to detect whether such a cycle exists
      - This takes O(VE) time, but since we have a (nearly) complete graph, it is O(V^3) time

In [24]:
class Graph:
    def __init__(self, nodes:list):
        '''
        Graph DS, implemented as EdgeList
        '''
        self.nodes = nodes
        self.edges = []

    def add_edge(self, src, dst, cost) -> None:
        self.edges.append((src, dst, -np.log(cost)))

    def bellman_ford(self):
        dist = {node: np.inf for node in self.nodes}
        dist[self.nodes[0]] = 0

        parent = {node: -1 for node in self.nodes}

        for _ in range(0, len(self.nodes)-1):
            for u, v, cost in self.edges:
                #print(u, v, cost)
                if dist[v] > dist[u] + cost:
                    dist[v] = dist[u] + cost
                    parent[v] = u
        
        # check neg cycle
        C = None
        for u,v,cost in self.edges:
            if dist[v] > dist[u] + cost:
                C = v
                
        if C != None:
            # cycle
            for _ in range(len(self.nodes)):
                if parent[C] == -1:
                    return False
                C = parent[C]
            cycle = []
            v = C
            while True:
                cycle.append(v)
                if (v == C and len(cycle) > 1):
                    break
                else:
                    v = parent[v]
            cycle.reverse()

            return cycle
        else:
            return False 

In [25]:
num_cycles = 0
for i, row in valid_periods.iterrows():
    G = Graph(nodes=nodes)
    
    ask = row["ask"]
    bid = row["bid"]
    for src, dst in ask.index:
        cost1 = bid[(src, dst)]
        cost2 = 1 / ask[(src, dst)]
        G.add_edge(src, dst, cost1)
        G.add_edge(dst, src, cost2)

    cycle = G.bellman_ford()
    if cycle:
        print(f"Negative weight cycle detected at bin time {i} | Cycle: {cycle}")
        prev = cycle[0]
        product = 1
        for curr in cycle[1:]:
            if (prev, curr) in bid.index:
                rate = bid[(prev, curr)]
                product *= rate
            else:
                rate = 1 / ask[(curr, prev)]
                product *= rate            
            prev = curr
        
        if product > 1:
            print(f"Return: {product}")
            num_cycles+=1
print(f"Num cycles detected = {num_cycles}")

Negative weight cycle detected at bin time (1669710710339.342, 1669710710520.827] | Cycle: ['BTC', 'ETH', 'USD', 'JPY', 'BTC']
Return: 1.000168807644518
Negative weight cycle detected at bin time (1669710737198.976, 1669710737380.46] | Cycle: ['USD', 'ETH', 'JPY', 'USD']
Return: 1.0003073736372181
Negative weight cycle detected at bin time (1669710756799.249, 1669710756980.734] | Cycle: ['USD', 'ETH', 'JPY', 'USD']
Return: 1.0000223364558953
Negative weight cycle detected at bin time (1669710764421.578, 1669710764603.062] | Cycle: ['USD', 'ETH', 'JPY', 'USD']
Return: 1.0003687617075838
Negative weight cycle detected at bin time (1669710781481.075, 1669710781662.559] | Cycle: ['USD', 'ETH', 'JPY', 'USD']
Return: 1.0020190699477451
Negative weight cycle detected at bin time (1669710806525.868, 1669710806707.352] | Cycle: ['USD', 'ETH', 'JPY', 'USD']
Return: 1.000057853728248
Negative weight cycle detected at bin time (1669710808340.708, 1669710808522.192] | Cycle: ['USD', 'ETH', 'JPY', '