In [1]:
#the classic arbitrage in forex is triangular arbitrage
#for instance, we wanna short GBP long NOK
#we can short GBP long PLN short PLN long NOK
#which may give us more NOK for some cases
#for this sort of arbitrage problem in high frequency trading
#we can use dijkstra/bellman-ford algo to get the path and optimal result
#however, algo trading is already a major player in forex market
#it is rare that we can find this sort of arbitrage opportunity
#if it exists, it would have been exploited long before we know
#unfortunately, i didnt find any datasets that left opportunities to arbitrage
import pandas as pd
import numpy as np

In [2]:
import os
os.getcwd()

'/home/jovyan'

In [3]:
#forex data can be downloaded in the data subfolder
# https://github.com/je-suis-tm/graph-theory/blob/master/data/forex.csv
df=pd.read_csv('forex.csv')

In [4]:
#details of graph adt can be found in the following link
# https://github.com/je-suis-tm/graph-theory/blob/master/BFS%20DFS%20on%20DCG.ipynb
class graph:
    """Directed weighted graph kept as a dict of dicts.

    self.graph maps a source vertex to {destination: weight}.
    self.visited maps a vertex to a 0/1 flag that traversals can set and read.
    Only vertices that appear as the source of an edge are registered by append.
    """

    def __init__(self):
        #adjacency structure, {source: {destination: weight}}
        self.graph={}
        #traversal flags, {vertex: 0 or 1}
        self.visited={}

    def append(self,vertexid,edge,weight):
        """Add (or overwrite) the edge vertexid -> edge with the given weight."""
        already_registered=vertexid in self.graph
        if not already_registered:
            #first edge leaving this vertex, register it and flag it unvisited
            self.graph[vertexid]={}
            self.visited[vertexid]=0
        outgoing=self.graph[vertexid]
        outgoing[edge]=weight

    def reveal(self):
        """Return the underlying adjacency dict (not a copy)."""
        return self.graph

    def vertex(self):
        """Return the registered source vertices as a list."""
        return [*self.graph]

    def edge(self,vertexid):
        """Return the destinations reachable from vertexid as a list."""
        return [*self.graph[vertexid]]

    def weight(self,vertexid,edge):
        """Return the weight stored for the edge vertexid -> edge."""
        outgoing=self.graph[vertexid]
        return outgoing[edge]

    def size(self):
        """Return the number of registered source vertices."""
        return len(self.graph)

    def visit(self,vertexid):
        """Flag vertexid as visited."""
        self.visited[vertexid]=1

    def go(self,vertexid):
        """Return the visited flag of vertexid."""
        return self.visited[vertexid]

    def route(self):
        """Return the dict of visited flags (not a copy)."""
        return self.visited

#details of dijkstra algo could be found in the following link
# https://github.com/je-suis-tm/graph-theory/blob/master/dijkstra%20shortest%20path.ipynb
def dijkstra(df,start,end):
    """Shortest path from start to end with dijkstra algo.

    All edge weights are expected to be non-negative.

    Parameters
    ----------
    df : graph-like object
        Must provide vertex(), edge(v), weight(v,w) and visit(v).
    start, end : hashable
        Source and destination vertices. Each must be either a registered
        vertex or the destination of at least one edge.

    Returns
    -------
    (distance, path)
        distance is the total weight of the cheapest route and path is the
        list of vertices from start to end inclusive.
        If end cannot be reached the result is (float('inf'), []).

    Raises
    ------
    KeyError
        If start or end does not appear anywhere in the graph.

    Notes
    -----
    df.visit() is still called on every expanded vertex that has outgoing
    edges, but the traversal keeps its own settled set, so the result no
    longer depends on visited flags left on the graph by an earlier call.
    """
    inf=float('inf')

    #vertices that own an adjacency list
    sources=df.vertex()
    has_edges=set(sources)

    #a vertex may appear only as a destination (the graph may be incomplete)
    #give those vertices a distance slot as well instead of dropping their edges
    distance={}
    for vertex in sources:
        distance.setdefault(vertex,inf)
        for neighbour in df.edge(vertex):
            distance.setdefault(neighbour,inf)

    if start not in distance:
        raise KeyError(start)
    if end not in distance:
        raise KeyError(end)

    distance[start]=0
    pred={}
    queue={start:0}
    settled=set()

    while queue:
        current=min(queue,key=queue.get)
        del queue[current]
        settled.add(current)

        #destination-only vertices have nothing to expand
        if current not in has_edges:
            continue
        df.visit(current)

        for neighbour in df.edge(current):
            candidate=distance[current]+df.weight(current,neighbour)
            if candidate<distance[neighbour]:
                distance[neighbour]=candidate
                pred[neighbour]=current
                #refresh the priority on every improvement
                #a stale priority would expand vertices in the wrong order
                if neighbour not in settled:
                    queue[neighbour]=candidate

    #end was never relaxed, there is no route
    if distance[end]==inf:
        return inf,[]

    #walk the predecessor chain backwards from end to start
    path=[end]
    while path[0]!=start:
        path.insert(0,pred[path[0]])

    return distance[end],path
        

#bellman ford is somewhat similar to dijkstra
#it doesnt need a queue to do bfs traversal and keep track of where it has been
#the textbook version relaxes every edge of every vertex for (number of vertices - 1) rounds
#it adds one more part to detect if there is a negative cycle
#it goes through every edge of every vertex once more after the relaxation rounds
#if there is a negative cycle, traversals cannot bring convergence
#the distance would keep getting smaller after every extra round of traversal
#when there is no negative cycle, it works just like dijkstra to return the optimal steps
#when there is, it reports the error
#with 3 layers of loops, we can tell bellman fords time complexity is much higher than dijkstra
def bellman_ford(df,start,end):
    """Shortest path from start to end with bellman ford algo.

    Negative edge weights are allowed, negative cycles are reported.

    Parameters
    ----------
    df : graph-like object
        Must provide vertex(), edge(v) and weight(v,w).
    start, end : hashable
        Source and destination vertices.

    Returns
    -------
    (distance, path)
        distance is the total weight of the cheapest route and path is the
        list of vertices from start to end inclusive.
        If end cannot be reached the result is (float('inf'), []).

    Raises
    ------
    AssertionError
        With the message 'negative cycle exists!' when an edge can still be
        relaxed after the relaxation rounds. It is raised explicitly so the
        check survives python -O, which strips assert statements.
    KeyError
        If end does not appear anywhere in the graph.
    """
    inf=float('inf')

    #flatten the adjacency structure once, in the same order as nested loops
    edges=[(i,j,df.weight(i,j)) for i in df.vertex() for j in df.edge(i)]

    #a vertex may appear only as a destination (the graph may be incomplete)
    #give those vertices a distance slot as well instead of dropping their edges
    distance={i:inf for i in df.vertex()}
    for _,j,_ in edges:
        distance.setdefault(j,inf)
    distance[start]=0
    pred={}

    #a shortest path has at most (number of vertices - 1) edges
    #so that many rounds of relaxation are required
    for _ in range(len(distance)-1):
        updated=False
        for i,j,w in edges:
            if distance[i]+w<distance[j]:
                distance[j]=distance[i]+w
                pred[j]=i
                updated=True
        #nothing changed in a full round, further rounds cannot change anything
        if not updated:
            break

    #one extra round, any edge that can still be relaxed sits on a negative cycle
    for i,j,w in edges:
        if distance[i]+w<distance[j]:
            raise AssertionError('negative cycle exists!')

    #end was never relaxed, there is no route
    if distance[end]==inf:
        return inf,[]

    #walk the predecessor chain backwards from end to start
    path=[end]
    while path[0]!=start:
        path.insert(0,pred[path[0]])

    return distance[end],path

In [5]:
df

Unnamed: 0,currency,ask,bid
0,AUDJPY,83.893,83.861
1,AUDUSD,0.7599,0.7596
2,CHFJPY,112.083,112.037
3,EURCHF,1.1604,1.1601
4,EURGBP,0.8809,0.8806
5,EURJPY,130.016,129.968
6,EURUSD,1.1778,1.1773
7,GBPCHF,1.3177,1.3172
8,GBPJPY,147.649,147.577
9,GBPUSD,1.3375,1.3367


In [6]:
g=graph()
#the tricky part of forex is its calculation
#for instance, assume we have EURGBP,GBPUSD
#to get EURUSD, we need to multiply EURGBP by GBPUSD
#however, dijkstra accepts plus only
#thus, we need a logarithm transformation
#cuz log(a)+log(b)=log(a*b)
#however, some forex rate such as EURGBP is smaller than 1
#after logarithm transformation we get negative numbers(cuz e**0=1)
#therefore we use negative logarithm transformation
#the good thing about it is that we wanna get a forex rate larger than a direct one
#after negative logarithm transformation, we only need to get the smallest number
#which is consistent with dijkstra!


#there is an issue for dijkstra to really work in high frequency trading
#some forex rate such as EURJPY is larger than 100
#after negative logarithm transformation we get negative numbers
#in order to use dijkstra, we only add edges with positive weights to the graph adt
#walk the currency pairs together with their bid prices
#a pair such as EURGBP is split into base EUR and quote GBP
for pair,bid in zip(df['currency'],df['bid'].values):
    base,quote=pair[:3],pair[3:]

    #bid below 1, -log(bid) is positive, keep the edge base -> quote
    log_bid=np.log(bid)
    if log_bid<0:
        g.append(base,quote,-log_bid)

    #bid above 1, -log(1/bid) is positive, keep the edge quote -> base
    log_inverse=np.log(1/bid)
    if log_inverse<0:
        g.append(quote,base,-log_inverse)

In [7]:
#note that this graph adt is incomplete
g.reveal()

{'AUD': {'USD': 0.27496330004400615},
 'CHF': {'EUR': 0.14850620829922395, 'GBP': 0.2755082715200721},
 'EUR': {'GBP': 0.12715178566048355},
 'JPY': {'AUD': 4.429160666307817,
  'CHF': 4.718829173882046,
  'EUR': 4.867288266308598,
  'GBP': 4.994350073465887,
  'USD': 4.703892718905732},
 'USD': {'CHF': 0.01491061273575424,
  'EUR': 0.16322368110201413,
  'GBP': 0.29020388999529034}}

In [8]:
answer=dijkstra(g,'USD','EUR')

In [9]:
#we revert the answer back to non-logarithm forex rate
np.e**(-answer[0])

0.8494011721736175

In [10]:
#the shortest route is direct route USDEUR
answer[1]

['USD', 'EUR']

In [11]:
#which is consistent with inverse of EURUSD bid price
1/df['bid'][df['currency']=='EURUSD']

6    0.849401
Name: bid, dtype: float64

In [12]:
#if we try USD to JPY to EUR
#we get a smaller rate than the direct USD to EUR route
#it is consistent with dijkstra giving us the optimal forex rate
#.iloc[0] takes the first matching row as a scalar
#calling float() on a one-element series is deprecated in recent pandas
a=float(df.loc[df['currency']=='EURJPY','bid'].iloc[0])
b=float(df.loc[df['currency']=='USDJPY','bid'].iloc[0])
b/a

0.8492552012803153

In [13]:
#lets try to build a complete graph adt and implement bellman ford algo
#every pair contributes both directions, base -> quote and quote -> base
#NOTE(review): both directions are priced off the bid column
#the reverse weight is the negation of the forward weight up to rounding
#and the ask column is never used
#presumably the reverse leg should be priced off ask, confirm
#cuz it affects whether a negative cycle is reported
g=graph()
for pair,bid in zip(df['currency'],df['bid'].values):
    base,quote=pair[:3],pair[3:]
    g.append(base,quote,-np.log(bid))
    g.append(quote,base,-np.log(1/bid))

In [14]:
#the complete graph adt
g.reveal()

{'AUD': {'JPY': -4.429160666307817, 'USD': 0.27496330004400615},
 'CHF': {'EUR': 0.14850620829922395,
  'GBP': 0.2755082715200721,
  'JPY': -4.718829173882046,
  'USD': -0.01491061273575428},
 'EUR': {'CHF': -0.14850620829922395,
  'GBP': 0.12715178566048355,
  'JPY': -4.867288266308598,
  'USD': -0.16322368110201407},
 'GBP': {'CHF': -0.2755082715200721,
  'EUR': -0.1271517856604836,
  'JPY': -4.9943500734658866,
  'USD': -0.2902038899952903},
 'JPY': {'AUD': 4.429160666307817,
  'CHF': 4.718829173882046,
  'EUR': 4.867288266308598,
  'GBP': 4.994350073465887,
  'USD': 4.703892718905732},
 'USD': {'AUD': -0.27496330004400615,
  'CHF': 0.01491061273575424,
  'EUR': 0.16322368110201413,
  'GBP': 0.29020388999529034,
  'JPY': -4.703892718905732}}

In [15]:
#there is a negative cycle
#we got error message
bellman_ford(g,'USD','EUR')

AssertionError: negative cycle exists!