# Arbitrage Detection Bot/Service

In [28]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import yfinance as yf
import networkx as nx
import numpy as np
import random
import time

Scrape YahooFinance Currency exchange rate page for currencies to use as graph nodes and exchange rates between them, populate dictionary distance matrix with exchange rates

In [29]:
def extract():
    # URL of the Yahoo Finance currencies page
    url = "https://finance.yahoo.com/currencies/"

    # Set up Chrome options
    chrome_options = Options()
    chrome_options.add_argument("--headless")  # Ensure GUI is off
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")

    # Set up the webdriver
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_options)

    try:
        # Open the Yahoo Finance currencies page
        driver.get(url)

        # Explicit wait for the table to be present
        WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "table tbody tr")))

        # Extract the table rows containing exchange rate data
        rows = driver.find_elements(By.CSS_SELECTOR, "table tbody tr")

        # Initialize dictionaries for currency names and last prices
        currencies = set()
        last_prices = {}

        # Iterate through each row to extract currency names and last prices
        for row in rows:
            # Extract currency names (split from pair, e.g., 'EURUSD=X')
            currency_pair = row.find_element(By.XPATH, ".//td[2]").text.strip()
            currency_A = currency_pair[:3].strip('/')
            currency_B = currency_pair[3:].strip('/')
            currencies.add(currency_A)
            currencies.add(currency_B)

            # Extract the "Last Price" column value directly using its index
            last_price_text = row.find_element(By.XPATH, ".//td[3]").text.strip()
            last_price = float(last_price_text.replace(',', ''))  # Convert to float

            # Store last prices for both currencies (assuming bidirectional conversion)
            last_prices[currency_A + "_" + currency_B] = last_price
            last_prices[currency_B + "_" + currency_A] = 1 / last_price  # Reciprocal for reverse conversion

    finally:
        # Close the driver
        driver.quit()
        
    return currencies, last_prices

Testing functionality

In [30]:
%%time

currencies, rates = extract()

CPU times: total: 141 ms
Wall time: 21.2 s


In [7]:
print("Currencies:")
for currency in currencies:
    print(currency)

print("\nRates:")    
for currency_pair, rate in rates.items():
    print(currency_pair, ":", rate)

Currencies:
HUF
MYR
CAD
SGD
PHP
JPY
RUB
NZD
IDR
CHF
HKD
MXN
INR
EUR
AUD
USD
GBP
CNY
SEK
THB
ZAR

Rates:
EUR_USD : 1.091
USD_EUR : 0.9165902841429882
USD_JPY : 157.848
JPY_USD : 0.0063352085550656325
GBP_USD : 1.299
USD_GBP : 0.7698229407236336
AUD_USD : 0.6786
USD_AUD : 1.4736221632773356
NZD_USD : 0.6121
USD_NZD : 1.6337199803953604
EUR_JPY : 172.212
JPY_EUR : 0.005806796274359511
GBP_JPY : 205.069
JPY_GBP : 0.004876407453101151
EUR_GBP : 0.8397
GBP_EUR : 1.1909015124449207
EUR_CAD : 1.4863
CAD_EUR : 0.672811680010765
EUR_SEK : 11.4848
SEK_EUR : 0.0870716076901644
EUR_CHF : 0.9751
CHF_EUR : 1.0255358424776946
EUR_HUF : 391.56
HUF_EUR : 0.0025538870160384105
USD_CNY : 7.2495
CNY_USD : 0.13794054762397406
USD_HKD : 7.8073
HKD_USD : 0.1280852535447594
USD_SGD : 1.3408
SGD_USD : 0.7458233890214797
USD_INR : 83.493
INR_USD : 0.011977051968428491
USD_MXN : 17.598
MXN_USD : 0.056824639163541316
USD_PHP : 58.401
PHP_USD : 0.017122994469272786
USD_IDR : 16134.0
IDR_USD : 6.198090987975704e-05


Using extracted node and graph weight values, create directed graph to be used with Bellman-Ford algorithm to detect arbitrage through negative cycle detection! CAVEAT => the weights must first be converted to their negative log form. This is the so as in the case of arbitrage, where $$w_1, w_2, w_3, ..., w_n$$ are the edge weights in the graph, this identity holds: $$w_1*w_2*w_3*...w_n>1$$ and we must transform it in a fashion where the sum on the left hand side ends up being less than 0, which is a negative weighted cycle. In order to do this we can first take the log of both sides and end up with: $$log(w_1)+log(w_2)+log(w_3)+...+log(w_n)>0$$ and finally, negating both sides flips the inequality sign and you end up with: $$(-log(w_1))+(-log(w_2))+(-log(w_3))+...+(-log(w_n))<0$$ Now, simply running a modified version of the Bellman-Ford Algorithm can 

In [31]:
def graph_init(rates):
    # Initialize a directed graph
    G = nx.DiGraph()

    # Add edges with weights to the graph
    for pair, rate in rates.items():
        currency_A, currency_B = pair.split('_')
        G.add_edge(currency_A, currency_B, weight=-np.log(rate))  # Use negative log of the rate
        
    return G

In [32]:
G = graph_init(rates)

Here is a look at the graph we have constructed using the currencies as nodes, their exchanges as edges, and their exchange rates as weights.

In [33]:
# Print the graph edges with weights for verification
print("Graph edges with weights:")
for edge in G.edges(data=True):
    print(edge)

Graph edges with weights:
('EUR', 'USD', {'weight': -0.08709470685093373})
('EUR', 'JPY', {'weight': -5.148726275976795})
('EUR', 'GBP', {'weight': 0.17471059379261952})
('EUR', 'CAD', {'weight': -0.3962898101727122})
('EUR', 'SEK', {'weight': -2.441024421971151})
('EUR', 'CHF', {'weight': 0.025215249141063763})
('EUR', 'HUF', {'weight': -5.97013876039323})
('USD', 'EUR', {'weight': 0.08709470685093369})
('USD', 'JPY', {'weight': -5.06163254466716})
('USD', 'GBP', {'weight': 0.2615947376884625})
('USD', 'AUD', {'weight': -0.38772342663200726})
('USD', 'NZD', {'weight': -0.490859611125113})
('USD', 'CNY', {'weight': -1.9809325009711114})
('USD', 'HKD', {'weight': -2.0550591934524984})
('USD', 'SGD', {'weight': -0.29326645074568153})
('USD', 'INR', {'weight': -4.424762796007354})
('USD', 'MXN', {'weight': -2.867785259223369})
('USD', 'PHP', {'weight': -4.067333012975251})
('USD', 'IDR', {'weight': -9.68868412549692})
('USD', 'THB', {'weight': -3.585184217775171})
('USD', 'MYR', {'weight'

In [36]:
def arbitrage_checker(G):
    # Initialize distances and predecessors
    distance = {vertex: float('inf') for vertex in G.nodes}
    predecessor = {vertex: None for vertex in G.nodes}
    
    # Randomly select the source vertex
    source = random.choice(list(G.nodes))
    distance[source] = 0
    
    # Relax edges |V| - 1 times
    for _ in range(len(G.nodes) - 1):
        for u, v, weight in G.edges(data='weight'):
            if distance[u] + weight < distance[v]:
                distance[v] = distance[u] + weight
                predecessor[v] = u

    # Check for negative weight cycles
    for u, v, weight in G.edges(data='weight'):
        if distance[u] + weight < distance[v]:
            # If a cycle is detected, trace back the cycle
            cycle = []
            current = v
            seen = set()
            while current not in seen:
                if current is None:
                    return None  # This means no cycle
                cycle.append(current)
                seen.add(current)
                current = predecessor[current]
            cycle_start = cycle.index(current)
            cycle = cycle[cycle_start:]  # Start the cycle at the first repetition
            cycle.reverse()  # Optional based on how you want to present the cycle
            return "Graph contains a negative weight cycle", cycle, source
    
    return distance, source

In [40]:
%%time

arbitrage_checker(G)

CPU times: total: 0 ns
Wall time: 0 ns


('Graph contains a negative weight cycle', ['NZD', 'USD'], 'MYR')