In [37]:
import pandas as pd
import re
from datetime import datetime
import os

In [38]:
# Regex pattern to parse FIX messages
fix_pattern = r'\|([A-Za-z0-9]+)=([^\|]+)'

In [39]:
def parse_fix_logs(log_file):
    """Extract execution times and prices from FIX logs"""
    pattern = r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}),\d{3}.*\|35=8.*\|55=([A-Z\.]+)\|.*\|31=([\d\.]+)\|.*\|6007=([a-z_]+)'

    data = []
    with open(log_file) as f:
        for line in f:
            match = re.search(pattern, line)
            if match:
                timestamp = datetime.strptime(match.group(1), "%Y-%m-%d %H:%M:%S")
                data.append({
                    'symbol': match.group(2),
                    'price': float(match.group(3)),
                    'strategy': match.group(4),
                    'timestamp': timestamp
                })
    return pd.DataFrame(data)

In [40]:
print("Current directory:", os.getcwd())

Current directory: /Users/mcfeely/Projects/OrderBookHFT/Notebooks


In [41]:
market_maker = parse_fix_logs("/Users/mcfeely/Projects/OrderBookHFT/logs/fix_market_maker.log")
momentum = parse_fix_logs("/Users/mcfeely/Projects/OrderBookHFT/logs/fix_momentum.log")
my_strategy = parse_fix_logs("/Users/mcfeely/Projects/OrderBookHFT/logs/fix_my_strategy.log")
passive = parse_fix_logs("/Users/mcfeely/Projects/OrderBookHFT/logs/fix_passive_liquidity_provider.log")

In [42]:
all_trades = pd.concat([market_maker, momentum, my_strategy, passive])

In [43]:
def find_competitions(df):
    # Group by symbol and price
    grouped = df.groupby(['symbol', 'price'])

    competitions = []
    for (symbol, price), group in grouped:
        if len(group) > 1:  # Multiple strategies at same price
            # Filter trades within 1 second of each other
            group = group.sort_values('timestamp')
            time_diff = group['timestamp'].diff().dt.total_seconds().abs()
            cluster = (time_diff <= 1).cumsum()

            for _, cluster_group in group.groupby(cluster):
                if len(cluster_group) > 1:
                    competitions.append(cluster_group)

    return pd.concat(competitions) if competitions else pd.DataFrame()

competing_trades = find_competitions(all_trades)

In [44]:
def calculate_speed_metrics(competing_trades):
    results = []
    for (symbol, price), group in competing_trades.groupby(['symbol', 'price']):
        first_trade = group.nsmallest(1, 'timestamp')
        results.append({
            'symbol': symbol,
            'price': price,
            'fastest_strategy': first_trade['strategy'].values[0],
            'time_advantage_ms': (group['timestamp'].min() - group['timestamp'].max()).total_seconds() * 1000
        })
    return pd.DataFrame(results)

speed_results = calculate_speed_metrics(competing_trades)

In [36]:
print(speed_results)

     symbol       price            fastest_strategy  time_advantage_ms
0     AD.AS   37.218011                 my_strategy           -10000.0
1     AD.AS   37.255341  passive_liquidity_provider           -20000.0
2     AD.AS   37.292671                market_maker           -22000.0
3     AD.AS   37.367331                    momentum           -17000.0
4   ASML.AS  661.075224  passive_liquidity_provider           -20000.0
5   ASML.AS  661.737624  passive_liquidity_provider           -15000.0
6   INGA.AS   18.969815                    momentum           -10000.0
7   INGA.AS   18.988861                    momentum           -20000.0
8   INGA.AS   18.988899                market_maker            -5000.0
9   INGA.AS   19.007907  passive_liquidity_provider           -10000.0
10  INGA.AS   19.026953                 my_strategy           -33000.0
11  INGA.AS   19.065045                    momentum            -8000.0
12    OR.PA  377.792887                 my_strategy            -5000.0
13    