Please use this link; it points directly to this notebook and will simplify replication.

https://colab.research.google.com/drive/11W242v0iD9L8klkXTFCN1-HM-vwX0_t2?usp=sharing



In [None]:
import pandas as pd
import numpy as np
import optuna
import matplotlib.pyplot as plt
from collections import Counter

**The following code optimizes the signal weights for a given set of time-based thresholds. The buy-side and sell-side signal weights are optimized separately. The time thresholds are manual inputs, chosen from the analysis of the execution plots showing how and when trades actually happen.**

**Signals**

In [None]:
# Build the per-row signal features used by the buy/sell weight optimizers.
df = pd.read_csv("enter csv file name here")

# Index the rows by timestamp ('time' is read as seconds since the Unix epoch)
# and sort BEFORE deriving anything order-dependent, so that every diff/rolling
# feature below is computed in chronological order rather than file order.
# A stable sort keeps rows that share a timestamp in their original file order.
df['datetime'] = pd.to_datetime(df['time'], unit='s', origin='unix')
df.set_index('datetime', inplace=True)
df.sort_index(kind='mergesort', inplace=True)

df['mid_price'] = (df['ask price'] + df['bid price']) / 2
# Momentum: change in mid price versus 50 rows earlier (0 for the first 50 rows).
df['momentum'] = df['mid_price'].diff(periods=50).fillna(0)
# Minute bucket of each row; the optimizers simulate one execution per bucket.
df['minute'] = df.index.floor('min')
df['spread'] = df['ask price'] - df['bid price']

# Trailing time window used for all rolling quantiles (last 5 minutes).
window = '5min'
df['spread_q10'] = df['spread'].rolling(window).quantile(0.1)
# Signed book imbalance; the 1e-6 term guards against a zero denominator.
df['volume_imbalance'] = (df['bid volume'] - df['ask volume']) / (df['bid volume'] + df['ask volume'] + 1e-6)
df['vol_imb_q95'] = df['volume_imbalance'].rolling(window).quantile(0.95)
# Trade intensity: number of non-null order ids in the trailing 15 seconds.
df["trade_intensity"] = df["order id"].rolling('15s').count().values
df['trade_intensity_q10'] = df['trade_intensity'].rolling(window).quantile(0.1)
# Volume curve: trailing 30-second size sum versus its own 5-minute mean.
df["volume_curve_30s"] = df["size"].rolling("30s").sum().fillna(0)
df["volume_curve_baseline"] = df["volume_curve_30s"].rolling("5min").mean()
df["volume_curve_deviation"] = df["volume_curve_30s"] - df["volume_curve_baseline"]
df["volume_curve_deviation_q20"] = df["volume_curve_deviation"].rolling(window).quantile(0.2)
df['vol_imb_q05'] = df['volume_imbalance'].rolling(window).quantile(0.05)
df['momentum_q80'] = df['momentum'].rolling(window).quantile(0.80)
df['momentum_q20'] = df['momentum'].rolling(window).quantile(0.20)

# Aggression ratio: assumes aggressive trades show up as a drop in the quoted
# ask or bid volume (a market order consuming resting limit orders).
df["delta_ask_vol"] = df["ask volume"].diff()
df["delta_bid_vol"] = df["bid volume"].diff()
# Per row: 0, 1 or 2 depending on how many sides saw their volume decrease.
df["aggressive_trades"] = ((df["delta_ask_vol"] < 0).astype(int) + (df["delta_bid_vol"] < 0).astype(int))
# Mean aggressive-trade count over the trailing 30 seconds.
df["aggression_ratio"] = df["aggressive_trades"].rolling('30s').mean()
df["aggression_ratio_q80"] = df["aggression_ratio"].rolling(window).quantile(0.8).fillna(0)

**Buy Weight Optimizer**

In [None]:
# Second-of-minute component of each row's index timestamp.
df['second'] = df.index.second
def assign_zone(second):
    """Return the label of the quarter-minute zone that `second` falls into.

    Zones are half-open on the right: values below 15 map to '0–15s', below
    30 to '15–30s', below 45 to '30–45s', and everything else to '45–59s'.
    """
    zone_upper_bounds = (
        (15, '0–15s'),
        (30, '15–30s'),
        (45, '30–45s'),
    )
    for upper_bound, label in zone_upper_bounds:
        if second < upper_bound:
            return label
    # Anything not below 45 belongs to the last zone.
    return '45–59s'

# Label every row with the intra-minute zone of its timestamp.
df['time_zone'] = df['second'].apply(assign_zone)

# Per-zone execution thresholds, keyed by the labels assign_zone returns.
# evaluate_buy_weights executes on the first row whose weighted signal sum is
# strictly greater than the threshold of that row's zone.
# NOTE: each "add number here" is a placeholder and must be replaced with a
# number before this cell can run.
zone_thresholds = {
    '0–15s': add number here,
    '15–30s': add number here,
    '30–45s': add number here,
    '45–59s': add number here
}

# Distinct minute buckets in ascending order; one simulated execution each.
all_minutes = sorted(df['minute'].unique())

def evaluate_buy_weights(w):
    """Score a set of buy-side signal weights against the first-ask benchmark.

    For every minute in the module-level ``all_minutes`` a single buy is
    simulated: it executes at the ask of the first row whose weighted signal
    sum is strictly greater than the threshold of that row's time zone
    (``zone_thresholds``, falling back to 3 for an unknown zone).  If no row
    qualifies, the ask of the last row of the minute is used.  The benchmark
    (called "TWAP" in this notebook) is the ask of the first row stamped at
    second 0, or of the first row of the minute when there is none.

    Args:
        w: Six weights, in the order volume imbalance, spread, momentum,
            trade intensity, volume-curve deviation, aggression ratio.

    Returns:
        Mean of ``benchmark - execution price`` over the evaluated minutes
        (higher is better), or NaN when no minute has any rows.
    """
    # === BUY-SIDE SIGNALS === evaluated once for the whole frame instead of
    # row by row; comparisons involving NaN are False, as in a scalar compare.
    signals = (
        df['volume_imbalance'] > df['vol_imb_q95'],
        df['spread'] < df['spread_q10'],
        df['momentum'] > df['momentum_q80'],
        df['trade_intensity'] < df['trade_intensity_q10'],
        df["volume_curve_deviation"] < df["volume_curve_deviation_q20"],
        df["aggression_ratio"] > df["aggression_ratio_q80"],
    )
    score = sum(w[i] * flag.to_numpy() for i, flag in enumerate(signals))
    thresholds = df['time_zone'].map(lambda zone: zone_thresholds.get(zone, 3)).to_numpy()
    fires = np.asarray(score > thresholds, dtype=bool)

    ask = df['ask price'].to_numpy()
    at_second_zero = np.asarray(df.index.second == 0)

    # Group row positions by minute once, rather than re-filtering the whole
    # frame with a boolean mask for every minute of every trial.
    rows_by_minute = pd.Series(np.arange(len(df))).groupby(df['minute'].to_numpy(), sort=False)

    exec_prices, twap_prices = [], []
    for current_minute in all_minutes:
        try:
            rows = rows_by_minute.get_group(current_minute).to_numpy()
        except KeyError:
            continue  # no rows fall into this minute

        minute_ask = ask[rows]

        # Benchmark price = first ask of the minute.
        zero_rows = np.flatnonzero(at_second_zero[rows])
        twap_price = minute_ask[zero_rows[0]] if zero_rows.size else minute_ask[0]

        # Execute on the first firing row, otherwise at the end of the minute.
        hits = np.flatnonzero(fires[rows])
        exec_price = minute_ask[hits[0]] if hits.size else minute_ask[-1]

        exec_prices.append(exec_price)
        twap_prices.append(twap_price)

    if not exec_prices:
        return float('nan')
    return np.mean(np.array(twap_prices) - np.array(exec_prices))  # Higher is better

def objective_buy(trial):
    """Optuna objective for the buy side.

    Draws six integer weights in [0, 4] from `trial` (parameters 'bw0' to
    'bw5') and returns the negated average price improvement, because the
    study minimizes its objective.
    """
    weights = []
    for i in range(6):
        weights.append(trial.suggest_int(f'bw{i}', 0, 4))
    # Optuna minimizes, so flip the sign of the "higher is better" score.
    return -evaluate_buy_weights(weights)

# Run the buy-side search: 200 trials minimizing objective_buy.
print("🔍 Optimizing BUY-side Weights...")
study_buy = optuna.create_study(direction='minimize')
study_buy.optimize(objective_buy, n_trials=200)

# Recover the best weights in signal order (bw0..bw5) and undo the sign flip
# applied by objective_buy so the reported score is "higher is better" again.
best_buy_weights = [study_buy.best_params[f'bw{i}'] for i in range(6)]
best_buy_score = -study_buy.best_value

print(f"\nBest BUY-side Weights: {best_buy_weights}")
print(f"Avg Price Improvement (TWAP - Exec): {best_buy_score:.6f}")

**Sell Weight Optimizer**

In [None]:
# Second-of-minute component of each row's index timestamp.
df['second'] = df.index.second
def assign_zone(second):
    """Map a second-of-minute value to its quarter-minute zone label.

    Boundaries are exclusive on the upper side (15, 30, 45); any value that
    is not below 45 is placed in the final '45–59s' zone.
    """
    if second < 15:
        return '0–15s'
    if second < 30:
        return '15–30s'
    if second < 45:
        return '30–45s'
    return '45–59s'

# Label every row with the intra-minute zone of its timestamp.
df['time_zone'] = df['second'].apply(assign_zone)

# Per-zone execution thresholds for the sell side, keyed by the labels
# assign_zone returns. evaluate_sell_weights executes on the first row whose
# weighted signal sum is strictly greater than the threshold of its zone.
# NOTE: each "add number here" is a placeholder and must be replaced with a
# number before this cell can run.
# NOTE: this rebinds the module-level zone_thresholds, which
# evaluate_buy_weights also reads if it is called after this cell.
zone_thresholds = {
    '0–15s': add number here,
    '15–30s': add number here,
    '30–45s': add number here,
    '45–59s': add number here
}

# Distinct minute buckets in ascending order; one simulated execution each.
all_minutes = sorted(df['minute'].unique())

def evaluate_sell_weights(w):
    """Score a set of sell-side signal weights against the first-bid benchmark.

    For every minute in the module-level ``all_minutes`` a single sell is
    simulated: it executes at the bid of the first row whose weighted signal
    sum is strictly greater than the threshold of that row's time zone
    (``zone_thresholds``, falling back to 3 for an unknown zone).  If no row
    qualifies, the bid of the last row of the minute is used.  The benchmark
    (called "TWAP" in this notebook) is the bid of the first row stamped at
    second 0, or of the first row of the minute when there is none.

    Args:
        w: Six weights, in the order volume imbalance, spread, momentum,
            trade intensity, volume-curve deviation, aggression ratio.

    Returns:
        Mean of ``execution price - benchmark`` over the evaluated minutes
        (higher is better), or NaN when no minute has any rows.
    """
    # === SELL-SIDE SIGNALS === evaluated once for the whole frame instead of
    # row by row; comparisons involving NaN are False, as in a scalar compare.
    signals = (
        df['volume_imbalance'] < df['vol_imb_q05'],
        df['spread'] < df['spread_q10'],
        df['momentum'] < df['momentum_q20'],
        df['trade_intensity'] < df['trade_intensity_q10'],
        df["volume_curve_deviation"] < df["volume_curve_deviation_q20"],
        df["aggression_ratio"] > df["aggression_ratio_q80"],
    )
    score = sum(w[i] * flag.to_numpy() for i, flag in enumerate(signals))
    thresholds = df['time_zone'].map(lambda zone: zone_thresholds.get(zone, 3)).to_numpy()
    fires = np.asarray(score > thresholds, dtype=bool)

    bid = df['bid price'].to_numpy()
    at_second_zero = np.asarray(df.index.second == 0)

    # Group row positions by minute once, rather than re-filtering the whole
    # frame with a boolean mask for every minute of every trial.
    rows_by_minute = pd.Series(np.arange(len(df))).groupby(df['minute'].to_numpy(), sort=False)

    exec_prices, twap_prices = [], []
    for current_minute in all_minutes:
        try:
            rows = rows_by_minute.get_group(current_minute).to_numpy()
        except KeyError:
            continue  # no rows fall into this minute

        minute_bid = bid[rows]

        # Benchmark price = first bid of the minute.
        zero_rows = np.flatnonzero(at_second_zero[rows])
        twap_price = minute_bid[zero_rows[0]] if zero_rows.size else minute_bid[0]

        # Execute on the first firing row, otherwise at the end of the minute.
        hits = np.flatnonzero(fires[rows])
        exec_price = minute_bid[hits[0]] if hits.size else minute_bid[-1]

        exec_prices.append(exec_price)
        twap_prices.append(twap_price)

    if not exec_prices:
        return float('nan')
    return np.mean(np.array(exec_prices) - np.array(twap_prices))  # Higher is better

def objective_sell(trial):
    """Optuna objective for the sell side.

    Draws six integer weights in [0, 4] from `trial` (parameters 'sw0' to
    'sw5') and returns the negated average price improvement, because the
    study minimizes its objective.
    """
    weights = []
    for i in range(6):
        weights.append(trial.suggest_int(f'sw{i}', 0, 4))
    # Optuna minimizes, so flip the sign of the "higher is better" score.
    return -evaluate_sell_weights(weights)

# Run the sell-side search: 200 trials minimizing objective_sell.
print("🔍 Optimizing SELL-side Weights...")
study_sell = optuna.create_study(direction='minimize')
study_sell.optimize(objective_sell, n_trials=200)

# Recover the best weights in signal order (sw0..sw5) and undo the sign flip
# applied by objective_sell so the reported score is "higher is better" again.
best_sell_weights = [study_sell.best_params[f'sw{i}'] for i in range(6)]
best_sell_score = -study_sell.best_value

print(f"\nBest SELL-side Weights: {best_sell_weights}")
print(f"Avg Price Improvement (Exec - TWAP): {best_sell_score:.6f}")
