In [2]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import warnings
import os
warnings.filterwarnings("ignore")

In [6]:
def process_csv_files(directory, ticker):
    """
    Convert input csv to dataframes and prepare to process through trading signal.

    directory (String): Name of directory that contains list of csv files.
    
    return (list[DataFrame]): List of pandas DataFrames, one for each trading day.
    """
    
    # Directory containing the CSV files
    bbo_cols=['bid_px_00', 'ask_px_00', 'bid_sz_00', 'ask_sz_00', 'bid_ct_00','ask_ct_00', 'symbol']

    ret = []
    
    # Loop through all files in the directory and convert csv to df
    for filename in os.listdir(directory):
        
        if filename.endswith('.csv'):
            filepath = os.path.join(directory, filename)
            try:
                print(f"Processing {filename}")
                df = pd.read_csv(filepath, parse_dates=['ts_recv', 'ts_event'])
                print(f"Processed {filename}\n")
            except Exception as e:
                print(f"Error processing {filename}\n")
                continue
                
            # Data cleaning and processing - resample to 100ms as per assignment description
            bbo_df = df[df['symbol']==ticker].set_index('ts_event').resample('100ms').last().ffill()
            ret.append(bbo_df)
            
    return ret
        

In [8]:
dfs = process_csv_files('equity-data', 'ANF')

Processing xnas-itch-20240826.mbp-1.csv
Processed xnas-itch-20240826.mbp-1.csv
Processing xnas-itch-20240807.mbp-1.csv
Processed xnas-itch-20240807.mbp-1.csv
Processing xnas-itch-20240813.mbp-1.csv
Processed xnas-itch-20240813.mbp-1.csv
Processing xnas-itch-20240801.mbp-1.csv
Processed xnas-itch-20240801.mbp-1.csv
Processing xnas-itch-20240815.mbp-1.csv
Processed xnas-itch-20240815.mbp-1.csv
Processing xnas-itch-20240820.mbp-1.csv
Processed xnas-itch-20240820.mbp-1.csv
Processing xnas-itch-20240827.mbp-1.csv
Processed xnas-itch-20240827.mbp-1.csv
Processing xnas-itch-20240812.mbp-1.csv
Processed xnas-itch-20240812.mbp-1.csv
Processing xnas-itch-20240806.mbp-1.csv
Processed xnas-itch-20240806.mbp-1.csv
Processing xnas-itch-20240819.mbp-1.csv
Processed xnas-itch-20240819.mbp-1.csv
Processing xnas-itch-20240814.mbp-1.csv
Processed xnas-itch-20240814.mbp-1.csv
Processing xnas-itch-20240821.mbp-1.csv
Processed xnas-itch-20240821.mbp-1.csv
Processing xnas-itch-20240808.mbp-1.csv
Processed xn

In [103]:
def moving_average_signal(df, short_window, long_window, b):
    """
    Generate trading signals based on moving average cross-over strategy.

    prices (Series): asset prices
    short_window (int): Window size for the short-term moving average
    long_window (int): Window size for the long-term moving average
    b (int): Bandwidth parameter that determines the buy/sell thresholds
    return (Series): Series with trading signals (+1 for buy, -1 for sell, 0 for hold)
    """

    # Create midprice column (average of bid and ask prices)
    mid_price = (df['bid_px_00'] + df['ask_px_00']) / 2
    
    # Calculate short-term and long-term moving averages
    short_ma = mid_price.rolling(window=short_window).mean()
    long_ma = mid_price.rolling(window=long_window).mean()
    
    # Define thresholds
    upper_threshold = (1 + b) * long_ma
    lower_threshold = (1 - b) * long_ma
    
    signal = pd.Series(0, index=df.index)

    # It's not hitting either of these for some reason
    
    # Generate buy signals (+1 where short_ma > upper_threshold)
    signal[short_ma > upper_threshold] = 1
    
    # Generate sell signals (-1 where short_ma < lower_threshold)
    signal[short_ma < lower_threshold] = -1

    trade_signals = signal[signal != 0]
    print("Trade Signals (Buy/Sell):\n", trade_signals)

    return signal



In [104]:
def execute_trading_signal(bbo_df, signal, order_size=1e6):
    """
    Execute the trading signal based on the available order book depth and calculate actual P&L.

    bbo_df (DataFrame) : DataFrame containing the order book and signals for each day.
    order_size: Size of the order (e.g., $1M)
    :return: DataFrame with actual P&L calculated for each signal
    """

    # Resample for 100ms
        
    # Initialize return
    bbo_df['return'] = 0
    
    # Identify zero indices for sell signals (signal turns 0 after non-zero)
    zero_indices = bbo_df[(bbo_df['signal'] == 0) & 
                            (bbo_df['signal'].shift(1).fillna(0) != 0)].index
    
    # Backtesting loop for calculating PnL
    for zero_index in zero_indices:
        last_zero_index = bbo_df.loc[:zero_index - pd.Timedelta(milliseconds=100)][bbo_df['signal'] == 0].index[-1]
        first_buy_index = last_zero_index + pd.Timedelta(milliseconds=100)
        
        if first_buy_index in bbo_df.index:
            # Prices during buy and sell
            first_buy_ask_price = bbo_df.loc[first_buy_index, 'ask_px_00']  # ask price at buy time
            sell_bid_price = bbo_df.loc[zero_index, 'bid_px_00']  # bid price at sell time
            
            # Calculate buy return
            buy_return = (sell_bid_price - first_buy_ask_price) / first_buy_ask_price
            
            # Calculate sell return
            first_buy_bid_price = bbo_df.loc[first_buy_index, 'bid_px_00']  # bid price at buy time
            sell_ask_price = bbo_df.loc[zero_index, 'ask_px_00']  # ask price at sell time
            sell_return = -1 * (sell_ask_price - first_buy_bid_price) / first_buy_bid_price
            
            # Combine buy and sell returns into overall trade return
            bbo_df.loc[zero_index, 'return'] = buy_return + sell_return
    
    # Scale return to $1M for actual PnL calculation as per assignment description
    bbo_df['scaled_return'] = bbo_df['return'] * 1_000_000
    
    # Cumulative returns for analysis
    bbo_df['cumulative_return'] = bbo_df['scaled_return'].cumsum()
    return bbo_df

In [106]:
test = dfs[0]
test = test.between_time('13:40', '19:55', inclusive='left')

test['signal'] = moving_average_signal(test, short_window=50, long_window=500, b=0.05)

df = execute_trading_signal(test, 'MA', order_size=1)

Trade Signals (Buy/Sell):
 Series([], Freq: 100L, dtype: int64)


In [4]:

        #backtesting
        zero_indices = nvda_bbo[(nvda_bbo['signal'] == 0) & (nvda_bbo['signal'].shift(1).fillna(0)!= 0)].index

        for zero_index in tqdm(zero_indices):
        # Find the last zero index behind the set of non-zero values
            last_zero_index = nvda_bbo.loc[:zero_index-pd.Timedelta(milliseconds=100)][nvda_bbo['signal'] == 0].index[-1]
            first_buy_index = last_zero_index + pd.Timedelta(milliseconds=100)
            
            if first_buy_index in nvda_bbo.index:
                first_buy_price = nvda_bbo.loc[first_buy_index, 'ask_px_00']
                sell_price = nvda_bbo.loc[zero_index, 'bid_px_00']
                nvda_bbo.loc[zero_index, 'return'] = ((sell_price - first_buy_price) / first_buy_price)

        nvda_bbo['cumulative_return'] = (nvda_bbo['return']).cumsum()
        cumulative_returns=pd.concat([cumulative_returns,nvda_bbo['cumulative_return']])
        

Processing xnas-itch-20240826.mbp-1.csv
Processed xnas-itch-20240826.mbp-1.csv


NameError: name 'd1' is not defined