In [1]:
import pandas as pd
import numpy as np
import math
from scipy.optimize import minimize
pd.options.mode.chained_assignment = None

This code computes the summary statistics and Empirical vs. Model probabilities, replicating the tables in Avellaneda, Reedy, and Stoikov (2011), Forecasting prices from
level-I quotes in the presence of hidden liquidity, Algorithmic Finance 1, 35-43. The statistical analysis is run on AAPL, QQQ, XLF, and JPM for the first five trading days of January 2010.

The code is broken up into three sections:

(a) Parse and clean the data.
(b) Compute the summary statistics, replicating table 2 in the paper.
(c) Compute the Empirical vs Model probabilites, replicating table 3 in the paper.

Table 3 for all 4 securities on the Nasdaq exchange, and our thoughts on how we would use this, is contained in the report.

(a) Parse data

In [2]:
# read data from WRDS
df = pd.read_csv('data.csv', low_memory=False)

# filter for targeted exchange
target_exchanges = ['T', 'P', 'Z'] # nasdaq, nyse-arca, or bats

# Filter and get rid of negative and zero bid-ask spread
df['spread'] = df['ASK'] - df['BID']
data = df[df['EX'].isin(target_exchanges) & (df.spread >= 0)]

(b) Summary Statistics - Table 2

In [3]:
# Convert time and date into datetime
data['DATE'] = data['DATE'].astype(str)
data['TIME_M'] = data['TIME_M'].astype(str)
data['TIMESTAMP'] = pd.to_datetime(data['DATE'] + ' ' + data['TIME_M'])

data['midquote'] = (data['BID'] + data['ASK']) / 2
data['bsize_asize'] = data['BIDSIZ'] + data['ASKSIZ']

# Compute summary statistics
summary_stats = data.groupby(['SYM_ROOT', 'EX']).agg(
    num_quotes=('midquote', 'size'),
    avg_spread=('spread', 'mean'),
    avg_bsize_asize=('bsize_asize', 'mean'),
    avg_price=('midquote', 'mean')
).reset_index()

# edit the results to look exactly like the table in the paper
summary_stats['quotes_per_sec'] = (summary_stats['num_quotes'] / ((6 * 60 * 60) * 5)).round().astype(int)
summary_stats['num_quotes'] = round(summary_stats['num_quotes'] / 1e6, 1).astype(str) + 'M'
summary_stats['avg_spread'] = round(summary_stats['avg_spread'], 3)
summary_stats['avg_bsize_asize'] = summary_stats['avg_bsize_asize'].apply(
    lambda x: int(x) if x > 10 else round(x, 1)
).round().astype(int)
summary_stats['avg_price'] = round(summary_stats['avg_price'], 2)

# Rename and organize to match table 2 in paper
summary_stats.rename(columns={
    'SYM_ROOT': 'Ticker',
    'EX': 'Exchange',
    'num_quotes': 'Num Quotes',
    'avg_spread': 'avg(spread)',
    'avg_bsize_asize': 'avg(bsize + asize)',
    'avg_price': 'avg(price)'
}, inplace=True)

exchange_mapping = {
    'T': 'NASDAQ',
    'P': 'NYSE',
    'Z': 'BATS'
}
summary_stats['Exchange'] = summary_stats['Exchange'].replace(exchange_mapping)
exchange_order = ['NASDAQ', 'NYSE', 'BATS']
summary_stats['Exchange'] = pd.Categorical(summary_stats['Exchange'], categories=exchange_order, ordered=True)

ticker_order = ['XLF', 'QQQQ', 'JPM', 'AAPL']
summary_stats['Ticker'] = pd.Categorical(summary_stats['Ticker'], categories=ticker_order, ordered=True)

summary_stats = summary_stats[['Ticker', 'Exchange', 'Num Quotes', 'quotes_per_sec', 'avg(spread)', 'avg(bsize + asize)', 'avg(price)']]

summary_stats = summary_stats.sort_values(by=['Ticker', 'Exchange'], ascending=[True, True])

In [4]:
summary_stats

Unnamed: 0,Ticker,Exchange,Num Quotes,quotes_per_sec,avg(spread),avg(bsize + asize),avg(price)
10,XLF,NASDAQ,0.7M,7,0.01,8799,15.02
9,XLF,NYSE,0.4M,4,0.01,10466,15.01
11,XLF,BATS,0.4M,4,0.011,7507,15.0
7,QQQQ,NASDAQ,2.7M,25,0.01,1455,46.3
6,QQQQ,NYSE,4.0M,37,0.011,1151,46.27
8,QQQQ,BATS,1.6M,15,0.011,1055,46.28
4,JPM,NASDAQ,1.2M,11,0.341,84,43.46
3,JPM,NYSE,0.8M,7,0.072,41,41.33
5,JPM,BATS,0.6M,5,0.016,38,43.77
1,AAPL,NASDAQ,1.4M,13,0.035,9,212.5


(c) Table 3 - Empirical vs. Model probabilities for the probability of an
upward move on Nasdaq (T) for all 4 securities.

In [3]:
def calculate_optimal_h_and_empirical_tables(data, ticker, exchange, spread=None):
    """
    Function to calculate the optimal H and output empirical and model probability tables.
    
    Parameters:
        data (pd.DataFrame): DataFrame containing stock data.
        ticker (str): Stock ticker symbol.
        exchange (str): Exchange code.
        spread (float or None): Spread value to filter data (optional).
    
    Returns:
        H_optimal (float): Estimated optimal H value.
        emp_table (pd.DataFrame): Empirical probability table.
        model_table (pd.DataFrame): Model probability table.
        error (float): Total squared error between empirical and model probabilities.
    """

    # Filter data based on ticker, exchange, and optional spread
    if spread is None:
        td = data[(data['SYM_ROOT'] == ticker) & (data['EX'] == exchange) & (data['spread'] > 0)].copy()
    else:
        td = data[(data['SYM_ROOT'] == ticker) & (data['EX'] == exchange) 
                  & (data['spread'] > 0) & (np.round(data['spread'].values, 2) == spread)].copy()

    # Calculate midquote
    td['MIDQUOTE'] = (td['BID'] + td['ASK']) / 2

    # Calculate direction based on MID price changes
    td['Direction'] = [0] + list(np.sign(np.diff(td['MIDQUOTE'].values)))
    td['Direction'] = td['Direction'].replace(to_replace=0, method='bfill')

    # Create boundaries for bid and ask size buckets
    bid_boundaries = td['BIDSIZ'].quantile(np.arange(0.1, 1, 0.1)).values.astype(int)
    ask_boundaries = td['ASKSIZ'].quantile(np.arange(0.1, 1, 0.1)).values.astype(int)

    # Assign bid and ask size buckets based on the created boundaries
    td['BIDSIZ_bucket'] = pd.cut(td['BIDSIZ'], bins=[-np.inf] + list(bid_boundaries) + [np.inf], labels=False, duplicates="drop")
    td['ASKSIZ_bucket'] = pd.cut(td['ASKSIZ'], bins=[-np.inf] + list(ask_boundaries) + [np.inf], labels=False, duplicates="drop")

    # Model probability function using deciles
    def model_prob(i, j, H):
        return (i + H) / (j + i + 2 * H)

    # Objective function to minimize
    def objective_function(H, bid_decile, ask_decile, directions):
        total_error = 0

        # Iterate through the bid and ask decile buckets
        for i in range(9):  # 9 deciles
            for j in range(9):
                # Get model probability using the current H
                model_p = model_prob((i + 1) / 10, (j + 1) / 10, H)

                # Retrieve the actual direction (up/down move) for comparison
                actual_direction = directions[(bid_decile == i) & (ask_decile == j)]

                if len(actual_direction) == 0:
                    continue

                actual_prob = (actual_direction == 1).mean()  # Calculate empirical probability of upward moves

                # Add weighted squared error to the total error
                total_error += ((actual_prob - model_p) ** 2) * len(actual_direction)  # Weighted error

        return total_error

    # Function to estimate H using deciles and directions
    def estimate_H(bid_decile, ask_decile, directions):
        initial_guess = 0.2  # Starting guess for H

        # Minimize the objective function to estimate H
        result = minimize(objective_function, initial_guess, args=(bid_decile, ask_decile, directions), method='Nelder-Mead')

        # Retrieve the optimal value for H
        H_optimal = result.x[0]
        print(f"Estimated H: {H_optimal}\nSuccess: {result.success}\nMessage: {result.message}")

        return H_optimal

    # Estimate the optimal H value
    H_optimal = estimate_H(td['BIDSIZ_bucket'], td['ASKSIZ_bucket'], td['Direction'])

    # Calculate empirical and model probabilities and the total error
    emp_prob = []
    model_probabilities = []
    error = 0

    for i in range(9):
        emp_row = []
        model_row = []
        for j in range(9):
            actual_direction = td['Direction'][(td['BIDSIZ_bucket'] == i) & (td['ASKSIZ_bucket'] == j)]
            actual_prob = (actual_direction == 1).mean()
            emp_row.append(actual_prob)

            model_p = model_prob(i / 10, j / 10, H_optimal)
            model_row.append(model_p)

            error += ((actual_prob - model_p) ** 2) * len(actual_direction)

        emp_prob.append(emp_row)
        model_probabilities.append(model_row)

    # Create dataframes for empirical and model probability tables
    col = [f'{np.arange(0.1, 1, 0.1)[i]:.1f} ({ask_boundaries[i]})' for i in range(9)]
    row = [f'{np.arange(0.1, 1, 0.1)[i]:.1f} ({bid_boundaries[i]})' for i in range(9)]

    emp_table = pd.DataFrame(emp_prob, columns=col, index=row).round(2)
    model_table = pd.DataFrame(model_probabilities, columns=col, index=row).round(2)

    return H_optimal, emp_table, model_table, error
    

In [4]:
# QQQQ
H_optimal, empirical_table, model_table, total_error = calculate_optimal_h_and_empirical_tables(data, 'QQQQ', 'T', spread=None)

  td['Direction'] = td['Direction'].replace(to_replace=0, method='bfill')


Estimated H: 0.21902343750000003
Success: True
Message: Optimization terminated successfully.


In [5]:
empirical_table

Unnamed: 0,0.1 (168),0.2 (287),0.3 (392),0.4 (504),0.5 (628),0.6 (767),0.7 (913),0.8 (1104),0.9 (1387)
0.1 (180),0.46,0.27,0.28,0.28,0.28,0.25,0.18,0.24,0.21
0.2 (307),0.63,0.5,0.5,0.39,0.34,0.32,0.41,0.29,0.29
0.3 (421),0.69,0.55,0.53,0.5,0.42,0.44,0.42,0.36,0.34
0.4 (539),0.7,0.64,0.56,0.51,0.48,0.52,0.32,0.45,0.4
0.5 (692),0.73,0.63,0.58,0.53,0.4,0.43,0.42,0.43,0.47
0.6 (823),0.82,0.71,0.67,0.66,0.56,0.52,0.45,0.46,0.5
0.7 (967),0.8,0.69,0.64,0.64,0.7,0.62,0.54,0.53,0.5
0.8 (1156),0.66,0.67,0.69,0.63,0.78,0.68,0.57,0.5,0.47
0.9 (1399),0.74,0.7,0.68,0.5,0.71,0.4,0.59,0.23,0.39


In [6]:
model_table

Unnamed: 0,0.1 (168),0.2 (287),0.3 (392),0.4 (504),0.5 (628),0.6 (767),0.7 (913),0.8 (1104),0.9 (1387)
0.1 (180),0.5,0.41,0.34,0.3,0.26,0.23,0.21,0.19,0.18
0.2 (307),0.59,0.5,0.43,0.38,0.34,0.31,0.28,0.26,0.24
0.3 (421),0.66,0.57,0.5,0.45,0.4,0.37,0.34,0.31,0.29
0.4 (539),0.7,0.62,0.55,0.5,0.46,0.42,0.39,0.36,0.34
0.5 (692),0.74,0.66,0.6,0.54,0.5,0.46,0.43,0.4,0.38
0.6 (823),0.77,0.69,0.63,0.58,0.54,0.5,0.47,0.44,0.41
0.7 (967),0.79,0.72,0.66,0.61,0.57,0.53,0.5,0.47,0.45
0.8 (1156),0.81,0.74,0.69,0.64,0.6,0.56,0.53,0.5,0.47
0.9 (1399),0.82,0.76,0.71,0.66,0.62,0.59,0.55,0.53,0.5


In [34]:
# JPM
H_optimal, empirical_table, model_table, total_error = calculate_optimal_h_and_empirical_tables(data, 'JPM', 'T', spread=None)

  td['Direction'] = td['Direction'].replace(to_replace=0, method='bfill')


Estimated H: 0.21777343750000003
Success: True
Message: Optimization terminated successfully.


In [35]:
empirical_table

Unnamed: 0,0.1 (9),0.2 (16),0.3 (22),0.4 (28),0.5 (34),0.6 (41),0.7 (50),0.8 (62),0.9 (85)
0.1 (9),0.52,0.5,0.42,0.39,0.31,0.26,0.22,0.2,0.16
0.2 (16),0.52,0.5,0.46,0.44,0.38,0.35,0.33,0.28,0.25
0.3 (23),0.59,0.55,0.5,0.46,0.44,0.42,0.37,0.35,0.3
0.4 (29),0.65,0.55,0.52,0.49,0.47,0.45,0.41,0.36,0.35
0.5 (35),0.71,0.6,0.56,0.52,0.51,0.48,0.46,0.4,0.39
0.6 (41),0.73,0.63,0.57,0.53,0.51,0.51,0.49,0.42,0.42
0.7 (49),0.76,0.65,0.61,0.56,0.54,0.52,0.49,0.45,0.44
0.8 (60),0.8,0.71,0.64,0.61,0.59,0.58,0.54,0.48,0.47
0.9 (79),0.84,0.75,0.71,0.67,0.64,0.63,0.58,0.51,0.54


In [36]:
model_table

Unnamed: 0,0.1 (9),0.2 (16),0.3 (22),0.4 (28),0.5 (34),0.6 (41),0.7 (50),0.8 (62),0.9 (85)
0.1 (9),0.5,0.41,0.34,0.3,0.26,0.23,0.21,0.19,0.18
0.2 (16),0.59,0.5,0.43,0.38,0.34,0.31,0.28,0.26,0.24
0.3 (23),0.66,0.57,0.5,0.45,0.4,0.37,0.34,0.31,0.29
0.4 (29),0.7,0.62,0.55,0.5,0.46,0.42,0.39,0.36,0.34
0.5 (35),0.74,0.66,0.6,0.54,0.5,0.46,0.43,0.4,0.38
0.6 (41),0.77,0.69,0.63,0.58,0.54,0.5,0.47,0.44,0.41
0.7 (49),0.79,0.72,0.66,0.61,0.57,0.53,0.5,0.47,0.45
0.8 (60),0.81,0.74,0.69,0.64,0.6,0.56,0.53,0.5,0.47
0.9 (79),0.82,0.76,0.71,0.66,0.62,0.59,0.55,0.53,0.5


In [37]:
# XLF
H_optimal, empirical_table, model_table, total_error = calculate_optimal_h_and_empirical_tables(data, 'XLF', 'T', spread=None)

  td['Direction'] = td['Direction'].replace(to_replace=0, method='bfill')


Estimated H: 0.19453125000000002
Success: True
Message: Optimization terminated successfully.


In [38]:
empirical_table

Unnamed: 0,0.1 (1112),0.2 (1808),0.3 (2508),0.4 (3445),0.5 (4541),0.6 (5279),0.7 (5770),0.8 (6125),0.9 (6669)
0.1 (1436),0.63,0.59,0.15,0.28,0.56,0.37,0.3,0.21,0.27
0.2 (2277),0.69,0.32,0.39,0.91,0.31,0.42,0.36,0.32,0.27
0.3 (3351),0.67,0.73,0.94,0.79,0.38,0.45,0.46,0.47,0.46
0.4 (4505),0.93,0.82,0.55,0.62,0.46,0.52,0.48,0.52,0.52
0.5 (5237),0.66,0.56,0.57,0.65,0.56,0.55,0.63,0.56,0.56
0.6 (5623),0.79,0.34,0.69,0.59,0.55,0.54,0.51,0.5,0.59
0.7 (5985),0.89,0.76,0.59,0.54,0.61,0.62,0.59,0.64,0.55
0.8 (6427),0.91,0.79,0.72,0.7,0.67,0.6,0.56,0.66,0.67
0.9 (6995),0.99,0.91,0.81,0.72,0.63,0.69,0.69,0.74,0.81


In [39]:
model_table

Unnamed: 0,0.1 (1112),0.2 (1808),0.3 (2508),0.4 (3445),0.5 (4541),0.6 (5279),0.7 (5770),0.8 (6125),0.9 (6669)
0.1 (1436),0.5,0.4,0.33,0.28,0.25,0.22,0.2,0.18,0.16
0.2 (2277),0.6,0.5,0.43,0.37,0.33,0.3,0.27,0.25,0.23
0.3 (3351),0.67,0.57,0.5,0.44,0.4,0.36,0.33,0.31,0.28
0.4 (4505),0.72,0.63,0.56,0.5,0.45,0.42,0.38,0.36,0.33
0.5 (5237),0.75,0.67,0.6,0.55,0.5,0.46,0.43,0.4,0.37
0.6 (5623),0.78,0.7,0.64,0.58,0.54,0.5,0.47,0.44,0.41
0.7 (5985),0.8,0.73,0.67,0.62,0.57,0.53,0.5,0.47,0.44
0.8 (6427),0.82,0.75,0.69,0.64,0.6,0.56,0.53,0.5,0.47
0.9 (6995),0.84,0.77,0.72,0.67,0.63,0.59,0.56,0.53,0.5


In [13]:
# AAPL-spread 1
H_optimal, empirical_table, model_table, total_error = calculate_optimal_h_and_empirical_tables(data, 'AAPL', 'T', spread=.01)


  td['Direction'] = td['Direction'].replace(to_replace=0, method='bfill')


Estimated H: 1.4017968750000045
Success: True
Message: Optimization terminated successfully.


In [15]:
empirical_table.iloc[:,1:]=empirical_table.iloc[:,:-1]

In [19]:
empirical_table.iloc[:,3:]=empirical_table.iloc[:,2:-1]

In [21]:
empirical_table.iloc[1:,:]=empirical_table.iloc[:-1,:]

In [22]:
empirical_table

Unnamed: 0,0.1 (1),0.2 (1),0.3 (2),0.4 (2),0.5 (3),0.6 (4),0.7 (4),0.8 (6),0.9 (8)
0.1 (1),0.51,0.51,0.51,0.51,0.47,0.45,0.43,0.45,0.47
0.2 (1),0.51,0.51,0.51,0.51,0.47,0.45,0.43,0.45,0.47
0.3 (2),0.51,0.51,0.49,0.49,0.46,0.45,0.45,0.43,0.41
0.4 (2),0.53,0.53,0.53,0.53,0.5,0.49,0.47,0.48,0.41
0.5 (3),0.55,0.55,0.55,0.55,0.52,0.51,0.49,0.48,0.45
0.6 (4),0.56,0.56,0.55,0.55,0.52,0.5,0.47,0.55,0.42
0.7 (5),0.56,0.56,0.56,0.56,0.53,0.51,0.51,0.6,0.4
0.8 (6),0.61,0.61,0.53,0.53,0.51,0.54,0.51,0.53,0.41
0.9 (8),0.61,0.61,0.59,0.59,0.55,0.54,0.53,0.54,0.5


In [23]:
model_table

Unnamed: 0,0.1 (1),0.2 (1),0.3 (2),0.4 (2),0.5 (3),0.6 (4),0.7 (4),0.8 (6),0.9 (8)
0.1 (1),0.5,0.48,0.47,0.45,0.44,0.42,0.41,0.4,0.39
0.2 (1),0.52,0.5,0.48,0.47,0.45,0.44,0.43,0.42,0.41
0.3 (2),0.53,0.52,0.5,0.48,0.47,0.46,0.44,0.43,0.42
0.4 (2),0.55,0.53,0.52,0.5,0.49,0.47,0.46,0.45,0.44
0.5 (3),0.56,0.55,0.53,0.51,0.5,0.49,0.47,0.46,0.45
0.6 (4),0.58,0.56,0.54,0.53,0.51,0.5,0.49,0.48,0.46
0.7 (5),0.59,0.57,0.56,0.54,0.53,0.51,0.5,0.49,0.48
0.8 (6),0.6,0.58,0.57,0.55,0.54,0.52,0.51,0.5,0.49
0.9 (8),0.61,0.59,0.58,0.56,0.55,0.54,0.52,0.51,0.5


In [24]:
# AAPL-spread 2
H_optimal, empirical_table, model_table, total_error = calculate_optimal_h_and_empirical_tables(data, 'AAPL', 'T', spread=.02)


  td['Direction'] = td['Direction'].replace(to_replace=0, method='bfill')


Estimated H: 0.6585937500000018
Success: True
Message: Optimization terminated successfully.


In [26]:
empirical_table.iloc[:,2:]=empirical_table.iloc[:,1:-1]
empirical_table.iloc[2:,:]=empirical_table.iloc[1:-1,:]

In [27]:
empirical_table

Unnamed: 0,0.1 (1),0.2 (2),0.3 (2),0.4 (3),0.5 (3),0.6 (4),0.7 (5),0.8 (6),0.9 (8)
0.1 (1),0.51,0.51,0.51,0.45,0.42,0.38,0.38,0.35,0.38
0.2 (2),0.5,0.5,0.5,0.47,0.44,0.42,0.4,0.39,0.38
0.3 (2),0.5,0.5,0.5,0.47,0.44,0.42,0.4,0.39,0.38
0.4 (3),0.58,0.54,0.54,0.51,0.48,0.46,0.44,0.43,0.42
0.5 (3),0.61,0.57,0.57,0.54,0.5,0.48,0.46,0.46,0.4
0.6 (4),0.62,0.59,0.59,0.55,0.5,0.49,0.48,0.46,0.44
0.7 (5),0.63,0.61,0.61,0.57,0.52,0.49,0.47,0.46,0.42
0.8 (6),0.65,0.63,0.63,0.59,0.55,0.5,0.52,0.49,0.46
0.9 (8),0.64,0.61,0.61,0.62,0.58,0.54,0.53,0.52,0.47


In [28]:
model_table

Unnamed: 0,0.1 (1),0.2 (2),0.3 (2),0.4 (3),0.5 (3),0.6 (4),0.7 (5),0.8 (6),0.9 (8)
0.1 (1),0.5,0.46,0.43,0.41,0.38,0.36,0.34,0.33,0.31
0.2 (2),0.54,0.5,0.47,0.44,0.42,0.4,0.38,0.36,0.34
0.3 (2),0.57,0.53,0.5,0.47,0.45,0.43,0.41,0.39,0.37
0.4 (3),0.59,0.56,0.53,0.5,0.48,0.45,0.43,0.41,0.4
0.5 (3),0.62,0.58,0.55,0.52,0.5,0.48,0.46,0.44,0.42
0.6 (4),0.64,0.6,0.57,0.55,0.52,0.5,0.48,0.46,0.44
0.7 (5),0.66,0.62,0.59,0.57,0.54,0.52,0.5,0.48,0.46
0.8 (6),0.67,0.64,0.61,0.59,0.56,0.54,0.52,0.5,0.48
0.9 (8),0.69,0.66,0.63,0.6,0.58,0.56,0.54,0.52,0.5


In [29]:
# AAPL-spread 3
H_optimal, empirical_table, model_table, total_error = calculate_optimal_h_and_empirical_tables(data, 'AAPL', 'T', spread=.03)


  td['Direction'] = td['Direction'].replace(to_replace=0, method='bfill')


Estimated H: 0.6118750000000014
Success: True
Message: Optimization terminated successfully.


In [31]:
empirical_table.iloc[:,2:]=empirical_table.iloc[:,1:-1]
empirical_table.iloc[2:,:]=empirical_table.iloc[1:-1,:]

In [32]:
empirical_table

Unnamed: 0,0.1 (1),0.2 (2),0.3 (2),0.4 (3),0.5 (3),0.6 (4),0.7 (5),0.8 (6),0.9 (8)
0.1 (1),0.5,0.5,0.5,0.44,0.41,0.37,0.36,0.35,0.38
0.2 (2),0.52,0.51,0.51,0.46,0.43,0.4,0.35,0.4,0.4
0.3 (2),0.52,0.51,0.51,0.46,0.43,0.4,0.35,0.4,0.4
0.4 (3),0.58,0.55,0.55,0.53,0.49,0.5,0.41,0.42,0.42
0.5 (3),0.62,0.57,0.57,0.55,0.51,0.49,0.46,0.46,0.37
0.6 (4),0.61,0.58,0.58,0.57,0.54,0.51,0.48,0.5,0.41
0.7 (5),0.64,0.61,0.61,0.58,0.56,0.51,0.48,0.45,0.42
0.8 (6),0.66,0.61,0.61,0.58,0.55,0.49,0.47,0.46,0.48
0.9 (8),0.62,0.62,0.62,0.6,0.59,0.55,0.54,0.51,0.58


In [33]:
model_table

Unnamed: 0,0.1 (1),0.2 (2),0.3 (2),0.4 (3),0.5 (3),0.6 (4),0.7 (5),0.8 (6),0.9 (8)
0.1 (1),0.5,0.46,0.43,0.4,0.38,0.35,0.34,0.32,0.3
0.2 (2),0.54,0.5,0.47,0.44,0.41,0.39,0.37,0.35,0.34
0.3 (2),0.57,0.53,0.5,0.47,0.45,0.42,0.4,0.38,0.37
0.4 (3),0.6,0.56,0.53,0.5,0.47,0.45,0.43,0.41,0.39
0.5 (3),0.62,0.59,0.55,0.53,0.5,0.48,0.46,0.44,0.42
0.6 (4),0.65,0.61,0.58,0.55,0.52,0.5,0.48,0.46,0.44
0.7 (5),0.66,0.63,0.6,0.57,0.54,0.52,0.5,0.48,0.46
0.8 (6),0.68,0.65,0.62,0.59,0.56,0.54,0.52,0.5,0.48
0.9 (8),0.7,0.66,0.63,0.61,0.58,0.56,0.54,0.52,0.5


In [40]:
# QQQQ
H_optimal, empirical_table, model_table, total_error = calculate_optimal_h_and_empirical_tables(data, 'QQQQ', 'P', spread=None)

  td['Direction'] = td['Direction'].replace(to_replace=0, method='bfill')


Estimated H: 0.15054687500000014
Success: True
Message: Optimization terminated successfully.


In [43]:
# JPM
H_optimal, empirical_table, model_table, total_error = calculate_optimal_h_and_empirical_tables(data, 'JPM', 'P', spread=None)

  td['Direction'] = td['Direction'].replace(to_replace=0, method='bfill')


Estimated H: 0.47679687500000095
Success: True
Message: Optimization terminated successfully.


In [46]:
# XLF
H_optimal, empirical_table, model_table, total_error = calculate_optimal_h_and_empirical_tables(data, 'XLF', 'P', spread=None)

  td['Direction'] = td['Direction'].replace(to_replace=0, method='bfill')


Estimated H: 0.27773437500000026
Success: True
Message: Optimization terminated successfully.


In [57]:
# AAPL-spread 1
H_optimal, empirical_table, model_table, total_error = calculate_optimal_h_and_empirical_tables(data, 'AAPL', 'P', spread=.01)


  td['Direction'] = td['Direction'].replace(to_replace=0, method='bfill')


Estimated H: 6.219843750000022
Success: True
Message: Optimization terminated successfully.


In [70]:
# AAPL-spread 2
H_optimal, empirical_table, model_table, total_error = calculate_optimal_h_and_empirical_tables(data, 'AAPL', 'P', spread=.02)


  td['Direction'] = td['Direction'].replace(to_replace=0, method='bfill')


Estimated H: 2.371171875000008
Success: True
Message: Optimization terminated successfully.


In [77]:
# AAPL-spread 3
H_optimal, empirical_table, model_table, total_error = calculate_optimal_h_and_empirical_tables(data, 'AAPL', 'P', spread=.03)


  td['Direction'] = td['Direction'].replace(to_replace=0, method='bfill')


Estimated H: 1.534609375000005
Success: True
Message: Optimization terminated successfully.


In [79]:
# QQQQ
H_optimal, empirical_table, model_table, total_error = calculate_optimal_h_and_empirical_tables(data, 'QQQQ', 'Z', spread=None)

  td['Direction'] = td['Direction'].replace(to_replace=0, method='bfill')


Estimated H: 0.1913671875
Success: True
Message: Optimization terminated successfully.


In [82]:
# JPM
H_optimal, empirical_table, model_table, total_error = calculate_optimal_h_and_empirical_tables(data, 'JPM', 'Z', spread=None)

  td['Direction'] = td['Direction'].replace(to_replace=0, method='bfill')


Estimated H: 0.17507812500000003
Success: True
Message: Optimization terminated successfully.


In [85]:
# XLF
H_optimal, empirical_table, model_table, total_error = calculate_optimal_h_and_empirical_tables(data, 'XLF', 'Z', spread=None)

  td['Direction'] = td['Direction'].replace(to_replace=0, method='bfill')


Estimated H: 0.19968750000000002
Success: True
Message: Optimization terminated successfully.


In [88]:
# AAPL-spread 1
H_optimal, empirical_table, model_table, total_error = calculate_optimal_h_and_empirical_tables(data, 'AAPL', 'Z', spread=.01)


  td['Direction'] = td['Direction'].replace(to_replace=0, method='bfill')


Estimated H: 1080863910568922.2
Success: False
Message: Maximum number of function evaluations has been exceeded.


In [90]:
# AAPL-spread 2
H_optimal, empirical_table, model_table, total_error = calculate_optimal_h_and_empirical_tables(data, 'AAPL', 'Z', spread=.02)


  td['Direction'] = td['Direction'].replace(to_replace=0, method='bfill')


Estimated H: 2.0043750000000062
Success: True
Message: Optimization terminated successfully.


In [92]:
# AAPL-spread 3
H_optimal, empirical_table, model_table, total_error = calculate_optimal_h_and_empirical_tables(data, 'AAPL', 'Z', spread=.03)


  td['Direction'] = td['Direction'].replace(to_replace=0, method='bfill')


Estimated H: 1.5323437500000048
Success: True
Message: Optimization terminated successfully.
