## Imports and Functions

We begin by importing the necessary packages/modules and defining two helper functions: one that calculates the log price ratio used in our regression, and one that displays dataframes as scrollable tables.

In [1]:
import os
import pandas as pd
import numpy as np
import statsmodels.api as sm
from scipy.stats import norm
from IPython.display import display, HTML

def calculate_log_price_ratio(last_trade_prices):
    """
    Computes the log of each last trade price divided by the price before it.

    For a price series v, element i of the result is log(v[i] / v[i-1]).

    Args:
    last_trade_prices (pandas Series): A pandas Series of the last trade prices of a stock.

    Returns:
    pandas Series: The logarithmic price ratios, indexed like the input. The first
        element is NaN because it has no preceding price to be compared against.
    """
    # price of the preceding row, aligned to the current row
    previous_prices = last_trade_prices.shift(1)
    price_ratio = last_trade_prices / previous_prices
    return np.log(price_ratio)

def display_scrollable(df):
    """
    Renders a pandas DataFrame in a Jupyter Notebook inside a scrollable HTML container.

    The table is wrapped in a <div> that scrolls horizontally when the table is
    too wide and vertically once it exceeds a height of 400px.

    Args:
    df (pandas.DataFrame): The DataFrame to display.
    """
    container_open = '<div style="overflow-x: auto; white-space: nowrap; max-height:400px; overflow-y:scroll;">'
    # the newlines and indentation between the pieces are part of the emitted HTML
    markup = '\n    ' + container_open + '\n        ' + df.to_html() + '\n    </div>\n    '
    display(HTML(markup))

## Initialization

Initialize a list to hold regression results and define the significance level. We then import [`tick_data.csv`](<..\data\processed\tick_data.csv>), convert 'Timestamp' to a datetime object, sort the rows by 'RIC' and 'Timestamp', and create a signed 'Adjusted Volume' column from 'Volume' based on the tick direction in 'Tick'.

In [2]:
# container for the per-equity, per-day regression results and the
# significance level used for all hypothesis tests
results = []
sig_lvl = 0.05

df = pd.read_csv(os.path.join('..', 'data', 'processed', 'tick_data.csv'))

# convert 'Timestamp' to a datetime object and sort by equity, then time
# (assignment instead of inplace=True keeps the step explicit and chainable)
df['Timestamp'] = pd.to_datetime(df['Timestamp'])
df = df.sort_values(by=['RIC', 'Timestamp'])

# adjust 'Volume' based on 'Tick' direction: rows whose tick is 'UP' or 'n'
# keep their volume, every other row gets the negated volume
# (vectorized; avoids a slow row-by-row df.apply)
keeps_sign = df['Tick'].isin(['UP', 'n'])
df['Adjusted Volume'] = df['Volume'].where(keeps_sign, -df['Volume'])

# display first 20 rows of sorted dataframe
display_scrollable(df.head(20))

Unnamed: 0,RIC,Timestamp,Tick,Last Trade,Volume,Adjusted Volume
11349,ABBN.S,2023-12-21 09:00:00.007,n,37.13,77694.0,77694.0
11348,ABBN.S,2023-12-21 09:00:00.017,DOWN,37.12,78523.0,-78523.0
11347,ABBN.S,2023-12-21 09:00:00.035,UP,37.15,78523.0,78523.0
11342,ABBN.S,2023-12-21 09:00:00.079,UP,37.16,78993.0,78993.0
11340,ABBN.S,2023-12-21 09:00:00.084,DOWN,37.15,78523.0,-78523.0
11337,ABBN.S,2023-12-21 09:00:00.173,UP,37.16,78859.0,78859.0
11324,ABBN.S,2023-12-21 09:00:00.485,UP,37.16,76079.0,76079.0
11319,ABBN.S,2023-12-21 09:00:00.613,UP,37.16,76083.0,76083.0
11310,ABBN.S,2023-12-21 09:00:00.975,DOWN,37.15,74600.0,-74600.0
11306,ABBN.S,2023-12-21 09:00:01.144,DOWN,37.12,74591.0,-74591.0


## Running the Regression

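We perform a nested loop through each equity and trading day to obtain daily liquidity estimates. For each equity-day we fit an ordinary least squares regression (with an intercept) of the log price ratio on `wi`, the change in adjusted volume divided by the square root of the time between trades, and `zi`, the square root of the time between trades. We then collect the results into a dataframe and save it to the [`liq_param`](<..\models\liq_param>) file directory.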

In [3]:
# start from an empty list so that re-running this cell never duplicates rows
results = []

# regressors; the intercept added by sm.add_constant is one further parameter
predictors = ['wi', 'zi']
n_params = len(predictors) + 1

for ric, ric_group in df.groupby('RIC'):
    for date, day_ticks in ric_group.groupby(ric_group['Timestamp'].dt.date):
        # sort into a new frame; the groupby slice itself is left untouched
        day_ticks = day_ticks.sort_values('Timestamp')

        # seconds elapsed between consecutive trades, and its square root
        ti_diff = day_ticks['Timestamp'].diff().dt.total_seconds()
        sqrt_ti_diff = ti_diff.pow(0.5)

        # yi (log price ratio of 'Last Trade') and the predictors wi and zi
        reg_data = pd.DataFrame({
            'log_price_ratio': calculate_log_price_ratio(day_ticks['Last Trade']),
            'wi': day_ticks['Adjusted Volume'].diff() / sqrt_ti_diff,
            'zi': sqrt_ti_diff,
        })

        # remove the NaN rows produced by diff/shift, and also non-finite rows
        # (e.g. division by a zero time difference when two trades share a
        # timestamp), which dropna alone would let through to the regression
        reg_data = reg_data.replace([np.inf, -np.inf], np.nan).dropna()

        # a fit needs more observations than parameters; with fewer there are
        # no residual degrees of freedom and the test statistics are undefined
        if len(reg_data) <= n_params:
            continue

        # prep vars for regression and add intercept
        X_ = sm.add_constant(reg_data[predictors])
        y = reg_data['log_price_ratio']

        # fit model
        model = sm.OLS(y, X_).fit()

        # extract parameters and p-values
        gamma_est = model.params['wi']
        eta_est = model.params['zi']
        gamma_p = model.pvalues['wi']
        eta_p = model.pvalues['zi']

        # check sig; the joint flag is true only when both coefficients are significant
        gamma_sig = gamma_p < sig_lvl
        eta_sig = eta_p < sig_lvl
        full_sig = gamma_sig and eta_sig

        # calc confidence
        c_int = model.conf_int()

        # f-test
        f_stat = model.fvalue
        f_p = model.f_pvalue
        model_sig = f_p < sig_lvl

        # append results
        results.append({
            'Ticker': ric,
            'Date': date,
            'Gamma Estimate': gamma_est,
            'Gamma Significance': gamma_sig,
            'Eta Estimate': eta_est,
            'Eta Significance': eta_sig,
            'Full Model Significance': full_sig,
            'CI Lower Bound (Gamma)': c_int.loc['wi', 0],
            'CI Upper Bound (Gamma)': c_int.loc['wi', 1],
            'F-statistic': f_stat,
            'Model p-value': f_p,
            'Model Significance': model_sig
        })

# convert to dataframe and save
results_df = pd.DataFrame(results)
results_df['Ticker'] = results_df['Ticker'].str.replace('.S', '', regex=False)

# make sure the output directory exists so a fresh checkout can run end to end
output_dir = os.path.join('..', 'models', 'liq_param')
os.makedirs(output_dir, exist_ok=True)
results_df.to_csv(os.path.join(output_dir, 'liq_estimates.csv'), index=False)

# display dataframe
display_scrollable(results_df)

  return self.mse_model/self.mse_resid
  return self.mse_model/self.mse_resid


Unnamed: 0,Ticker,Date,Gamma Estimate,Gamma Significance,Eta Estimate,Eta Significance,Full Model Significance,CI Lower Bound (Gamma),CI Upper Bound (Gamma),F-statistic,Model p-value,Model Significance
0,ABBN,2023-12-21,1.021534e-10,True,-5.060023e-06,False,False,8.303633e-11,1.212705e-10,55.226406,3.2795409999999998e-24,True
1,ABBN,2023-12-22,7.511271e-11,False,-2.07733e-06,False,False,-3.095871e-11,1.811841e-10,0.97389,0.3778031,False
2,ABBN,2023-12-27,7.792762e-11,True,-2.249701e-06,False,False,6.225694e-11,9.35983e-11,47.608643,6.027478e-21,True
3,ABBN,2023-12-28,6.964727e-11,False,-5.893593e-06,False,False,-5.321738e-11,1.925119e-10,0.628805,0.5333388,False
4,ABBN,2023-12-29,1.468496e-10,True,-4.809091e-06,False,False,1.11292e-10,1.824072e-10,33.027682,9.400913e-15,True
5,ABBN,2024-01-03,5.207383e-11,True,2.262591e-05,True,True,3.356559e-11,7.058206e-11,16.902386,4.95502e-08,True
6,ABBN,2024-01-04,1.494187e-10,True,-2.629378e-06,False,False,1.223034e-10,1.76534e-10,58.38085,1.255031e-25,True
7,ABBN,2024-01-05,6.237246e-11,True,-3.865757e-06,False,False,3.960757e-11,8.513735e-11,14.898859,3.72385e-07,True
8,ABBN,2024-01-08,1.497857e-10,True,-1.158277e-05,False,False,1.115382e-10,1.880331e-10,30.431351,9.23176e-14,True
9,ABBN,2024-01-09,1.798929e-10,True,3.412312e-06,False,False,1.464236e-10,2.133621e-10,55.781334,2.0999899999999998e-24,True
