## Imports and Functions

We begin by loading the necessary packages/modules and define functions to calculate the log price ratio for our regression and for viewing dataframes.

In [43]:
import os
import pandas as pd
import numpy as np
import statsmodels.api as sm
from IPython.display import display, HTML

def calculate_log_price_ratio(last_trade_prices):
    # calc log(vi+1/vi)
    return np.log(last_trade_prices / last_trade_prices.shift(1))

def display_scrollable(df):
    """
    Displays a pandas DataFrame as a scrollable HTML table in a Jupyter Notebook.

    Args:
    df (pandas.DataFrame): The DataFrame to display.
    """
    df_html = df.to_html()
    scrollable_table = f'''
    <div style="overflow-x: auto; white-space: nowrap; max-height:400px; overflow-y:scroll;">
        {df_html}
    </div>
    '''
    display(HTML(scrollable_table))

## Initialization

Initialize a list to hold regression results. We then import and sort [`tick_data.csv`](<..\data\processed\tick_data.csv>) by time after converting 'Timestamp' to a datatime object and adjusting size based on tick direction.

In [44]:
results = []

df = pd.read_csv(os.path.join('..', 'data', 'processed', 'tick_data.csv'))

# convert 'Timestamp' to a datetime object and sort
df['Timestamp'] = pd.to_datetime(df['Timestamp'])
df.sort_values(by=['RIC', 'Timestamp'], inplace=True)

# adjust 'Volume' based on 'Tick' direction
df['Adjusted Volume'] = df.apply(lambda row: row['Volume'] if row['Tick'] == 'UP' or row['Tick'] == 'n' else -row['Volume'], axis=1)
display_scrollable(df.head(20))

Unnamed: 0,RIC,Timestamp,Tick,Last Trade,Volume,Adjusted Volume
23141,ABBN.S,2024-01-26 09:00:00.038,n,36.66,28432.0,28432.0
23140,ABBN.S,2024-01-26 09:00:00.101,UP,36.68,28388.0,28388.0
23139,ABBN.S,2024-01-26 09:00:00.118,DOWN,36.66,29532.0,-29532.0
23138,ABBN.S,2024-01-26 09:00:00.221,DOWN,36.59,29643.0,-29643.0
23136,ABBN.S,2024-01-26 09:00:00.228,UP,36.62,30583.0,30583.0
23134,ABBN.S,2024-01-26 09:00:00.235,UP,36.62,30718.0,30718.0
23133,ABBN.S,2024-01-26 09:00:00.237,UP,36.62,30624.0,30624.0
23128,ABBN.S,2024-01-26 09:00:00.518,UP,36.62,30872.0,30872.0
23126,ABBN.S,2024-01-26 09:00:01.034,UP,36.62,30574.0,30574.0
23125,ABBN.S,2024-01-26 09:00:01.392,DOWN,36.6,31316.0,-31316.0


## Run the regression

We perform a nested loop through each equity and trading day to obtain daily liquidity estimates. We then append the results into a dataframe and save it to the [`liquidity parameter`](<..\models\liquidity parameter>) file directory.

In [45]:
for ric, ric_group in df.groupby('RIC'):
    for date, group in ric_group.groupby(ric_group['Timestamp'].dt.date):
        # check if the group is empty
        if group.empty:
            continue

        # sort again just in case
        group.sort_values('Timestamp', inplace=True)

        # calc yi using 'Last Trade' column
        group['log_price_ratio'] = calculate_log_price_ratio(group['Last Trade'])

        # calculate predictors
        group['ti_diff'] = group['Timestamp'].diff().dt.total_seconds()
        group['wi'] = (group['Adjusted Volume'].diff() / group['ti_diff'].pow(0.5))
        group['zi'] = group['ti_diff'].pow(0.5)

        # remove nan rows due to diff
        group = group.dropna(subset=['log_price_ratio', 'wi', 'zi'])

        # check if group has enough points to fit
        if group.shape[0] > 1:
            # prep vars for regression
            X = group[['wi', 'zi']]
            y = group['log_price_ratio']

            # fit ols
            model = sm.OLS(y, X).fit()
            
            # extract names
            predictors = model.params.index
            
            # check if desired predictor is present
            if 'wi' in predictors:
                # calc confidence intervals
                confidence_intervals = model.conf_int()

                # append to results list
                results.append({
                    'RIC': ric,
                    'Date': date,
                    'Gamma Estimate': model.params['wi'],
                    'CI Lower Bound': confidence_intervals.loc['wi', 0],
                    'CI Upper Bound': confidence_intervals.loc['wi', 1]
                })
            else:
                print(f"Predictor 'wi' not found in the model's parameters for RIC {ric} on {date}")

# convert list to dataframe and save
results_df = pd.DataFrame(results)
results_df.to_csv(os.path.join('..', 'models', 'liquidity parameter', 'liq_results.csv'))
display_scrollable(results_df)

  return np.dot(wresid, wresid) / self.df_resid
  return np.dot(wresid, wresid) / self.df_resid
  return np.dot(wresid, wresid) / self.df_resid
  return np.dot(wresid, wresid) / self.df_resid
  return np.dot(wresid, wresid) / self.df_resid


Unnamed: 0,RIC,Date,Gamma Estimate,CI Lower Bound,CI Upper Bound
0,ABBN.S,2024-01-26,4.191601e-11,3.040212e-11,5.342991e-11
1,ABBN.S,2024-01-29,2.273759e-11,9.294902e-12,3.618027e-11
2,ABBN.S,2024-01-30,4.221952e-11,2.774471e-11,5.669434e-11
3,ABBN.S,2024-01-31,4.090968e-11,1.99782e-11,6.184117e-11
4,ABBN.S,2024-02-01,5.952353e-11,4.044658e-11,7.860047e-11
5,ABBN.S,2024-02-02,4.12269e-11,3.026069e-11,5.21931e-11
6,ABBN.S,2024-02-05,3.152679e-11,1.946595e-11,4.358764e-11
7,ABBN.S,2024-02-06,4.068532e-11,4.014604e-13,8.096917e-11
8,ABBN.S,2024-02-07,5.974943e-11,4.036805e-11,7.91308e-11
9,ABBN.S,2024-02-08,1.43925e-10,1.132134e-10,1.746366e-10
