In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import ttest_rel
import utils
import gc
import importlib
importlib.reload(utils)

<module 'utils' from 'D:\\Master Thesis\\Code\\codes - clean\\utils.py'>

In [2]:
gc.collect()

52

In [3]:
df = pd.read_csv('data/data_merged_fluidshift.csv')

In [4]:
df.columns

Index(['PERMNO', 'y', 'qtr', 'GIND', 'SIC', 'RET', 'vwretd', 'SALES', 'COGS',
       'GP', 'OPEX', 'EBITDA', 'D&A', 'EBIT', 'NI', 'INVENTORY', 'CA', 'CL',
       'COA', 'COL', 'TA', 'NOA', 'PSTK', 'BE', 'LT', 'STDEBT', 'LTDEBT',
       'EQIS', 'MI', 'NWC', 'NCOA', 'COWC', 'CASHST', 'NCOFA', 'NCOL',
       'NETDEBT', 'NNCOA', 'OACC', 'OCF', 'MC', 'resff3_12_1', 'mom_12_1',
       'zero_trades_252', 'zero_trades_21', 'turnover_126d', 'ivol_ff3_21d',
       'rmax5_rvol_21d', 'firm_age', 'seas_6_10an', 'seas_6_10na',
       'seas_11_15na', 'seas_20_16an'],
      dtype='object')

### Defining the Variables

In [10]:
# Lagged NI for CHIN and INTWO
df['NI_lag'] = df.groupby('PERMNO')['NI'].shift(1)

# TLTA: Total Liabilities to Total Assets
df['TLTA'] = df['LT'] / df['TA']

# WCTA: Working Capital to Total Assets
df['WCTA'] = (df['CA'] - df['CL']) / df['TA']

# CLCA: Current Liabilities to Current Assets
df['CLCA'] = df['CL'] / df['CA']

# NITA: Net Income to Total Assets
df['NITA'] = df['NI'] / df['TA']

# FUTL: Funds from Operations to Total Liabilities (proxied as OCF / LT)
df['FUTL'] = df['OCF'] / df['LT']

# OENEG: 1 if total liabilities > total assets
df['OENEG'] = (df['LT'] > df['TA']).astype(int)

# INTWO: Net income negative in last two years
df['INTWO'] = ((df['NI'] < 0) & (df['NI_lag'] < 0)).astype(int)

# CHIN: Change in NI / (|NI| + |NI_lag|)
denom = df['NI'].abs() + df['NI_lag'].abs()
df['CHIN'] = (df['NI'] - df['NI_lag']) / denom.replace(0, np.nan)

# Avoid infinite values
df['CHIN'] = df['CHIN'].replace([np.inf, -np.inf], np.nan)

# O-Score Calculation
df['o_score'] = (
    -1.32
    - 0.407 * np.log(df['TA'].replace(0, np.nan))
    + 6.03 * df['TLTA']
    - 1.43 * df['WCTA']
    + 0.0757 * df['CLCA']
    - 1.72 * df['OENEG']           
    - 2.37 * df['NITA']
    - 1.83 * df['FUTL']
    + 0.285 * df['INTWO']
    - 0.521 * df['CHIN']
)

  result = getattr(ufunc, method)(*inputs, **kwargs)


In [12]:
# Change in NWC
df['NWC_lag'] = df.groupby('PERMNO')['NWC'].shift(1)
df['delta_NWC'] = df['NWC'] - df['NWC_lag']
df['cop_at'] = (df['EBITDA'] - df['delta_NWC']) / df['TA']

# Change in NOA over lagged total assets
df['NOA_lag'] = df.groupby('PERMNO')['NOA'].shift(1)
df['TA_lag'] = df.groupby('PERMNO')['TA'].shift(1)
df['noa_gr1a'] = (df['NOA'] - df['NOA_lag']) / df['TA_lag']

# Sales growth
df['SALES_lag'] = df.groupby('PERMNO')['SALES'].shift(1)
df['saleq_gr1'] = (df['SALES'] - df['SALES_lag']) / df['SALES_lag'].abs()

# Debt to market cap
df['debt_me'] = (df['LT'] + df['STDEBT']) / df['MC']

# change in current operating working capital
df['cowc_gr1a'] = (df['COWC'] - df.groupby('PERMNO')['COWC'].shift(1)) / df.groupby('PERMNO')['TA'].shift(1)

# Change in non-current operating assets over lagged total assets
df['NNCOA_lag'] = df.groupby('PERMNO')['NNCOA'].shift(1)
df['nncoa_gr1a'] = (df['NNCOA'] - df['NNCOA_lag']) / df['TA_lag']

# Operating cash flow to market cap
df['ocf_me'] = df['OCF'] / df['MC']

# Quarterly Net Income to Total Assets
df['niq_at'] = df['NI'] / df['TA']

# Compute rolling autocorrelation of NI with lag 1
m = 8
df['ni_arl'] = df.groupby('PERMNO')['NI'].transform(
    lambda x: x.rolling(m, min_periods=m).apply(lambda y: y.autocorr(lag=1), raw=False)
)

# Earnings to Price Ratio
df['ni_me'] = df['NI'] / df['MC']

# Sales Change minus Inventory Change
df['SALES_lag'] = df.groupby('PERMNO')['SALES'].shift(1)
df['INVENTORY_lag'] = df.groupby('PERMNO')['INVENTORY'].shift(1)
df['dsale_dinv'] = ((df['SALES'] - df['SALES_lag']) / df['SALES_lag'].abs() - (df['INVENTORY'] - df['INVENTORY_lag']) / df['INVENTORY_lag'].abs())

# Return on Equity
df['ni_be'] = df['NI'] / df['BE']

# NOA to AT
df['noa_at'] = df['NOA'] / df['TA']

# Compute change in net financial assets: cash + short-term investments - total debt-1
df['TA_lag'] = df.groupby('PERMNO')['TA'].shift(1)
df['nfna'] = df['CASHST'].fillna(0) - (df['LTDEBT'].fillna(0) + df['STDEBT'].fillna(0))
df['nfna_grla'] = df['nfna'] / df['TA_lag']

# Total Assets divided by Market Cap
df['at_me'] = df['TA'] / df['MC']
gc.collect()

9

In [13]:
df_mc_filtered = df[df['MC'] >= 50].copy()

cols_to_keep = [
    'PERMNO', 'y', 'qtr', 'GIND', 'SIC', 'RET', 'vwretd', 'MC',
    'cop_at', 'noa_gr1a', 'saleq_gr1', 'resff3_12_1', 'seas_6_10an', 'debt_me',
    'seas_6_10na', 'zero_trades_252', 'zero_trades_21', 'cowc_gr1a', 'nncoa_gr1a', 'ocf_me',
    'turnover_126d', 'rmax5_rvol_21d', 'seas_11_15na', 'o_score', 'niq_at',
    'seas_20_16an', 'ni_arl', 'ivol_ff3_21d', 'ni_me', 'dsale_dinv', 'ni_be',
    'noa_at', 'firm_age', 'mom_12_1', 'nfna_grla', 'at_me'
]

data_merged_clean = df_mc_filtered[cols_to_keep].copy()
data_merged_clean.to_csv("data_merged_fluidshift.csv", index=False)

In [14]:
df = pd.read_csv("data_merged_fluidshift.csv")

# Create quarter label
df['quarter_label'] = df['y'].astype(str) + 'Q' + df['qtr'].astype(str)

# Choose industry grouping
industry_group = 'GIND' 

# List of variables to rank
rank_vars = [
    'cop_at', 'noa_gr1a', 'saleq_gr1', 'resff3_12_1', 'seas_6_10an', 'debt_me',
    'seas_6_10na', 'zero_trades_252', 'zero_trades_21', 'cowc_gr1a', 'nncoa_gr1a', 'ocf_me',
    'turnover_126d', 'rmax5_rvol_21d', 'seas_11_15na', 'o_score', 'niq_at',
    'seas_20_16an', 'ni_arl', 'ivol_ff3_21d', 'ni_me', 'dsale_dinv', 'ni_be',
    'noa_at', 'firm_age', 'mom_12_1', 'nfna_grla', 'at_me'
]

# Compute percentile ranks
for var in rank_vars:
    df[f'{var}_pct'] = (
        df.groupby([industry_group, 'quarter_label'])[var]
        .transform(lambda x: x.rank(pct=True))
    )

# Final columns: identifiers + only _pct variables
id_cols = ['PERMNO', 'y', 'qtr', 'GIND', 'SIC', 'RET', 'vwretd', 'MC']
pct_cols = [f'{var}_pct' for var in rank_vars]
data_merged_pct = df[id_cols + pct_cols].copy()

# Save to CSV
data_merged_pct.to_csv("data_merged_pctfluidshiftpct.csv", index=False)