In [233]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from tqdm import tqdm
tqdm.pandas()

# My Python Modules
import FFIndustry as ffi
import FYP_fns as fyp
import importlib
importlib.reload(ffi)
importlib.reload(fyp)

import numpy.linalg as la
from scipy import stats

def hausman(fe, re):
    """Hausman specification test between a fixed-effects and a random-effects fit.

    The statistic is ``(b - B)' [Var(b) - Var(B)]^{-1} (b - B)``, where ``b`` and
    ``B`` are the fixed- and random-effects coefficient estimates. It is compared
    against a chi-squared distribution.

    Only coefficients that appear in BOTH results (matched by label) enter the
    test. Previously the vectors were subtracted with pandas label alignment, so
    a coefficient present in just one model silently produced NaN for the whole
    statistic. When both results carry identical labels the outcome is unchanged.

    Parameters
    ----------
    fe : object
        Fixed-effects result exposing ``params`` (pandas Series indexed by
        coefficient name) and ``cov`` (pandas DataFrame labelled by coefficient
        name on both axes).
    re : object
        Random-effects result exposing the same two attributes.

    Returns
    -------
    tuple
        ``(chi2, df, pval)``: the test statistic, its degrees of freedom and the
        upper-tail chi-squared p-value.

    Raises
    ------
    ValueError
        If the two results share no coefficient labels.
    numpy.linalg.LinAlgError
        If the covariance difference cannot be inverted.
    """
    shared = fe.params.index.intersection(re.params.index)
    if len(shared) == 0:
        raise ValueError("hausman: the two results have no coefficients in common")

    b = fe.params.loc[shared]
    B = re.params.loc[shared]
    v_b = fe.cov.loc[shared, shared]
    v_B = re.cov.loc[shared, shared]

    # Degrees of freedom: number of compared coefficients, skipping any
    # estimate with magnitude >= 1e8 (same cut-off as before).
    df = b[np.abs(b) < 1e8].size

    diff = (b - B).to_numpy()
    chi2 = diff @ la.inv((v_b - v_B).to_numpy()) @ diff

    pval = stats.chi2.sf(chi2, df)
    return chi2, df, pval

def final_processing(df):
    """Prepare a raw sample for the panel regressions.

    Steps, in order: winsorize the regression variables, add the tangibility
    and time-period dummies, add the heterogeneity variables, then cast the
    identifier / dummy columns to pandas categoricals.

    All transformation steps are delegated to the project module ``fyp``; what
    exactly each helper does is not visible here (the notes below repeat the
    original author's descriptions).

    Parameters
    ----------
    df : pandas.DataFrame
        Sample to process. Presumably it must contain the columns listed below
        plus ``gvkey`` and ``sic`` -- verify against the ``fyp`` helpers.

    Returns
    -------
    pandas.DataFrame
        The processed frame returned by the ``fyp`` pipeline, with the
        categorical casts applied.

    Raises
    ------
    ValueError
        If ``'Other'`` is not among the unique ``sic`` values
        (``list.remove`` fails).
    """
    # Two groups of variables, passed to the winsorizer with pct1=0.05 and
    # pct2=0.1; which percentage applies to which group is decided inside
    # fyp.get_winsor_df -- TODO confirm.
    heavier_winsor_cols = ['know_cap_AT', 'organ_cap_AT', 'intan_lessgdwl_AT',
                           'Market_to_Book', 'Op_profit', 'Cash_liq']
    lighter_winsor_cols = ['LEV_TD', 'Mkt_LEV',
                           'intan_cap_AT', 'PPENT_AT', 'intan_AT',
                           'log_asset']

    processed = fyp.get_winsor_df(df, lighter_winsor_cols, heavier_winsor_cols,
                                  pct1=0.05, pct2=0.1)

    # Original note: high tangibility = 1 for firms above their industry
    # median tangibility, 0 otherwise.
    processed = fyp.Tangibility_dummy(processed, visualize=False)
    # Original note: After 2015 = 1, 0 otherwise.
    processed = fyp.Time_period_dummy(processed, T=2015)
    processed = fyp.heterogeneity_vars(processed)

    # Every industry label except 'Other' is treated as a column name below;
    # 'Other' is the omitted base category (avoids the dummy-variable trap).
    industry_cols = list(processed['sic'].unique())
    industry_cols.remove('Other')

    for col_name in ['gvkey', 'sic', 'High_Tan', 'After2015'] + industry_cols:
        processed[col_name] = pd.Categorical(processed[col_name])

    return processed

def label_sig_level(res):
    """Format coefficient estimates as strings with significance stars.

    Parameters
    ----------
    res : object
        Regression result exposing ``params`` and ``pvalues`` as pandas Series
        over the same coefficient labels.

    Returns
    -------
    pandas.DataFrame
        One column, ``'parameter'``, holding each estimate rounded to four
        decimals and rendered as text, followed by ``*`` (p < 0.1),
        ``**`` (p < 0.05) or ``***`` (p < 0.01); no suffix otherwise.
    """
    flag_cols = ['alpha_10', 'alpha_5', 'alpha_1']
    pvals = res.pvalues

    # One boolean column per threshold, side by side with the estimates.
    table = pd.concat(
        [res.params] + [pvals < cutoff for cutoff in (0.1, 0.05, 0.01)],
        axis=1,
    )
    table.columns = ['parameter'] + flag_cols

    # The thresholds are nested, so the number of passed tests (0-3) is
    # exactly the number of stars.
    n_passed = table[flag_cols].sum(axis=1)
    table['sig_level'] = np.select(
        [n_passed == 3, n_passed == 2, n_passed == 1],
        ["***", '**', '*'],
        default='',
    )

    table['parameter'] = table['parameter'].round(4).astype(str) + table['sig_level']
    return table.drop(columns=flag_cols + ['sig_level'])

def adj_rsq(res):
    """Adjusted R-squared of a fitted regression.

    Uses ``1 - (1 - R2) * (N - 1) / (N - k - 1)`` where ``k`` is the number of
    estimated slope coefficients in ``res`` itself (every entry of
    ``res.params`` except the ``'const'`` term added by ``sm.add_constant``).

    Previously ``k`` was read from the global ``Book_Lev_Reg`` list, so the
    adjustment was wrong for any specification with a different number of
    regressors, and the function failed if that global was not defined.

    Parameters
    ----------
    res : object
        Fitted result exposing ``rsquared``, ``nobs`` and ``params`` (a pandas
        Series indexed by coefficient name).

    Returns
    -------
    float
        The adjusted R-squared.

    Raises
    ------
    ValueError
        If there are not enough observations (``N - k - 1 <= 0``).
    """
    r_sq = res.rsquared
    N = res.nobs
    # Count regressors from the fitted model; the intercept is not a regressor.
    k = sum(1 for name in res.params.index if name != 'const')
    if N - k - 1 <= 0:
        raise ValueError("adj_rsq: need more observations than regressors + 1")
    adj_r_sq = 1 - (((1 - r_sq) * (N - 1)) / (N - k - 1))
    return adj_r_sq

def run_twfem(df, reg_model):
    """Fit a two-way (entity and time) fixed-effects model and build its report column.

    Parameters
    ----------
    df : pandas.DataFrame
        Panel data; PanelOLS presumably expects an (entity, time) MultiIndex --
        callers in this notebook index by ``['gvkey', 'fyear']``.
    reg_model : list of str
        Column names: the first entry is the dependent variable, the remaining
        entries are the regressors. A constant is added automatically.

    Returns
    -------
    pandas.DataFrame
        Single column named ``'twfem_book'`` (regardless of which dependent
        variable was used) with the starred coefficients, followed by
        ``'adj_rsq'`` and ``'Observations'`` rows. The first row (the constant)
        is dropped.
    """
    dependent, regressors = reg_model[0], reg_model[1:]

    panel_model = PanelOLS(
        df[dependent],
        sm.add_constant(df[regressors]),
        entity_effects=True,
        time_effects=True,
    )
    # Standard errors clustered by both entity and time.
    fitted = panel_model.fit(cov_type='clustered', cluster_entity=True, cluster_time=True)

    fit_quality = adj_rsq(fitted)
    report = label_sig_level(fitted).rename(columns={'parameter': 'twfem_book'})
    report.loc['adj_rsq'] = round(fit_quality, 4)
    report.loc['Observations'] = fitted.nobs

    # Skip the leading 'const' row.
    return report.iloc[1:]

In [166]:
# Load the three regression samples from parquet files (paths are relative to
# the notebook's working directory) and print, for each one, the number of
# distinct firms (`gvkey`) and the frame's shape.
reg_df = pd.read_parquet('Generated Data/CleanCode_reg_df_des.parquet')
print('unbalanced reg data:', reg_df['gvkey'].nunique(), reg_df.shape)

robust_raw = pd.read_parquet('Generated Data/CleanCode_robust_raw_des.parquet')
print('robust data:', robust_raw['gvkey'].nunique(), robust_raw.shape)

reg_df_bal = pd.read_parquet('Generated Data/CleanCode_reg_df_bal_des.parquet')
print('balanced reg data:', reg_df_bal['gvkey'].nunique(), reg_df_bal.shape)

# Run each sample through final_processing (winsorizing, dummies, categorical
# casts); the `_w` frames are what the regression cells below use.
reg_df_w = final_processing(reg_df)
robust_raw_w = final_processing(robust_raw)
reg_df_bal_w = final_processing(reg_df_bal)

unbalanced reg data: 3416 (32908, 48)
robust data: 6383 (126514, 48)
balanced reg data: 1641 (21333, 48)


In [235]:
# Regression variable labels.
# Every *_reg / *_Reg list below has the dependent variable first, followed by
# the regressors (this is the layout run_twfem and the PanelOLS calls expect).
DV = ['LEV_TD', 'Mkt_LEV']
key_IV = ['intan_cap_AT', 'PPENT_AT']
key_IV_parts = ['know_cap_AT', 'organ_cap_AT', 'intan_AT', 'PPENT_AT']
key_IV_parts_LGDWL = ['know_cap_AT', 'organ_cap_AT', 'intan_lessgdwl_AT', 'PPENT_AT']
control = ['age', 'log_asset', 'Market_to_Book', 'Op_profit', 'Cash_liq', 'MTR_AI']
Hetero_vars = ['intan_cap_AT', 'High_Tan', 'After2015', 'HiTan_INT', 'T2_INT', 'HiTan_T2_INT']

# Main (benchmark) regression models: book and market leverage
Book_Lev_Reg = [DV[0]] + key_IV + control
Mkt_Lev_Reg = [DV[1]] + key_IV + control

# Heterogeneity analysis regressions.
# These previously referenced `Controls`, a name that is never defined in this
# notebook (NameError on a fresh kernel); the control list is `control`.
Hetero_reg_book = [DV[0]] + Hetero_vars + control
Hetero_reg_mkt = [DV[1]] + Hetero_vars + control

# P&T intangibles subcomponent regressions (book leverage)
subcom_reg_book1 = [DV[0]] + key_IV_parts + control
subcom_reg_book2 = [DV[0]] + key_IV_parts_LGDWL + control

# P&T intangibles subcomponent regressions (market leverage)
subcom_reg_mkt1 = [DV[1]] + key_IV_parts + control
subcom_reg_mkt2 = [DV[1]] + key_IV_parts_LGDWL + control

# Working panel for the regression cells: the processed main sample indexed by
# (gvkey, fyear). NOTE: later cells rebind `df` to other samples, so cells that
# read `df` without setting it depend on execution order.
df = reg_df_w.set_index(['gvkey', 'fyear'])

In [257]:
# Hausman test: fixed-effects vs. random-effects model, run once per leverage measure
from linearmodels import PooledOLS
from linearmodels import PanelOLS
from linearmodels import RandomEffects
import statsmodels.api as sm

# Same procedure for both specifications; the market-leverage run comes last,
# so model_re / re_res / mod / fe_res / hausman_results end up holding its values.
for _banner, _spec in (('---BOOK LEVERAGE MODEL---', Book_Lev_Reg),
                       ('---MARKET LEVERAGE MODEL---', Mkt_Lev_Reg)):
    # random effects model
    model_re = RandomEffects(df[_spec[0]], sm.add_constant(df[_spec[1:]]))
    re_res = model_re.fit(cov_type='clustered', cluster_entity=True, cluster_time=True)

    # fixed effects model (entity and time effects)
    mod = PanelOLS(df[_spec[0]], sm.add_constant(df[_spec[1:]]),
                   entity_effects=True, time_effects=True)
    fe_res = mod.fit(cov_type='clustered', cluster_entity=True, cluster_time=True)

    # hausman returns (chi2, degrees of freedom, p-value)
    hausman_results = hausman(fe_res, re_res)
    print(_banner)
    print('chi-Squared: ' + str(hausman_results[0]))
    print('degrees of freedom: ' + str(hausman_results[1]))
    print('p-Value: ' + str(round(hausman_results[2],4)))

---BOOK LEVERAGE MODEL---
chi-Squared: 88.87909391956627
degrees of freedom: 9
p-Value: 0.0
---MARKET LEVERAGE MODEL---
chi-Squared: 48.724059003284765
degrees of freedom: 9
p-Value: 0.0


In [222]:
from linearmodels.panel import PanelOLS

# ---- Main model, book leverage: two-way fixed effects, two-way clustered SEs ----
mod = PanelOLS(
    df[Book_Lev_Reg[0]],
    sm.add_constant(df[Book_Lev_Reg[1:]]),
    entity_effects=True,
    time_effects=True,
)
res = mod.fit(cov_type='clustered', cluster_entity=True, cluster_time=True)
adj_r = adj_rsq(res)

# label_sig_level returns a single 'parameter' column; relabel it for the report
twfem_book = label_sig_level(res).set_axis(['twfem_book'], axis=1)
twfem_book.loc['adj_rsq'] = round(adj_r, 4)
twfem_book.loc['Observations'] = res.nobs

# ---- Main model, market leverage: same specification, other dependent variable ----
mod = PanelOLS(
    df[Mkt_Lev_Reg[0]],
    sm.add_constant(df[Mkt_Lev_Reg[1:]]),
    entity_effects=True,
    time_effects=True,
)
res = mod.fit(cov_type='clustered', cluster_entity=True, cluster_time=True)
adj_r = adj_rsq(res)

twfem_mkt = label_sig_level(res).set_axis(['twfem_mkt'], axis=1)
twfem_mkt.loc['adj_rsq'] = round(adj_r, 4)
twfem_mkt.loc['Observations'] = res.nobs

# Side-by-side report; the first row (the constant) is left out
main_report = pd.concat([twfem_book, twfem_mkt], axis=1).iloc[1:]

## Appendix G: Table 4
main_report

Unnamed: 0,twfem_book,twfem_mkt
intan_cap_AT,0.0258***,0.0173***
PPENT_AT,0.2246***,0.1651***
age,-0.0931**,-0.0686**
log_asset,0.0213***,0.0312***
Market_to_Book,-0.001,-0.0108***
Op_profit,-0.1713***,-0.2244***
Cash_liq,-0.1294***,-0.0725***
MTR_AI,0.0032,-0.0236***
adj_rsq,0.0683,0.1549
Observations,32908,32908


In [223]:
# Heterogeneity Analysis of P&T Intangibles FEM with Firm Fixed Effects
# (entity effects only -- no time effects -- with standard errors clustered by
# entity and time)

# Book Leverage Model
mod = PanelOLS(df[Hetero_reg_book[0]], sm.add_constant(df[Hetero_reg_book[1:]]), entity_effects=True)
res = mod.fit(cov_type='clustered', cluster_entity=True, cluster_time=True)

adj_r = adj_rsq(res)
hetero_book = label_sig_level(res).rename(columns={'parameter': 'twfem_book'})
hetero_book.loc['adj_rsq'] = round(adj_r, 4)
hetero_book.loc['Observations'] = res.nobs

# Market Leverage Model
mod = PanelOLS(df[Hetero_reg_mkt[0]], sm.add_constant(df[Hetero_reg_mkt[1:]]), entity_effects=True)
res = mod.fit(cov_type='clustered', cluster_entity=True, cluster_time=True)

adj_r = adj_rsq(res)
# Label the market column 'twfem_mkt' (as in the main report). It used to be
# 'twfem_book' as well, which gave the report two identically named columns.
hetero_mkt = label_sig_level(res).rename(columns={'parameter': 'twfem_mkt'})
hetero_mkt.loc['adj_rsq'] = round(adj_r, 4)
hetero_mkt.loc['Observations'] = res.nobs

# Heterogeneity regression report; the first row (the constant) is left out
hetero_report = pd.concat([hetero_book, hetero_mkt], axis=1).iloc[1:]

## Appendix G: Table 5
hetero_report

Unnamed: 0,twfem_book,twfem_book.1
intan_cap_AT,0.0151,0.0164**
High_Tan.1,0.0286**,0.0234***
After2015.1,-0.0054,0.0054
HiTan_INT,-0.0174,-0.0152**
T2_INT,0.0046,-0.0046
HiTan_T2_INT,0.0274***,0.0121**
age,0.0062***,0.0022*
log_asset,0.0193***,0.0296***
Market_to_Book,-0.0011,-0.0111***
Op_profit,-0.2002***,-0.2419***


In [226]:
# Measurement Bias Test: Subcomponent regression of P&T Intangible Assets
#
# NOTE(review): this cell uses whatever `df` is currently bound. The saved
# output reports 126514 observations, which matches the row count printed for
# the robust sample, whereas running the notebook top to bottom would use the
# `df` built from reg_df_w (32908 rows). TODO confirm which sample is intended
# and set `df` explicitly at the top of this cell.
main_reg_book = run_twfem(df, Book_Lev_Reg)
sub_reg_book1 = run_twfem(df, subcom_reg_book1)
sub_reg_book2 = run_twfem(df, subcom_reg_book2)

main_reg_mkt = run_twfem(df, Mkt_Lev_Reg)
sub_reg_mkt1 = run_twfem(df, subcom_reg_mkt1)
sub_reg_mkt2 = run_twfem(df, subcom_reg_mkt2)

# Combine the six single-column reports side by side; regressors missing from
# a specification show up as empty strings instead of NaN.
subcomp_reg_report = pd.concat([main_reg_book, sub_reg_book1, sub_reg_book2,
                                main_reg_mkt, sub_reg_mkt1, sub_reg_mkt2], axis=1).replace({np.nan: ''})

# run_twfem names every column 'twfem_book', so relabel them by model.
subcomp_reg_report = subcomp_reg_report.set_axis(['main_reg_book', 'sub_reg_book1', 'sub_reg_book2',
                                                  'main_reg_mkt', 'sub_reg_mkt1', 'sub_reg_mkt2'], axis=1)

# Disabled row-reordering snippet, kept for reference (note it operates on
# `df`, not on the report):
# row_to_move = df.iloc[3]

# df = df.drop(3)
# df = pd.concat([row_to_move.to_frame().T, df])

# Table 6
subcomp_reg_report

Unnamed: 0,main_reg_book,sub_reg_book1,sub_reg_book2,main_reg_mkt,sub_reg_mkt1,sub_reg_mkt2
intan_cap_AT,0.004,,,0.0099**,,
PPENT_AT,0.1131***,0.1683***,0.1365***,0.0916***,0.1236***,0.1037***
age,0.0073,0.0061,0.0073,0.0152,0.0143,0.0151
log_asset,0.0118***,0.0051***,0.008***,0.0214***,0.0179***,0.0198***
Market_to_Book,0.0048***,0.0047***,0.0045***,-0.0152***,-0.0152***,-0.0153***
Op_profit,-0.2921***,-0.2895***,-0.2934***,-0.2889***,-0.2883***,-0.2907***
Cash_liq,-0.1964***,-0.1641***,-0.1831***,-0.1278***,-0.1087***,-0.1206***
MTR_AI,-0.0684***,-0.0739***,-0.0755***,-0.0739***,-0.0766***,-0.0776***
adj_rsq,0.0985,0.1137,0.1081,0.1874,0.1949,0.1915
Observations,126514,126514,126514,126514,126514,126514


In [227]:
# Survivorship Bias Robustness Test
# The main book/market leverage models are re-estimated on three samples and
# the six resulting report columns are shown side by side.

# (1) Original regression sample (Unbalanced Panel Data, 2009-2021)
df = reg_df_w.set_index(['gvkey', 'fyear'])
main_reg_book, main_reg_mkt = run_twfem(df, Book_Lev_Reg), run_twfem(df, Mkt_Lev_Reg)

# (2) Strongly Balanced regression sample (2009-2021): higher exposure to survivorship bias
df = reg_df_bal_w.set_index(['gvkey', 'fyear'])
most_bias_book, most_bias_mkt = run_twfem(df, Book_Lev_Reg), run_twfem(df, Mkt_Lev_Reg)

# (3) Unbalanced Panel Data minimizing survivorship bias (1975-2021)
# NOTE: `df` stays bound to this sample after the cell finishes.
df = robust_raw_w.set_index(['gvkey', 'fyear'])
least_bias_book, least_bias_mkt = run_twfem(df, Book_Lev_Reg), run_twfem(df, Mkt_Lev_Reg)

# Book-leverage columns first, then market-leverage; NaN cells become ''.
bias_reg_report = pd.concat(
    [main_reg_book, most_bias_book, least_bias_book,
     main_reg_mkt, most_bias_mkt, least_bias_mkt],
    axis=1,
).replace({np.nan: ''})

# Every input column is called 'twfem_book'; relabel by sample and model.
bias_reg_report.columns = ['main_reg_book', 'most_bias_book', 'least_bias_book',
                           'main_reg_mkt', 'most_bias_mkt', 'least_bias_mkt']

# Table 7
bias_reg_report

Unnamed: 0,main_reg_book,most_bias_book,least_bias_book,main_reg_mkt,most_bias_mkt,least_bias_mkt
intan_cap_AT,0.0258***,0.0572***,0.004,0.0173***,0.0223**,0.0099**
PPENT_AT,0.2246***,0.2793***,0.1131***,0.1651***,0.2157***,0.0916***
age,-0.0931**,-0.0715,0.0073,-0.0686**,-0.0354,0.0152
log_asset,0.0213***,0.0371***,0.0118***,0.0312***,0.0296***,0.0214***
Market_to_Book,-0.001,0.0045***,0.0048***,-0.0108***,-0.0117***,-0.0152***
Op_profit,-0.1713***,-0.2821***,-0.2921***,-0.2244***,-0.351***,-0.2889***
Cash_liq,-0.1294***,-0.1008***,-0.1964***,-0.0725***,-0.0805***,-0.1278***
MTR_AI,0.0032,-0.0309***,-0.0684***,-0.0236***,-0.0269***,-0.0739***
adj_rsq,0.0683,0.1003,0.0985,0.1549,0.1898,0.1874
Observations,32908,21333,126514,32908,21333,126514


In [232]:
# Survivorship Bias and Measurement Bias Robustness Test
# Same three-sample comparison as the survivorship test, but using the
# subcomponent specifications (subcom_reg_book1 / subcom_reg_mkt1).

# (1) Original regression sample (Unbalanced Panel Data, 2009-2021)
df = reg_df_w.set_index(['gvkey', 'fyear'])
main_reg_book, main_reg_mkt = run_twfem(df, subcom_reg_book1), run_twfem(df, subcom_reg_mkt1)

# (2) Strongly Balanced regression sample (2009-2021): higher exposure to survivorship bias
df = reg_df_bal_w.set_index(['gvkey', 'fyear'])
most_bias_book, most_bias_mkt = run_twfem(df, subcom_reg_book1), run_twfem(df, subcom_reg_mkt1)

# (3) Unbalanced Panel Data minimizing survivorship bias (1975-2021)
# NOTE: `df` stays bound to this sample after the cell finishes.
df = robust_raw_w.set_index(['gvkey', 'fyear'])
least_bias_book, least_bias_mkt = run_twfem(df, subcom_reg_book1), run_twfem(df, subcom_reg_mkt1)

# Book-leverage columns first, then market-leverage; NaN cells become ''.
both_bias_reg_report = pd.concat(
    [main_reg_book, most_bias_book, least_bias_book,
     main_reg_mkt, most_bias_mkt, least_bias_mkt],
    axis=1,
).replace({np.nan: ''})

# Every input column is called 'twfem_book'; relabel by sample and model.
both_bias_reg_report.columns = ['main_reg_book', 'most_bias_book', 'least_bias_book',
                                'main_reg_mkt', 'most_bias_mkt', 'least_bias_mkt']

# Table 7 (label as in the original; TODO confirm the table number, since the
# survivorship-only report carries the same label)
both_bias_reg_report

Unnamed: 0,main_reg_book,most_bias_book,least_bias_book,main_reg_mkt,most_bias_mkt,least_bias_mkt
know_cap_AT,-0.0461*,0.0349,-0.0999***,-0.0243,0.0033,-0.061***
organ_cap_AT,-0.0201,-0.0337,-0.0449***,0.0164,-0.0376,0.0022
intan_AT,0.1576***,0.2178***,0.166***,0.0937***,0.1249***,0.1067***
PPENT_AT,0.291***,0.3645***,0.1683***,0.2012***,0.271***,0.1236***
age,-0.0934**,-0.0855,0.0061,-0.0683**,-0.0436,0.0143
log_asset,0.0105***,0.0177***,0.0051***,0.0263***,0.0177***,0.0179***
Market_to_Book,-0.0006,0.0045***,0.0047***,-0.0106***,-0.0117***,-0.0152***
Op_profit,-0.1748***,-0.2866***,-0.2895***,-0.2252***,-0.3508***,-0.2883***
Cash_liq,-0.0862***,-0.0471***,-0.1641***,-0.048***,-0.0461***,-0.1087***
MTR_AI,-0.003,-0.0319***,-0.0739***,-0.0259***,-0.0277***,-0.0766***
