In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import missingno as msno
import seaborn as sns
# Display settings: never truncate columns when rendering a DataFrame,
# and format every float with four decimal places.
pd.set_option('display.max_columns', None)
pd.set_option('display.float_format', lambda x: '%.4f' % x)
import warnings
warnings.simplefilter('ignore') # silence all warnings for the rest of the session
from pandarallel import pandarallel
 
# Set up pandarallel's workers (the INFO lines below are its start-up log).
# NOTE(review): none of the cells visible here call a parallel_* method — confirm this is still needed.
pandarallel.initialize()

INFO: Pandarallel will run on 12 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.


In [2]:
# Load the fund holdings table. Later cells read its 'permno', 'quarter',
# 'wficn' and 'shares' columns.
mutual_fund_holding = pd.read_csv('mutual_fund_holding_noDrops.csv')

In [30]:
# Number of distinct funds (wficn) that appear for each stock (permno)
# in each quarter, stored in the 'num_fund_hold' column.
unique_wficn_counts = (
    mutual_fund_holding
    .groupby(by=['permno', 'quarter'])['wficn']
    .nunique()
    .reset_index(name='num_fund_hold')
)

In [17]:
# Herfindahl-Hirschman index (HHI) of fund holdings per stock and quarter:
#   HHI = sum over holding rows of (shares / total shares in that stock-quarter) ** 2
#
# NOTE(review): each holdings row is squared on its own. If a wficn can
# appear on more than one row for the same (permno, quarter) — TODO confirm —
# those rows are not combined before squaring.

# Denominator: shares summed over every holdings row of a stock-quarter.
total_shares = (
    mutual_fund_holding
    .groupby(['permno', 'quarter'])['shares']
    .sum()
    .reset_index(name='total_shares')
)

# Attach the denominator to each holdings row, then derive the row's
# ownership share (s_j_i_t) and its square.
df_merged = (
    pd.merge(mutual_fund_holding, total_shares, on=['permno', 'quarter'])
    .assign(s_j_i_t=lambda held: held['shares'] / held['total_shares'])
    .assign(s_j_i_t_squared=lambda held: held['s_j_i_t'] ** 2)
)

# Collapse back to one row per stock-quarter by summing the squared shares.
hhi = (
    df_merged
    .groupby(['permno', 'quarter'])['s_j_i_t_squared']
    .sum()
    .reset_index(name='HHI')
)


In [31]:
# Combine the two concentration measures (HHI and number of holding funds)
# into one table keyed by stock and quarter; the outer join keeps keys that
# appear in either input.
investor_concentration_proxy = pd.merge(
    hhi,
    unique_wficn_counts,
    how='outer',
    on=['permno', 'quarter'],
)

In [32]:
# Display the combined HHI / fund-count table for a visual check.
investor_concentration_proxy

Unnamed: 0,permno,quarter,HHI,num_fund_hold
0,10001,1993Q4,1.0000,1
1,10001,1994Q1,0.5470,2
2,10001,1994Q2,0.5470,2
3,10001,1994Q3,0.5470,2
4,10001,1994Q4,0.4949,3
...,...,...,...,...
812341,93436,2019Q3,0.0642,205
812342,93436,2019Q4,0.0616,219
812343,93436,2020Q1,0.0614,271
812344,93436,2020Q2,0.0630,331


In [36]:
# Load the existing stock-level table that the new proxies are merged into
# (joined on 'permno' and 'quarter' in a later cell).
stock_sparsity =pd.read_csv('2024_06_04_stock_sparsity_added_avg_ret_alpha_AC_htv_proxies.csv')

In [24]:
# Load the coverage-ratio table; a later cell selects 'permno', 'quarter'
# and the percent_*_coverage_ratio columns from it.
coverage_ratio = pd.read_csv('2024_04_20_stock_sparisty_coverage_ratio.csv')

In [33]:
# Put the coverage-ratio columns next to the concentration proxies.
# Only the join keys and the listed coverage-ratio columns are taken from
# coverage_ratio; the outer join keeps stock-quarters found in either table.
investor_concentration_proxy_cr = pd.merge(
    coverage_ratio[[
        'permno',
        'quarter',
        'percent_within_3_coverage_ratio',
        'percent_within_7_coverage_ratio',
        'percent_within_11_coverage_ratio',
        'percent_within_15_coverage_ratio',
        'percent_within_19_coverage_ratio',
        'percent_benchmark_0_coverage_ratio',
        'percent_benchmark_3_coverage_ratio',
        'percent_benchmark_7_coverage_ratio',
        'percent_benchmark_11_coverage_ratio',
        'percent_benchmark_15_coverage_ratio',
        'percent_benchmark_19_coverage_ratio',
        'percent_benchmark_median_passive_coverage_ratio',
        'percent_benchmark_avg_passive_coverage_ratio',
        'percent_benchmark_largest_passive_coverage_ratio',
    ]],
    investor_concentration_proxy,
    how='outer',
    on=['permno', 'quarter'],
)

In [39]:
# Display the merged coverage-ratio / concentration table for a visual check.
investor_concentration_proxy_cr

Unnamed: 0,permno,quarter,percent_within_3_coverage_ratio,percent_within_7_coverage_ratio,percent_within_11_coverage_ratio,percent_within_15_coverage_ratio,percent_within_19_coverage_ratio,percent_benchmark_0_coverage_ratio,percent_benchmark_3_coverage_ratio,percent_benchmark_7_coverage_ratio,percent_benchmark_11_coverage_ratio,percent_benchmark_15_coverage_ratio,percent_benchmark_19_coverage_ratio,percent_benchmark_median_passive_coverage_ratio,percent_benchmark_avg_passive_coverage_ratio,percent_benchmark_largest_passive_coverage_ratio,HHI,num_fund_hold
0,10001,1993Q4,,,,,,,,,,,,,,,1.0000,1
1,10001,1994Q1,,,,,,,,,,,,,,,0.5470,2
2,10001,1994Q2,,,,,,,,,,,,,,,0.5470,2
3,10001,1994Q3,0.0047,0.0047,0.0047,0.0047,0.0047,,,,,,,,,,0.5470,2
4,10001,1994Q4,,,,,,,,,,,,,,,0.4949,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
812341,93436,2019Q3,0.0391,0.0386,0.0352,0.0305,0.0304,0.0189,0.0206,0.0220,0.0219,0.0219,0.0219,0.0226,0.0220,0.0220,0.0642,205
812342,93436,2019Q4,0.0671,0.0669,0.0669,0.0669,0.0669,0.0360,0.0382,0.0382,0.0382,0.0382,0.0382,0.0384,0.0381,0.0393,0.0616,219
812343,93436,2020Q1,0.0476,0.0431,0.0362,0.0359,0.0359,0.0225,0.0238,0.0238,0.0238,0.0238,0.0238,0.0240,0.0249,0.0239,0.0614,271
812344,93436,2020Q2,0.0771,0.0771,0.0771,0.0771,0.0771,0.0356,0.0376,0.0376,0.0376,0.0376,0.0376,0.0412,0.0399,0.0399,0.0630,331


In [40]:
# Add the coverage-ratio and concentration columns to the stock-level table.
# The outer join keeps every (permno, quarter) found in either input.
stock_sparsity_icp = pd.merge(
    stock_sparsity,
    investor_concentration_proxy_cr,
    how='outer',
    on=['permno', 'quarter'],
)

In [42]:
# Remove the stray 'Unnamed: 0' column (presumably a saved row index carried
# in from the stock_sparsity CSV — TODO confirm) so it is not written out.
# The result is rebound instead of dropped in place, and errors='ignore'
# makes the cell safe to re-run: a second run is a no-op instead of a KeyError.
stock_sparsity_icp = stock_sparsity_icp.drop(columns=['Unnamed: 0'], errors='ignore')

In [44]:
# Write the final merged table to disk; index=False leaves the row index out
# of the file.
stock_sparsity_icp.to_csv('2024_06_16_stock_sparsity_added_avg_ret_alpha_AC_htv_ivc_proxies.csv',index=False)