In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from pandas.tseries.offsets import BQuarterEnd
pd.set_option('display.max_columns', None)
import warnings
warnings.simplefilter('ignore') #

## Calculate Forward Returns and Alpha Daily Average

In [2]:
daily_returns = pd.read_csv('daily_rets_and_alphas.csv')

In [3]:
#Cut the dataframe, To be more memory-efficent
daily_returns = daily_returns[['PERMNO','DATE','RET','alpha']]

In [4]:
# Convert the 'DATE' column to a datetime
daily_returns['DATE'] = pd.to_datetime(daily_returns['DATE'])

# Convert RET column to numeric, stripping out the '%' and converting to float
# Replace non-numeric values with NaN to avoid conversion errors
daily_returns['RET'] = pd.to_numeric(daily_returns['RET'].str.replace('%', ''), errors='coerce') / 100
daily_returns['alpha'] = pd.to_numeric(daily_returns['alpha'], errors='coerce')

# Calculate the last day of each quarter for each date
daily_returns['quarter'] = daily_returns['DATE'].dt.to_period('Q')

## Convert daily return to quarterly return first, to simplify the for loop later

In [5]:
quarterly_returns = daily_returns.groupby(['PERMNO', 'quarter'])[['RET', 'alpha']].mean()

In [6]:
quarterly_returns.reset_index(inplace=True)

# Start Calculating the Returns and Alpha for each h

In [7]:
# Define the h values
h_values = [0, 3, 7, 11, 15, 19]

In [8]:
# Initialize the forward_returns dictionary with lists for each key
forward_returns = {
    'PERMNO': [],
    'quarter': []
}
forward_returns.update({f'forward_{h}_avg_RET': [] for h in h_values})
forward_returns.update({f'forward_{h}_avg_alpha': [] for h in h_values})
forward_returns.update({f'if_ret_missing_{h}': [] for h in h_values})
forward_returns.update({f'if_alpha_missing_{h}': [] for h in h_values})
forward_returns.update({f'num_available_quarters_ret_{h}': [] for h in h_values})
forward_returns.update({f'num_available_quarters_alpha_{h}': [] for h in h_values})

In [9]:
forward_returns

{'PERMNO': [],
 'quarter': [],
 'forward_0_avg_RET': [],
 'forward_3_avg_RET': [],
 'forward_7_avg_RET': [],
 'forward_11_avg_RET': [],
 'forward_15_avg_RET': [],
 'forward_19_avg_RET': [],
 'forward_0_avg_alpha': [],
 'forward_3_avg_alpha': [],
 'forward_7_avg_alpha': [],
 'forward_11_avg_alpha': [],
 'forward_15_avg_alpha': [],
 'forward_19_avg_alpha': [],
 'if_ret_missing_0': [],
 'if_ret_missing_3': [],
 'if_ret_missing_7': [],
 'if_ret_missing_11': [],
 'if_ret_missing_15': [],
 'if_ret_missing_19': [],
 'if_alpha_missing_0': [],
 'if_alpha_missing_3': [],
 'if_alpha_missing_7': [],
 'if_alpha_missing_11': [],
 'if_alpha_missing_15': [],
 'if_alpha_missing_19': [],
 'num_available_quarters_ret_0': [],
 'num_available_quarters_ret_3': [],
 'num_available_quarters_ret_7': [],
 'num_available_quarters_ret_11': [],
 'num_available_quarters_ret_15': [],
 'num_available_quarters_ret_19': [],
 'num_available_quarters_alpha_0': [],
 'num_available_quarters_alpha_3': [],
 'num_available_qu

In [10]:
quarterly_returns['quarter'] = pd.PeriodIndex(quarterly_returns['quarter'], freq='Q')

In [11]:
#quarterly_returns

In [12]:
grouped = quarterly_returns.groupby('PERMNO')

#### Precalculate the future_quarters to be speed up the calcualtion

In [13]:
def precalculate_future_quarters(start_quarters, h_values):
    """
    Pre-calculate future quarter ranges for a list of start quarters and horizon values.
    
    :param start_quarters: A list or series of unique starting quarters.
    :param h_values: A list of horizon values to calculate future quarters for.
    :return: A dictionary where each key is a tuple (start_quarter, h) and the value is the corresponding future quarters range.
    """
    future_quarters_dict = {}
    for start_quarter in start_quarters:
        for h in h_values:
            # Generate the range starting from the next quarter after the start quarter, for h quarters
            future_quarters_range = pd.period_range(start=start_quarter, periods=h+2, freq='Q')[1:]
            future_quarters_dict[(start_quarter, h)] = future_quarters_range
    return future_quarters_dict

# Usage example:
start_quarters = quarterly_returns['quarter'].unique()  # Example start quarters, replace with your actual data

# Pre-calculate future quarters ranges
precalculated_ranges = precalculate_future_quarters(start_quarters, h_values)

In [14]:
precalculated_ranges 

{(Period('1987Q1', 'Q-DEC'),
  0): PeriodIndex(['1987Q2'], dtype='period[Q-DEC]'),
 (Period('1987Q1', 'Q-DEC'),
  3): PeriodIndex(['1987Q2', '1987Q3', '1987Q4', '1988Q1'], dtype='period[Q-DEC]'),
 (Period('1987Q1', 'Q-DEC'),
  7): PeriodIndex(['1987Q2', '1987Q3', '1987Q4', '1988Q1', '1988Q2', '1988Q3',
              '1988Q4', '1989Q1'],
             dtype='period[Q-DEC]'),
 (Period('1987Q1', 'Q-DEC'),
  11): PeriodIndex(['1987Q2', '1987Q3', '1987Q4', '1988Q1', '1988Q2', '1988Q3',
              '1988Q4', '1989Q1', '1989Q2', '1989Q3', '1989Q4', '1990Q1'],
             dtype='period[Q-DEC]'),
 (Period('1987Q1', 'Q-DEC'),
  15): PeriodIndex(['1987Q2', '1987Q3', '1987Q4', '1988Q1', '1988Q2', '1988Q3',
              '1988Q4', '1989Q1', '1989Q2', '1989Q3', '1989Q4', '1990Q1',
              '1990Q2', '1990Q3', '1990Q4', '1991Q1'],
             dtype='period[Q-DEC]'),
 (Period('1987Q1', 'Q-DEC'),
  19): PeriodIndex(['1987Q2', '1987Q3', '1987Q4', '1988Q1', '1988Q2', '1988Q3',
              '1988

In [15]:
# Loop through each group
for permno, group in tqdm(grouped, desc='Calculating forward returns'):
    # Iterate over rows in group
    for i in range(len(group)-1):
        # Get the current quarter
        current_quarter = group.iloc[i]['quarter']
        # Append permno and current quarter to the dictionary
        forward_returns['PERMNO'].append(permno)
        forward_returns['quarter'].append(current_quarter)
        
        # Loop for each horizon h
        for h in h_values:
            # Calculate the future quarters range, excluding the current quarter
            #future_quarters_range = pd.period_range(start=current_quarter, periods=h+2, freq='Q')[1:]
            future_quarters_range = precalculated_ranges[(current_quarter, h)]
            future_quarters = group[group['quarter'].isin(future_quarters_range)]
            # Calculate the product of 'RET_one' and 'alpha_one' for the future quarters
            ret_mean = future_quarters['RET'].mean()
            alpha_mean = future_quarters['alpha'].mean()

            # Check if the number of future quarters is less than h
            len_future_quarters = len(future_quarters)

            if len_future_quarters < h+1:
                if_ret_missing = 1
                if_alpha_missing = 1
            else:
                if_ret_missing = 0
                if_alpha_missing = 0
            
            # Append results and available quarter counts
            forward_returns[f'forward_{h}_avg_RET'].append(ret_mean)
            forward_returns[f'forward_{h}_avg_alpha'].append(alpha_mean)
            forward_returns[f'if_ret_missing_{h}'].append(if_ret_missing)
            forward_returns[f'if_alpha_missing_{h}'].append(if_alpha_missing)
            forward_returns[f'num_available_quarters_ret_{h}'].append(len_future_quarters)
            forward_returns[f'num_available_quarters_alpha_{h}'].append(len_future_quarters)
# Convert the dictionary to a DataFrame
forward_returns_df = pd.DataFrame(forward_returns)

Calculating forward returns: 100%|███████████████████████████████████████████████| 34723/34723 [25:53<00:00, 22.35it/s]


In [16]:
forward_returns_df

Unnamed: 0,PERMNO,quarter,forward_0_avg_RET,forward_3_avg_RET,forward_7_avg_RET,forward_11_avg_RET,forward_15_avg_RET,forward_19_avg_RET,forward_0_avg_alpha,forward_3_avg_alpha,forward_7_avg_alpha,forward_11_avg_alpha,forward_15_avg_alpha,forward_19_avg_alpha,if_ret_missing_0,if_ret_missing_3,if_ret_missing_7,if_ret_missing_11,if_ret_missing_15,if_ret_missing_19,if_alpha_missing_0,if_alpha_missing_3,if_alpha_missing_7,if_alpha_missing_11,if_alpha_missing_15,if_alpha_missing_19,num_available_quarters_ret_0,num_available_quarters_ret_3,num_available_quarters_ret_7,num_available_quarters_ret_11,num_available_quarters_ret_15,num_available_quarters_ret_19,num_available_quarters_alpha_0,num_available_quarters_alpha_3,num_available_quarters_alpha_7,num_available_quarters_alpha_11,num_available_quarters_alpha_15,num_available_quarters_alpha_19
0,10000,1987Q1,-0.002268,-0.002268,-0.002268,-0.002268,-0.002268,-0.002268,-0.005898,-0.005898,-0.005898,-0.005898,-0.005898,-0.005898,0,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
1,10001,1987Q1,0.000124,0.000893,0.001074,0.001396,0.001120,0.001164,0.000244,0.000427,0.000488,0.000635,0.000785,0.000852,0,0,0,0,0,0,0,0,0,0,0,0,1,4,8,12,16,20,1,4,8,12,16,20
2,10001,1987Q2,0.002068,0.001188,0.001207,0.001403,0.001232,0.001183,0.000281,0.000516,0.000535,0.000720,0.000832,0.000889,0,0,0,0,0,0,0,0,0,0,0,0,1,4,8,12,16,20,1,4,8,12,16,20
3,10001,1987Q3,-0.000142,0.000911,0.001501,0.001274,0.001225,0.001292,0.000580,0.000584,0.000603,0.000803,0.000880,0.000923,0,0,0,0,0,0,0,0,0,0,0,0,1,4,8,12,16,20,1,4,8,12,16,20
4,10001,1987Q4,0.001522,0.001236,0.001844,0.001288,0.001492,0.001267,0.000602,0.000575,0.000664,0.000862,0.000923,0.000934,0,0,0,0,0,0,0,0,0,0,0,0,1,4,8,12,16,20,1,4,8,12,16,20
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1441784,93436,2021Q3,0.005559,0.000869,-0.001538,-0.001538,-0.001538,-0.001538,0.003097,0.003628,0.003549,0.003549,0.003549,0.003549,0,0,1,1,1,1,0,0,1,1,1,1,1,4,5,5,5,5,1,4,5,5,5,5
1441785,93436,2021Q4,0.001283,-0.003313,-0.003313,-0.003313,-0.003313,-0.003313,0.003440,0.003662,0.003662,0.003662,0.003662,0.003662,0,0,1,1,1,1,0,0,1,1,1,1,1,4,4,4,4,4,1,4,4,4,4,4
1441786,93436,2022Q1,-0.006441,-0.004845,-0.004845,-0.004845,-0.004845,-0.004845,0.003976,0.003736,0.003736,0.003736,0.003736,0.003736,0,1,1,1,1,1,0,1,1,1,1,1,1,3,3,3,3,3,1,3,3,3,3,3
1441787,93436,2022Q2,0.003074,-0.004046,-0.004046,-0.004046,-0.004046,-0.004046,0.004000,0.003616,0.003616,0.003616,0.003616,0.003616,0,1,1,1,1,1,0,1,1,1,1,1,1,2,2,2,2,2,1,2,2,2,2,2


In [17]:
forward_returns_df.describe().round(4)

Unnamed: 0,PERMNO,forward_0_avg_RET,forward_3_avg_RET,forward_7_avg_RET,forward_11_avg_RET,forward_15_avg_RET,forward_19_avg_RET,forward_0_avg_alpha,forward_3_avg_alpha,forward_7_avg_alpha,forward_11_avg_alpha,forward_15_avg_alpha,forward_19_avg_alpha,if_ret_missing_0,if_ret_missing_3,if_ret_missing_7,if_ret_missing_11,if_ret_missing_15,if_ret_missing_19,if_alpha_missing_0,if_alpha_missing_3,if_alpha_missing_7,if_alpha_missing_11,if_alpha_missing_15,if_alpha_missing_19,num_available_quarters_ret_0,num_available_quarters_ret_3,num_available_quarters_ret_7,num_available_quarters_ret_11,num_available_quarters_ret_15,num_available_quarters_ret_19,num_available_quarters_alpha_0,num_available_quarters_alpha_3,num_available_quarters_alpha_7,num_available_quarters_alpha_11,num_available_quarters_alpha_15,num_available_quarters_alpha_19
count,1441789.0,1433334.0,1434685.0,1435595.0,1436541.0,1437585.0,1438411.0,1439594.0,1440135.0,1440473.0,1440782.0,1441067.0,1441245.0,1441789.0,1441789.0,1441789.0,1441789.0,1441789.0,1441789.0,1441789.0,1441789.0,1441789.0,1441789.0,1441789.0,1441789.0,1441789.0,1441789.0,1441789.0,1441789.0,1441789.0,1441789.0,1441789.0,1441789.0,1441789.0,1441789.0,1441789.0,1441789.0
mean,52998.98,0.0008,0.0008,0.0008,0.0008,0.0008,0.0008,0.0003,0.0003,0.0003,0.0003,0.0003,0.0003,0.0005,0.0705,0.1537,0.2279,0.2943,0.3531,0.0005,0.0705,0.1537,0.2279,0.2943,0.3531,0.9995,3.8565,7.364,10.5639,13.4899,16.173,0.9995,3.8565,7.364,10.5639,13.4899,16.173
std,29237.63,0.0065,0.0057,0.0054,0.0053,0.0052,0.0052,0.0015,0.0015,0.0015,0.0014,0.0014,0.0014,0.0223,0.256,0.3606,0.4195,0.4557,0.4779,0.0223,0.256,0.3606,0.4195,0.4557,0.4779,0.0223,0.5657,1.6865,3.0435,4.5355,6.1062,0.0223,0.5657,1.6865,3.0435,4.5355,6.1062
min,10000.0,-0.8462,-0.8462,-0.8462,-0.8462,-0.8462,-0.8462,-0.018,-0.018,-0.018,-0.018,-0.018,-0.018,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,21856.0,-0.0011,-0.0003,0.0,0.0001,0.0002,0.0002,-0.0003,-0.0002,-0.0002,-0.0002,-0.0001,-0.0001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,4.0,8.0,12.0,13.0,13.0,1.0,4.0,8.0,12.0,13.0,13.0
50%,55862.0,0.0005,0.0006,0.0006,0.0006,0.0007,0.0007,0.0002,0.0002,0.0002,0.0002,0.0002,0.0002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,4.0,8.0,12.0,16.0,20.0,1.0,4.0,8.0,12.0,16.0,20.0
75%,80632.0,0.0024,0.0017,0.0015,0.0014,0.0013,0.0013,0.0007,0.0007,0.0007,0.0007,0.0006,0.0006,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,4.0,8.0,12.0,16.0,20.0,1.0,4.0,8.0,12.0,16.0,20.0
max,93436.0,2.4688,1.2846,1.2846,1.2846,1.2846,1.2846,0.1511,0.1498,0.1443,0.137,0.137,0.137,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,4.0,8.0,12.0,16.0,20.0,1.0,4.0,8.0,12.0,16.0,20.0


In [18]:
forward_returns_df.rename(columns={'PERMNO': 'permno'}, inplace=True)

#### I do not need num_available_quarters_ret/alpha to merge

In [19]:
fr_columns_to_drop = [f'num_available_quarters_ret_{h}' for h in h_values] + [f'num_available_quarters_alpha_{h}' for h in h_values]

In [20]:
forward_returns_df

Unnamed: 0,permno,quarter,forward_0_avg_RET,forward_3_avg_RET,forward_7_avg_RET,forward_11_avg_RET,forward_15_avg_RET,forward_19_avg_RET,forward_0_avg_alpha,forward_3_avg_alpha,forward_7_avg_alpha,forward_11_avg_alpha,forward_15_avg_alpha,forward_19_avg_alpha,if_ret_missing_0,if_ret_missing_3,if_ret_missing_7,if_ret_missing_11,if_ret_missing_15,if_ret_missing_19,if_alpha_missing_0,if_alpha_missing_3,if_alpha_missing_7,if_alpha_missing_11,if_alpha_missing_15,if_alpha_missing_19,num_available_quarters_ret_0,num_available_quarters_ret_3,num_available_quarters_ret_7,num_available_quarters_ret_11,num_available_quarters_ret_15,num_available_quarters_ret_19,num_available_quarters_alpha_0,num_available_quarters_alpha_3,num_available_quarters_alpha_7,num_available_quarters_alpha_11,num_available_quarters_alpha_15,num_available_quarters_alpha_19
0,10000,1987Q1,-0.002268,-0.002268,-0.002268,-0.002268,-0.002268,-0.002268,-0.005898,-0.005898,-0.005898,-0.005898,-0.005898,-0.005898,0,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
1,10001,1987Q1,0.000124,0.000893,0.001074,0.001396,0.001120,0.001164,0.000244,0.000427,0.000488,0.000635,0.000785,0.000852,0,0,0,0,0,0,0,0,0,0,0,0,1,4,8,12,16,20,1,4,8,12,16,20
2,10001,1987Q2,0.002068,0.001188,0.001207,0.001403,0.001232,0.001183,0.000281,0.000516,0.000535,0.000720,0.000832,0.000889,0,0,0,0,0,0,0,0,0,0,0,0,1,4,8,12,16,20,1,4,8,12,16,20
3,10001,1987Q3,-0.000142,0.000911,0.001501,0.001274,0.001225,0.001292,0.000580,0.000584,0.000603,0.000803,0.000880,0.000923,0,0,0,0,0,0,0,0,0,0,0,0,1,4,8,12,16,20,1,4,8,12,16,20
4,10001,1987Q4,0.001522,0.001236,0.001844,0.001288,0.001492,0.001267,0.000602,0.000575,0.000664,0.000862,0.000923,0.000934,0,0,0,0,0,0,0,0,0,0,0,0,1,4,8,12,16,20,1,4,8,12,16,20
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1441784,93436,2021Q3,0.005559,0.000869,-0.001538,-0.001538,-0.001538,-0.001538,0.003097,0.003628,0.003549,0.003549,0.003549,0.003549,0,0,1,1,1,1,0,0,1,1,1,1,1,4,5,5,5,5,1,4,5,5,5,5
1441785,93436,2021Q4,0.001283,-0.003313,-0.003313,-0.003313,-0.003313,-0.003313,0.003440,0.003662,0.003662,0.003662,0.003662,0.003662,0,0,1,1,1,1,0,0,1,1,1,1,1,4,4,4,4,4,1,4,4,4,4,4
1441786,93436,2022Q1,-0.006441,-0.004845,-0.004845,-0.004845,-0.004845,-0.004845,0.003976,0.003736,0.003736,0.003736,0.003736,0.003736,0,1,1,1,1,1,0,1,1,1,1,1,1,3,3,3,3,3,1,3,3,3,3,3
1441787,93436,2022Q2,0.003074,-0.004046,-0.004046,-0.004046,-0.004046,-0.004046,0.004000,0.003616,0.003616,0.003616,0.003616,0.003616,0,1,1,1,1,1,0,1,1,1,1,1,1,2,2,2,2,2,1,2,2,2,2,2


In [21]:
forward_returns_df.columns

Index(['permno', 'quarter', 'forward_0_avg_RET', 'forward_3_avg_RET',
       'forward_7_avg_RET', 'forward_11_avg_RET', 'forward_15_avg_RET',
       'forward_19_avg_RET', 'forward_0_avg_alpha', 'forward_3_avg_alpha',
       'forward_7_avg_alpha', 'forward_11_avg_alpha', 'forward_15_avg_alpha',
       'forward_19_avg_alpha', 'if_ret_missing_0', 'if_ret_missing_3',
       'if_ret_missing_7', 'if_ret_missing_11', 'if_ret_missing_15',
       'if_ret_missing_19', 'if_alpha_missing_0', 'if_alpha_missing_3',
       'if_alpha_missing_7', 'if_alpha_missing_11', 'if_alpha_missing_15',
       'if_alpha_missing_19', 'num_available_quarters_ret_0',
       'num_available_quarters_ret_3', 'num_available_quarters_ret_7',
       'num_available_quarters_ret_11', 'num_available_quarters_ret_15',
       'num_available_quarters_ret_19', 'num_available_quarters_alpha_0',
       'num_available_quarters_alpha_3', 'num_available_quarters_alpha_7',
       'num_available_quarters_alpha_11', 'num_available_quarte

#### Import stock_sparsity and prepare to merge and drop if_ret/alpha_missing because last time it was calculated incorrectly, suppose to compare len(future_quarters) with h+1 not h. It was corrected for this version

In [22]:
stock_sparsity = pd.read_csv('2024_04_20_stock_sparisty_coverage_ratio.csv')

In [23]:
stock_sparsity['quarter'] = pd.PeriodIndex(stock_sparsity['quarter'], freq='Q')

In [24]:
columns_to_drop = [f'if_ret_missing_{h}' for h in h_values] + [f'if_alpha_missing_{h}' for h in h_values]

In [25]:
stock_sparsity

Unnamed: 0,permno,quarter,percent_benchmark_3_equ_w_mean,percent_benchmark_7_equ_w_mean,percent_benchmark_11_equ_w_mean,percent_benchmark_15_equ_w_mean,percent_benchmark_19_equ_w_mean,percent_within_3_equ_w_mean,percent_within_7_equ_w_mean,percent_within_11_equ_w_mean,percent_within_15_equ_w_mean,percent_within_19_equ_w_mean,percent_benchmark_median_passive_equ_w_mean,percent_benchmark_avg_passive_equ_w_mean,percent_benchmark_largest_passive_equ_w_mean,percent_benchmark_0_equ_w_mean,percent_benchmark_3_ow_weighted,percent_benchmark_7_ow_weighted,percent_benchmark_11_ow_weighted,percent_benchmark_15_ow_weighted,percent_benchmark_19_ow_weighted,percent_within_3_ow_weighted,percent_within_7_ow_weighted,percent_within_11_ow_weighted,percent_within_15_ow_weighted,percent_within_19_ow_weighted,percent_benchmark_median_passive_ow_weighted,percent_benchmark_avg_passive_ow_weighted,percent_benchmark_largest_passive_ow_weighted,percent_benchmark_0_ow_weighted,percent_within_3_coverage_ratio,percent_within_7_coverage_ratio,percent_within_11_coverage_ratio,percent_within_15_coverage_ratio,percent_within_19_coverage_ratio,percent_benchmark_0_coverage_ratio,percent_benchmark_3_coverage_ratio,percent_benchmark_7_coverage_ratio,percent_benchmark_11_coverage_ratio,percent_benchmark_15_coverage_ratio,percent_benchmark_19_coverage_ratio,percent_benchmark_median_passive_coverage_ratio,percent_benchmark_avg_passive_coverage_ratio,percent_benchmark_largest_passive_coverage_ratio
0,10001,1993Q4,,,,,,0.978810,0.978810,0.978810,0.978810,0.978810,,,,,,,,,,0.978810,0.978810,0.978810,0.978810,0.978810,,,,,,,,,,,,,,,,,,
1,10001,1994Q1,,,,,,0.956258,0.941814,0.937538,0.937538,0.937538,,,,,,,,,,0.951808,0.933620,0.928033,0.928033,0.928033,,,,,,,,,,,,,,,,,,
2,10001,1994Q2,,,,,,0.963313,0.953012,0.948519,0.948519,0.948519,,,,,,,,,,0.965632,0.952818,0.946948,0.946948,0.946948,,,,,,,,,,,,,,,,,,
3,10001,1994Q3,,,,,,0.968474,0.951422,0.947058,0.947058,0.947058,,,,,,,,,,0.966233,0.946068,0.940365,0.940365,0.940365,,,,,0.004679,0.004679,0.004679,0.004679,0.004679,,,,,,,,,
4,10001,1994Q4,,,,,,0.990351,0.981302,0.975410,0.975410,0.975410,,,,,,,,,,0.987407,0.973160,0.962410,0.962410,0.962410,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
812341,93436,2019Q3,0.064899,0.063676,0.054593,0.051276,0.049703,0.876811,0.789123,0.732157,0.684718,0.659270,0.323656,0.271712,0.275869,0.069832,0.065968,0.065718,0.060771,0.056680,0.054804,0.878578,0.780463,0.641075,0.477119,0.446252,0.326321,0.300734,0.305760,0.061765,0.039110,0.038556,0.035195,0.030489,0.030381,0.018870,0.020632,0.021977,0.021942,0.021942,0.021942,0.022574,0.021964,0.021964
812342,93436,2019Q4,0.050611,0.046551,0.044405,0.042102,0.043499,0.582023,0.509180,0.470403,0.444405,0.427560,0.391169,0.331805,0.400720,0.062188,0.049850,0.045352,0.041167,0.038567,0.037066,0.622953,0.561318,0.522804,0.484759,0.462582,0.367609,0.331796,0.379821,0.057648,0.067115,0.066873,0.066869,0.066869,0.066869,0.036032,0.038170,0.038170,0.038171,0.038171,0.038173,0.038447,0.038070,0.039333
812343,93436,2020Q1,0.068654,0.060344,0.055515,0.052360,0.050424,0.806737,0.703349,0.637854,0.594294,0.566024,0.273953,0.307138,0.252042,0.076724,0.059704,0.053595,0.049265,0.046214,0.044368,0.800263,0.649629,0.466609,0.418400,0.387704,0.236444,0.266161,0.233034,0.059871,0.047576,0.043115,0.036156,0.035884,0.035888,0.022494,0.023839,0.023838,0.023838,0.023838,0.023838,0.023989,0.024884,0.023895
812344,93436,2020Q2,0.068550,0.062575,0.058215,0.055174,0.053548,0.785646,0.688432,0.633761,0.595140,0.566579,0.308194,0.302060,0.291221,0.078116,0.064292,0.058968,0.054780,0.052147,0.050711,0.863481,0.784060,0.735706,0.696130,0.657709,0.373092,0.355845,0.351365,0.065285,0.077093,0.077093,0.077093,0.077093,0.077093,0.035592,0.037640,0.037640,0.037640,0.037640,0.037640,0.041159,0.039940,0.039862


In [26]:
columns_to_drop

['if_ret_missing_0',
 'if_ret_missing_3',
 'if_ret_missing_7',
 'if_ret_missing_11',
 'if_ret_missing_15',
 'if_ret_missing_19',
 'if_alpha_missing_0',
 'if_alpha_missing_3',
 'if_alpha_missing_7',
 'if_alpha_missing_11',
 'if_alpha_missing_15',
 'if_alpha_missing_19']

In [31]:
stock_sparsity_updated = stock_sparsity_updated.drop(columns=columns_to_drop, axis=1)

#### New Merged Datset contain 70 columns as desired

In [29]:
stock_sparsity_updated = pd.merge(stock_sparsity, forward_returns_df, on=['permno', 'quarter'], how='left')

In [32]:
stock_sparsity_updated

Unnamed: 0,permno,quarter,percent_benchmark_3_equ_w_mean,percent_benchmark_7_equ_w_mean,percent_benchmark_11_equ_w_mean,percent_benchmark_15_equ_w_mean,percent_benchmark_19_equ_w_mean,percent_within_3_equ_w_mean,percent_within_7_equ_w_mean,percent_within_11_equ_w_mean,percent_within_15_equ_w_mean,percent_within_19_equ_w_mean,percent_benchmark_median_passive_equ_w_mean,percent_benchmark_avg_passive_equ_w_mean,percent_benchmark_largest_passive_equ_w_mean,percent_benchmark_0_equ_w_mean,percent_benchmark_3_ow_weighted,percent_benchmark_7_ow_weighted,percent_benchmark_11_ow_weighted,percent_benchmark_15_ow_weighted,percent_benchmark_19_ow_weighted,percent_within_3_ow_weighted,percent_within_7_ow_weighted,percent_within_11_ow_weighted,percent_within_15_ow_weighted,percent_within_19_ow_weighted,percent_benchmark_median_passive_ow_weighted,percent_benchmark_avg_passive_ow_weighted,percent_benchmark_largest_passive_ow_weighted,percent_benchmark_0_ow_weighted,percent_within_3_coverage_ratio,percent_within_7_coverage_ratio,percent_within_11_coverage_ratio,percent_within_15_coverage_ratio,percent_within_19_coverage_ratio,percent_benchmark_0_coverage_ratio,percent_benchmark_3_coverage_ratio,percent_benchmark_7_coverage_ratio,percent_benchmark_11_coverage_ratio,percent_benchmark_15_coverage_ratio,percent_benchmark_19_coverage_ratio,percent_benchmark_median_passive_coverage_ratio,percent_benchmark_avg_passive_coverage_ratio,percent_benchmark_largest_passive_coverage_ratio,forward_0_avg_RET,forward_3_avg_RET,forward_7_avg_RET,forward_11_avg_RET,forward_15_avg_RET,forward_19_avg_RET,forward_0_avg_alpha,forward_3_avg_alpha,forward_7_avg_alpha,forward_11_avg_alpha,forward_15_avg_alpha,forward_19_avg_alpha,num_available_quarters_ret_0,num_available_quarters_ret_3,num_available_quarters_ret_7,num_available_quarters_ret_11,num_available_quarters_ret_15,num_available_quarters_ret_19,num_available_quarters_alpha_0,num_available_quarters_alpha_3,num_available_quarters_alpha_7,num_available_quarters_alpha_11,num_available_quarters_alpha_15,num_available_quarters_alpha_19
0,10001,1993Q4,,,,,,0.978810,0.978810,0.978810,0.978810,0.978810,,,,,,,,,,0.978810,0.978810,0.978810,0.978810,0.978810,,,,,,,,,,,,,,,,,,,-0.000712,-0.000004,0.000726,0.000542,0.000656,0.000641,0.001160,0.000963,0.000711,0.000553,0.000456,0.000428,1.0,4.0,8.0,12.0,16.0,20.0,1.0,4.0,8.0,12.0,16.0,20.0
1,10001,1994Q1,,,,,,0.956258,0.941814,0.937538,0.937538,0.937538,,,,,,,,,,0.951808,0.933620,0.928033,0.928033,0.928033,,,,,,,,,,,,,,,,,,,0.000526,0.000135,0.000958,0.000754,0.000688,0.000627,0.000969,0.000811,0.000623,0.000462,0.000401,0.000388,1.0,4.0,8.0,12.0,16.0,20.0,1.0,4.0,8.0,12.0,16.0,20.0
2,10001,1994Q2,,,,,,0.963313,0.953012,0.948519,0.948519,0.948519,,,,,,,,,,0.965632,0.952818,0.946948,0.946948,0.946948,,,,,,,,,,,,,,,,,,,0.001682,0.000653,0.000623,0.000710,0.000664,0.000614,0.001017,0.000710,0.000537,0.000406,0.000356,0.000359,1.0,4.0,8.0,12.0,16.0,20.0,1.0,4.0,8.0,12.0,16.0,20.0
3,10001,1994Q3,,,,,,0.968474,0.951422,0.947058,0.947058,0.947058,,,,,,,,,,0.966233,0.946068,0.940365,0.940365,0.940365,,,,,0.004679,0.004679,0.004679,0.004679,0.004679,,,,,,,,,,-0.001511,0.000412,0.000662,0.000710,0.000651,0.000504,0.000703,0.000563,0.000434,0.000332,0.000317,0.000331,1.0,4.0,8.0,12.0,16.0,20.0,1.0,4.0,8.0,12.0,16.0,20.0
4,10001,1994Q4,,,,,,0.990351,0.981302,0.975410,0.975410,0.975410,,,,,,,,,,0.987407,0.973160,0.962410,0.962410,0.962410,,,,,,,,,,,,,,,,,,,-0.000155,0.001456,0.000815,0.000876,0.000803,0.000651,0.000554,0.000460,0.000349,0.000287,0.000295,0.000316,1.0,4.0,8.0,12.0,16.0,20.0,1.0,4.0,8.0,12.0,16.0,20.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
812341,93436,2019Q3,0.064899,0.063676,0.054593,0.051276,0.049703,0.876811,0.789123,0.732157,0.684718,0.659270,0.323656,0.271712,0.275869,0.069832,0.065968,0.065718,0.060771,0.056680,0.054804,0.878578,0.780463,0.641075,0.477119,0.446252,0.326321,0.300734,0.305760,0.061765,0.039110,0.038556,0.035195,0.030489,0.030381,0.018870,0.020632,0.021977,0.021942,0.021942,0.021942,0.022574,0.021964,0.021964,0.009118,0.010194,0.006538,0.004648,0.003432,0.003432,0.000275,0.001077,0.001915,0.002486,0.002544,0.002544,1.0,4.0,8.0,12.0,13.0,13.0,1.0,4.0,8.0,12.0,13.0,13.0
812342,93436,2019Q4,0.050611,0.046551,0.044405,0.042102,0.043499,0.582023,0.509180,0.470403,0.444405,0.427560,0.391169,0.331805,0.400720,0.062188,0.049850,0.045352,0.041167,0.038567,0.037066,0.622953,0.561318,0.522804,0.484759,0.462582,0.367609,0.331796,0.379821,0.057648,0.067115,0.066873,0.066869,0.066869,0.066869,0.036032,0.038170,0.038170,0.038171,0.038171,0.038173,0.038447,0.038070,0.039333,0.006426,0.010028,0.006093,0.002958,0.002958,0.002958,0.000865,0.001616,0.002268,0.002733,0.002733,0.002733,1.0,4.0,8.0,12.0,12.0,12.0,1.0,4.0,8.0,12.0,12.0,12.0
812343,93436,2020Q1,0.068654,0.060344,0.055515,0.052360,0.050424,0.806737,0.703349,0.637854,0.594294,0.566024,0.273953,0.307138,0.252042,0.076724,0.059704,0.053595,0.049265,0.046214,0.044368,0.800263,0.649629,0.466609,0.418400,0.387704,0.236444,0.266161,0.233034,0.059871,0.047576,0.043115,0.036156,0.035884,0.035888,0.022494,0.023839,0.023838,0.023838,0.023838,0.023838,0.023989,0.024884,0.023895,0.012577,0.008458,0.005451,0.002643,0.002643,0.002643,0.001219,0.002109,0.002590,0.002902,0.002902,0.002902,1.0,4.0,8.0,11.0,11.0,11.0,1.0,4.0,8.0,11.0,11.0,11.0
812344,93436,2020Q2,0.068550,0.062575,0.058215,0.055174,0.053548,0.785646,0.688432,0.633761,0.595140,0.566579,0.308194,0.302060,0.291221,0.078116,0.064292,0.058968,0.054780,0.052147,0.050711,0.863481,0.784060,0.735706,0.696130,0.657709,0.373092,0.355845,0.351365,0.065285,0.077093,0.077093,0.077093,0.077093,0.077093,0.035592,0.037640,0.037640,0.037640,0.037640,0.037640,0.041159,0.039940,0.039862,0.012654,0.005485,0.003073,0.001649,0.001649,0.001649,0.001950,0.002509,0.002934,0.003071,0.003071,0.003071,1.0,4.0,8.0,10.0,10.0,10.0,1.0,4.0,8.0,10.0,10.0,10.0


In [33]:
stock_sparsity_updated.columns

Index(['permno', 'quarter', 'percent_benchmark_3_equ_w_mean',
       'percent_benchmark_7_equ_w_mean', 'percent_benchmark_11_equ_w_mean',
       'percent_benchmark_15_equ_w_mean', 'percent_benchmark_19_equ_w_mean',
       'percent_within_3_equ_w_mean', 'percent_within_7_equ_w_mean',
       'percent_within_11_equ_w_mean', 'percent_within_15_equ_w_mean',
       'percent_within_19_equ_w_mean',
       'percent_benchmark_median_passive_equ_w_mean',
       'percent_benchmark_avg_passive_equ_w_mean',
       'percent_benchmark_largest_passive_equ_w_mean',
       'percent_benchmark_0_equ_w_mean', 'percent_benchmark_3_ow_weighted',
       'percent_benchmark_7_ow_weighted', 'percent_benchmark_11_ow_weighted',
       'percent_benchmark_15_ow_weighted', 'percent_benchmark_19_ow_weighted',
       'percent_within_3_ow_weighted', 'percent_within_7_ow_weighted',
       'percent_within_11_ow_weighted', 'percent_within_15_ow_weighted',
       'percent_within_19_ow_weighted',
       'percent_benchmark_med

In [34]:
stock_sparsity_updated.to_csv('2024_04_20_stock_sparsity_ret_coverage_ratio.csv',index=False)

In [None]:
#stock_sparsity_updated=pd.read_csv('2024_03_01_stock_sparsity_added_avg_ret_alpha.csv')

In [None]:
stock_sparsity_updated