In [1]:
import pandas as pd
import numpy as np
import os

## Define file path depending on which index to examine

## S&P500
# Every input and output lives under one shared folder; the individual paths
# below are derived from it so that only this line changes when the share moves.
quant_root = "H:/Tech Hardware Shared/$Mike/Quant"

# Inputs: the two constituent files and the folder of per-ISIN CSVs
index_file_path = f"{quant_root}/SnP500_Constit.csv"
SOX_file_path = f"{quant_root}/SOX_Constit.csv"
csv_directory = f"{quant_root}/CSV_files"

# Outputs: export folder (keeps its trailing slash because file names are
# appended by plain string concatenation) and one workbook name per analysis
export_path = f"{quant_root}/Python_Outputs/"
output_stem = 'SnP500_ex_SOX'
export_audit_file_name = f'{output_stem}_audit.xlsx'
export_index_file_name = f'{output_stem}_Index_Avgs.xlsx'
export_LS_file_name = f'{output_stem}_LS.xlsx'
export_dispersion_file_name = f'{output_stem}_dispersion.xlsx'
export_examine_CSV_file_name = f'{output_stem}_indiv_CSV.xlsx'

##### CREATING DATABASE DATAFRAME ######

def _load_constituents(constituent_csv_path):
    """Read a wide constituent CSV and return it in long (Date, ISIN) form.

    The CSV's header row holds one date per column in '%d-%b-%y' format and the
    cells underneath hold the ISINs for that date.

    Parameters
    ----------
    constituent_csv_path : str
        Path of the constituent CSV to read.

    Returns
    -------
    pandas.DataFrame
        Two columns: 'Date' (rolled forward to month end) and 'ISIN'.
        Empty cells are kept as rows with ISIN == 'placeholder'.
    """
    wide = pd.read_csv(constituent_csv_path, header=0)
    wide.columns = pd.to_datetime(wide.columns, format='%d-%b-%y')
    long_form = pd.melt(wide, id_vars=[], var_name='Date', value_name='ISIN')
    # MonthEnd(0) rolls a date forward to its month end and leaves month-end dates unchanged
    long_form['Date'] = long_form['Date'] + pd.offsets.MonthEnd(0)
    # Assign the result back instead of calling fillna(inplace=True) on the column
    # selection: the in-place form is a chained assignment, deprecated in pandas 2.x,
    # and it does not update the frame under Copy-on-Write.
    long_form['ISIN'] = long_form['ISIN'].fillna('placeholder')
    return long_form


# Load index file
df_index = _load_constituents(index_file_path)

# Load SOX
df_SOX = _load_constituents(SOX_file_path)

# Create index file excluding SOX constituents.
# The left join keeps every row of df_index; indicator=True adds a '_merge' column
# that records where each (Date, ISIN) pair was found:
#   'left_only' - present only in df_index
#   'both'      - present in df_index and in df_SOX
# ('right_only' cannot occur in a left join.)
# Keeping only 'left_only' rows leaves the index constituents that were NOT in
# SOX on that date; the helper '_merge' column is then dropped.
df = pd.merge(df_index, df_SOX, on=['Date', 'ISIN'], how='left', indicator=True)
df = df[df['_merge'] == 'left_only'].drop(columns=['_merge'])

# Columns that are coerced to numeric in every per-ISIN CSV
# (cells that cannot be parsed become NaN). Defined once, outside the loop.
numeric_columns = ['Mkt_Cap','TR', 'PCH',
                   'NTM_RevGrowth','2y_RevGrowth',
                   'NTM_EBITDA_Margin','2y_EBITDA_Margin',
                   'RoE_TTM', 'RoE_1y_Fwd', 'RoE_2y_Fwd',
                   'CoE', 'Assumed_G', 
                   'Sales_EV_1y_Fwd', 'EV_Sales_1y_Fwd', 'EBITDA_EV_1y_Fwd','EV_EBITDA_1y_Fwd',
                   'EY_TTM','PE_TTM', 'EY_1y_Fwd','PE_1y_Fwd','EY_2y_Fwd','PE_2y_Fwd',
                   'BY_TTM','PB_TTM', 'BY_1y_Fwd','PB_1y_Fwd','BY_2y_Fwd','PB_2y_Fwd',
                   'DY_TTM','DY_1y_Fwd','DY_2y_Fwd',
                   'NOSH_Chg_LTM',
                   'TIME',
                   'Mkt_Cap_SOM','TR_LTM_SOM','TR_L6M_SOM','TR_L3M_SOM',
                   'NTM_RevGrowth_SOM','NTM_EBITDA_Margin_SOM','NTM_RoE_SOM',
                   'NTM_RevGrowth_3mChg_SOM','NTM_Rev_3mChg_SOM',
                   'NTM_EBITDA_Margin_3mChg_SOM',
                   'Sales_EV_1y_Fwd_SOM','EBITDA_EV_1y_Fwd_SOM','EY_1y_Fwd_SOM','EY_2y_Fwd_SOM','BY_1y_Fwd_SOM','CoE_SOM']

# nix out negative BVPS data: negative values in these columns are replaced by NaN
neg_cols = ['BY_TTM','PB_TTM', 'BY_1y_Fwd','PB_1y_Fwd','BY_2y_Fwd','PB_2y_Fwd','CoE']

# Floors / ceilings applied per column as (lower, upper); None = no bound on that side.
clip_bounds = {
    'CoE': (None, 0.5),                 # Max CoE = 50%
    # Yields limited to [-0.02, 1]: floor corresponds to a -50x multiple, cap to a 1x multiple
    'Sales_EV_1y_Fwd': (-0.02, 1),
    'EBITDA_EV_1y_Fwd': (-0.02, 1),
    'EY_TTM': (-0.02, 1),
    'EY_1y_Fwd': (-0.02, 1),
    'EY_2y_Fwd': (-0.02, 1),
    # RoE limited to [-50%, 150%]
    'RoE_TTM': (-0.5, 1.5),
    'RoE_1y_Fwd': (-0.5, 1.5),
    'RoE_2y_Fwd': (-0.5, 1.5),
}

# Create a list to store each ISIN's data
all_isin_data = []

# Iterate through each ISIN in the 'ISIN' column
for isin in df['ISIN'].unique():
    # 'placeholder' marks empty constituent cells, not a real security
    if isin == 'placeholder':
        continue

    # Use a dedicated name for the per-ISIN path so the loop does not overwrite
    # the configured index_file_path.
    isin_csv_path = os.path.join(csv_directory, f'{isin}.csv')

    # ISINs without a CSV file are silently skipped
    if not os.path.exists(isin_csv_path):
        continue

    # The first row of the CSV is the header and must contain a 'Date' column
    isin_data = pd.read_csv(isin_csv_path, header=0)
    # 'Date' holds day counts from 1899-12-30 (presumably Excel serial dates - confirm
    # against the CSV producer); convert and roll forward to month end.
    isin_data['Date'] = pd.to_datetime(isin_data['Date'], unit='D', origin='1899-12-30')
    isin_data['Date'] = isin_data['Date'] + pd.offsets.MonthEnd(0)
    isin_data['ISIN'] = isin

    # Convert all data columns to numeric
    isin_data[numeric_columns] = isin_data[numeric_columns].apply(lambda x: pd.to_numeric(x, errors='coerce'))

    # Negative values -> NaN (done before clipping so a negative CoE is dropped, not floored)
    for col in neg_cols:
        isin_data[col] = np.where(isin_data[col] < 0, np.nan, isin_data[col])

    # Apply the floors / ceilings
    for col, (lower_bound, upper_bound) in clip_bounds.items():
        isin_data[col] = isin_data[col].clip(lower=lower_bound, upper=upper_bound)

    # Append the data to the list
    all_isin_data.append(isin_data)

# Stack the per-ISIN frames into one long frame with a fresh 0..n-1 row index
isin_final_df = pd.concat(all_isin_data, ignore_index=True)

# Attach the per-ISIN data to the constituent list. The default (inner) join on
# ('Date', 'ISIN') keeps only constituent rows that have matching CSV data.
df = df.merge(isin_final_df, on=['Date','ISIN'])

# Preview the first rows of the combined frame
df.head()

# Optional audit export of the full combined frame:
#full_export_path = export_path + export_audit_file_name
#df.to_excel(full_export_path, index=False)


Unnamed: 0,Date,ISIN,Mkt_Cap,Mkt_Cap_SOM,TR,NTM_RevGrowth_EOM,2y_RevGrowth_EOM,NTM_RevGrowth_3mChg_SOM,NTM_Rev_3mChg_SOM,NTM_EBITDA_Margin_EOM,...,PB_TTM,BY_1y_Fwd,PB_1y_Fwd,BY_2y_Fwd,PB_2y_Fwd,DY_TTM,DY_1y_Fwd,DY_2y_Fwd,NOSH_Chg_LTM,TIME
0,1989-09-30,US0318971019,4888.74,,,,,,,,...,,,,,,,,,,36251.0
1,1989-09-30,US0017651060,5101.33,,,,,,,,...,,,,,,,,,,41614.0
2,1989-09-30,US0434131035,1432.45,,,,,,,,...,,,,,,,,,,36481.0
3,1989-09-30,US0019575051,47338.82,,,,,,,,...,,,,,,,,,,38674.0
4,1989-09-30,US0028241000,14258.13,,,,,,,,...,,,,,,,,,,45580.0


In [2]:
#### CREATE DATAFRAME OF EQUAL WEIGHTED AND MARKET CAP WEIGHTED AVERAGE RATIOS ####


### Equal Weight Calculation
# Per-stock columns that are averaged (simple mean) across all stocks on each Date
eq_wgt_inputs = ['TR',
                 'NTM_RevGrowth','2y_RevGrowth','NTM_RevGrowth_3mChg_SOM','NTM_Rev_3mChg_SOM',
                 'NTM_EBITDA_Margin','2y_EBITDA_Margin','NTM_EBITDA_Margin_3mChg_SOM',
                 'RoE_TTM', 'RoE_1y_Fwd', 'RoE_2y_Fwd',
                 'CoE', 'Assumed_G',
                 'Sales_EV_1y_Fwd','EBITDA_EV_1y_Fwd',
                 'EY_TTM','EY_1y_Fwd','EY_2y_Fwd',
                 'BY_TTM','BY_1y_Fwd','BY_2y_Fwd',
                 'DY_TTM','DY_1y_Fwd','DY_2y_Fwd','NOSH_Chg_LTM']

# The suffix is added while 'Date' is still the index, so only the value columns
# are renamed; reset_index then turns 'Date' back into an ordinary column.
EqualWeightIndex_df = (
    df.groupby('Date')[eq_wgt_inputs]
      .mean()
      .add_suffix('_Eq_Wgt')
      .reset_index()
)

# Each multiple is the reciprocal of the averaged yield-style ratio
# (new multiple column -> yield column it is derived from).
eq_wgt_reciprocals = {
    'EV_Sales_1y_Fwd': 'Sales_EV_1y_Fwd',
    'EV_EBITDA_1y_Fwd': 'EBITDA_EV_1y_Fwd',
    'PE_TTM': 'EY_TTM',
    'PE_1y_Fwd': 'EY_1y_Fwd',
    'PE_2y_Fwd': 'EY_2y_Fwd',
    'PB_TTM': 'BY_TTM',
    'PB_1y_Fwd': 'BY_1y_Fwd',
    'PB_2y_Fwd': 'BY_2y_Fwd',
}
for multiple_name, yield_name in eq_wgt_reciprocals.items():
    EqualWeightIndex_df[f'{multiple_name}_Eq_Wgt'] = 1/EqualWeightIndex_df[f'{yield_name}_Eq_Wgt']


### Median Weight Calculation
# Per-stock columns whose cross-sectional median is taken on each Date
median_inputs = ['TR',
                 'NTM_RevGrowth','2y_RevGrowth','NTM_RevGrowth_3mChg_SOM','NTM_Rev_3mChg_SOM',
                 'NTM_EBITDA_Margin','2y_EBITDA_Margin','NTM_EBITDA_Margin_3mChg_SOM',
                 'RoE_TTM', 'RoE_1y_Fwd', 'RoE_2y_Fwd',
                 'CoE', 'Assumed_G',
                 'Sales_EV_1y_Fwd','EBITDA_EV_1y_Fwd',
                 'EY_TTM','EY_1y_Fwd','EY_2y_Fwd',
                 'BY_TTM','BY_1y_Fwd','BY_2y_Fwd',
                 'DY_TTM','DY_1y_Fwd','DY_2y_Fwd','NOSH_Chg_LTM']

# The suffix is added while 'Date' is still the index, so only the value columns
# are renamed; reset_index then turns 'Date' back into an ordinary column.
MedianIndex_df = (
    df.groupby('Date')[median_inputs]
      .median()
      .add_suffix('_Median')
      .reset_index()
)

# Each multiple is the reciprocal of the median yield-style ratio
# (new multiple column -> yield column it is derived from).
median_reciprocals = {
    'EV_Sales_1y_Fwd': 'Sales_EV_1y_Fwd',
    'EV_EBITDA_1y_Fwd': 'EBITDA_EV_1y_Fwd',
    'PE_TTM': 'EY_TTM',
    'PE_1y_Fwd': 'EY_1y_Fwd',
    'PE_2y_Fwd': 'EY_2y_Fwd',
    'PB_TTM': 'BY_TTM',
    'PB_1y_Fwd': 'BY_1y_Fwd',
    'PB_2y_Fwd': 'BY_2y_Fwd',
}
for multiple_name, yield_name in median_reciprocals.items():
    MedianIndex_df[f'{multiple_name}_Median'] = 1/MedianIndex_df[f'{yield_name}_Median']

### MKT CAP WEIGHTED CALCULATION

# Step 1: Create a function for weighted average calculation
def weighted_average(df, value_col, weight_col):
    """Return the weighted mean of ``value_col`` using ``weight_col`` as weights.

    Rows where either the value or the weight is NaN are ignored.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing both columns.
    value_col : str
        Name of the column to average.
    weight_col : str
        Name of the column holding the weights.

    Returns
    -------
    float
        sum(weight * value) / sum(weight) over the usable rows, or NaN when the
        usable weights sum to zero (including when there are no usable rows).
        NaN is returned rather than 0 so that "no data" is not reported as a
        genuine zero and does not become inf when the result is inverted.
    """
    usable = df[value_col].notna() & df[weight_col].notna()
    weights = df.loc[usable, weight_col]
    values = df.loc[usable, value_col]

    total_weight = weights.sum()
    if total_weight == 0:
        # Nothing to average: signal missing data instead of a misleading 0
        return np.nan

    return (weights * values).sum() / total_weight

# Step 2: Create a function to calculate the market cap weighted averages
def calculate_weighted_averages(group):
    """Compute the market-cap weighted average of each ratio for one group of rows.

    'TR' is weighted by 'Mkt_Cap_SOM'; every other column is weighted by 'Mkt_Cap'.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows to aggregate; must contain the input columns listed below plus
        'Mkt_Cap' and 'Mkt_Cap_SOM'.

    Returns
    -------
    pandas.Series
        One '<label>_MV_Wgt' entry per row of the table below, in table order.
        The EV_*, PE_* and PB_* entries hold the weighted *yield* (Sales/EV,
        EBITDA/EV, EY, BY) at this point; the caller is expected to invert them.
    """
    # (output label, input column) in output order
    label_and_source = [
        ('TR', 'TR'),
        ('NTM_RevGrowth', 'NTM_RevGrowth'),
        ('2y_RevGrowth', '2y_RevGrowth'),
        ('NTM_EBITDA_Margin', 'NTM_EBITDA_Margin'),
        ('2y_EBITDA_Margin', '2y_EBITDA_Margin'),
        ('RoE_TTM', 'RoE_TTM'),
        ('RoE_1y_Fwd', 'RoE_1y_Fwd'),
        ('RoE_2y_Fwd', 'RoE_2y_Fwd'),
        ('CoE', 'CoE'),
        ('Assumed_G', 'Assumed_G'),
        ('EV_Sales_1y_Fwd', 'Sales_EV_1y_Fwd'),    # execution of reciprocal by caller
        ('EV_EBITDA_1y_Fwd', 'EBITDA_EV_1y_Fwd'),  # execution of reciprocal by caller
        ('PE_TTM', 'EY_TTM'),                      # execution of reciprocal by caller
        ('PE_1y_Fwd', 'EY_1y_Fwd'),                # execution of reciprocal by caller
        ('PE_2y_Fwd', 'EY_2y_Fwd'),                # execution of reciprocal by caller
        ('PB_TTM', 'BY_TTM'),                      # execution of reciprocal by caller
        ('PB_1y_Fwd', 'BY_1y_Fwd'),                # execution of reciprocal by caller
        ('PB_2y_Fwd', 'BY_2y_Fwd'),                # execution of reciprocal by caller
        ('DY_TTM', 'DY_TTM'),
        ('DY_1y_Fwd', 'DY_1y_Fwd'),
        ('DY_2y_Fwd', 'DY_2y_Fwd'),
        ('NOSH_Chg_LTM', 'NOSH_Chg_LTM'),
    ]

    averages = {}
    for label, source in label_and_source:
        weight_col = 'Mkt_Cap_SOM' if source == 'TR' else 'Mkt_Cap'
        averages[f'{label}_MV_Wgt'] = weighted_average(group, source, weight_col)

    return pd.Series(averages)

# Step 3: Apply the calculation to your DataFrame
# One row per Date, one column per '<label>_MV_Wgt' returned by calculate_weighted_averages
Index_df = df.groupby('Date').apply(calculate_weighted_averages).reset_index()

# calculate_weighted_averages stores the weighted yields under the multiple names,
# so each of these columns is replaced by its own reciprocal.
# Looping over the names guarantees every column is inverted from itself; the previous
# hand-written version derived PB_TTM_MV_Wgt from PB_1y_Fwd_MV_Wgt by mistake.
reciprocal_columns = ['EV_Sales_1y_Fwd_MV_Wgt',
                      'EV_EBITDA_1y_Fwd_MV_Wgt',
                      'PE_TTM_MV_Wgt',
                      'PE_1y_Fwd_MV_Wgt',
                      'PE_2y_Fwd_MV_Wgt',
                      'PB_TTM_MV_Wgt',
                      'PB_1y_Fwd_MV_Wgt',
                      'PB_2y_Fwd_MV_Wgt']
for multiple_col in reciprocal_columns:
    Index_df[multiple_col] = 1/Index_df[multiple_col]

# Step 4: Add in the equal weighted calculations from above into the Index dataframe
# Columns are copied one by one, aligned on the row index.
# NOTE(review): assumes Index_df and EqualWeightIndex_df have one row per Date in the same order - confirm.
eq_wgt_labels = ['TR',
                 'NTM_RevGrowth', '2y_RevGrowth',
                 'NTM_EBITDA_Margin', '2y_EBITDA_Margin',
                 'RoE_TTM', 'RoE_1y_Fwd', 'RoE_2y_Fwd',
                 'CoE', 'Assumed_G',
                 'EV_Sales_1y_Fwd', 'EV_EBITDA_1y_Fwd',
                 'PE_TTM', 'PE_1y_Fwd', 'PE_2y_Fwd',
                 'PB_TTM', 'PB_1y_Fwd', 'PB_2y_Fwd',
                 'DY_TTM', 'DY_1y_Fwd', 'DY_2y_Fwd',
                 'NOSH_Chg_LTM']
for label in eq_wgt_labels:
    eq_wgt_col = f'{label}_Eq_Wgt'
    Index_df[eq_wgt_col] = EqualWeightIndex_df[eq_wgt_col]

# Step 5: Add in the median calculations from above into the Index dataframe
# Columns are copied one by one, aligned on the row index.
# NOTE(review): assumes Index_df and MedianIndex_df have one row per Date in the same order - confirm.
median_labels = ['TR',
                 'NTM_RevGrowth', '2y_RevGrowth',
                 'NTM_EBITDA_Margin', '2y_EBITDA_Margin',
                 'RoE_TTM', 'RoE_1y_Fwd', 'RoE_2y_Fwd',
                 'CoE', 'Assumed_G',
                 'EV_Sales_1y_Fwd', 'EV_EBITDA_1y_Fwd',
                 'PE_TTM', 'PE_1y_Fwd', 'PE_2y_Fwd',
                 'PB_TTM', 'PB_1y_Fwd', 'PB_2y_Fwd',
                 'DY_TTM', 'DY_1y_Fwd', 'DY_2y_Fwd',
                 'NOSH_Chg_LTM']
for label in median_labels:
    median_col = f'{label}_Median'
    Index_df[median_col] = MedianIndex_df[median_col]


# Uncomment to preview the last rows of the combined index frame
#Index_df.tail()

# Write the index averages to an Excel workbook (row index omitted);
# export_path already ends with a path separator.
full_export_path = f'{export_path}{export_index_file_name}'
Index_df.to_excel(full_export_path, index=False)


In [3]:
### LONG / SHORT RETURNS BY VARIABLE ###

# Sorting variables: on each Date the stocks are split at the median of one of these columns
target_columns = ['Mkt_Cap_SOM','TR_LTM_SOM','TR_L6M_SOM','TR_L3M_SOM',
                  'NTM_RevGrowth_SOM','NTM_EBITDA_Margin_SOM','NTM_RoE_SOM','NTM_RevGrowth_3mChg_SOM','NTM_Rev_3mChg_SOM',
                  'NTM_EBITDA_Margin_3mChg_SOM','Sales_EV_1y_Fwd_SOM',
                  'EBITDA_EV_1y_Fwd_SOM','EY_1y_Fwd_SOM','EY_2y_Fwd_SOM','BY_1y_Fwd_SOM','CoE_SOM']

# Result frame starts as one row per distinct Date; three columns are added per variable
LS_results_df = pd.DataFrame(df['Date'].unique(), columns=['Date'])

for factor in target_columns:

    # Coerce the sorting variable and the return column to numeric (unparseable -> NaN)
    df[factor] = pd.to_numeric(df[factor], errors='coerce')
    df['TR'] = pd.to_numeric(df['TR'], errors='coerce')

    long_col = f'Long_{factor}'
    short_col = f'Short_{factor}'
    ls_col = f'LS_{factor}'

    # One record per Date: mean TR of the stocks strictly above / strictly below
    # that Date's median of the sorting variable (stocks exactly at the median,
    # or with a missing value, fall in neither leg)
    per_date_rows = []
    for month, members in df.groupby('Date'):
        cutoff = members[factor].median()
        per_date_rows.append({
            'Date': month,
            long_col: members.loc[members[factor] > cutoff, 'TR'].mean(),
            short_col: members.loc[members[factor] < cutoff, 'TR'].mean(),
        })

    factor_df = pd.DataFrame(per_date_rows)

    # Long/short return: long leg relative to short leg
    factor_df[ls_col] = (1 + factor_df[long_col]) / (1 + factor_df[short_col]) - 1

    # Attach this variable's three columns to the running result
    LS_results_df = LS_results_df.merge(factor_df, on='Date', how='left')

# Write the long/short results to an Excel workbook (row index omitted)
full_export_path = f'{export_path}{export_LS_file_name}'
LS_results_df.to_excel(full_export_path, index=False)

In [4]:
### VALUATION DISPERSION BY VARIABLE ###


# Variables whose above-median vs below-median spread is measured on each Date
target_columns = ['NTM_RevGrowth', '2y_RevGrowth',
                  'NTM_EBITDA_Margin','2y_EBITDA_Margin',
                  'RoE_1y_Fwd','RoE_2y_Fwd',
                  'CoE','Assumed_G',
                  'Sales_EV_1y_Fwd','EBITDA_EV_1y_Fwd','EY_1y_Fwd','EY_2y_Fwd',]

# Result frame starts as one row per distinct Date; three columns are added per variable
dispersion_results_df = pd.DataFrame(df['Date'].unique(), columns=['Date'])

for metric in target_columns:

    # Coerce the variable to numeric (unparseable -> NaN)
    df[metric] = pd.to_numeric(df[metric], errors='coerce')

    above_col = f'above_{metric}'
    below_col = f'below_{metric}'
    spread_col = f'spread_{metric}'

    # One record per Date: mean of the variable itself over the stocks strictly
    # above / strictly below that Date's median (values exactly at the median,
    # or missing, fall in neither half)
    per_date_rows = []
    for month, members in df.groupby('Date'):
        values = members[metric]
        cutoff = values.median()
        per_date_rows.append({
            'Date': month,
            above_col: values[values > cutoff].mean(),
            below_col: values[values < cutoff].mean(),
        })

    metric_df = pd.DataFrame(per_date_rows)

    # Spread between the two halves, expressed as (1 + above) / (1 + below) - 1
    metric_df[spread_col] = (1 + metric_df[above_col]) / (1 + metric_df[below_col]) - 1

    # Attach this variable's three columns to the running result
    dispersion_results_df = dispersion_results_df.merge(metric_df, on='Date', how='left')

# Write the dispersion results to an Excel workbook (row index omitted)
full_export_path = f'{export_path}{export_dispersion_file_name}'
dispersion_results_df.to_excel(full_export_path, index=False)

In [5]:
# examine stock by stock data for a single month
# The window is inclusive at both ends; start and end are the same date here,
# so only rows dated exactly 2024-09-30 are kept.
window_start = '2024-09-30'
window_end = '2024-09-30'
examine_data = df.loc[df['Date'].between(window_start, window_end)]

# Write the selected rows to an Excel workbook (row index omitted)
full_export_path = f'{export_path}{export_examine_CSV_file_name}'
examine_data.to_excel(full_export_path, index=False)