In [188]:
import pandas as pd
import numpy as np

from sklearn.linear_model import LinearRegression
import statsmodels.api as sm


In [98]:
# Read every sheet of the workbook; with sheet_name=None pandas returns a
# dict that maps sheet name -> DataFrame.
vs = pd.read_excel('vs_method.xlsx', sheet_name=None)

vs_dct = {}
for sheet_name, sheet_df in vs.items():
    # Parse the 'Ticker' column as datetimes and roll each value forward by
    # one business-month-end offset (this writes back into the loaded frame).
    sheet_df['Ticker'] = pd.to_datetime(sheet_df['Ticker']) + pd.offsets.BMonthEnd(1)
    # Forward-fill gaps down the rows; anything still missing becomes 0.
    vs_dct[sheet_name] = sheet_df.ffill().fillna(0)

# Split the target sheet off from the remaining (explanatory) sheets.
y = vs_dct.pop('2024 Return')  # target
x = vs_dct

# Dates taken from one explanatory sheet; later cells iterate over its length.
rebal_dt = x['Active EQ MF_Ret']['Ticker']

In [189]:
# reg_x[pos] maps every sheet name in `x` to that sheet's row at position `pos`.
reg_x = {
    pos: {sheet_name: sheet_df.iloc[pos] for sheet_name, sheet_df in x.items()}
    for pos in range(len(rebal_dt))
}

# reg_y[pos] maps every column label of `y` to that column's value at position `pos`.
reg_y = {
    pos: {label: column.iloc[pos] for label, column in y.items()}
    for pos in range(len(rebal_dt))
}
  
    
results = {}  # not populated by any visible cell; kept in case other code reads it

# Per-rebalance-date OLS: regress the target on every factor column and keep
# the fitted parameters, one record per date.
sensitivity_results_clean = []
skipped_rebals = []  # (row position, error repr) for dates whose fit raised

for rebal in range(len(rebal_dt)):
    # Columns = one per factor sheet, plus the target values as the last column.
    reg_df = pd.concat([pd.DataFrame(reg_x[rebal]), pd.Series(reg_y[rebal])], axis=1)
    reg_df.columns = list(reg_df.columns[:-1]) + ['Target']

    # Coerce to numeric and drop incomplete rows BEFORE checking the sample
    # size, so the guard sees exactly the rows that will be fitted.
    # NOTE(review): the date entry carried in from the 'Ticker' column is
    # presumably coerced to NaN and removed here -- confirm.
    reg_df = reg_df.apply(pd.to_numeric, errors='coerce').dropna()
    if reg_df.shape[0] <= 1:
        continue

    X = sm.add_constant(reg_df.drop(columns='Target'))
    y_target = reg_df['Target']

    try:
        model = sm.OLS(y_target, X).fit()
    except Exception as e:
        # Best effort: keep processing the other dates, but record the
        # failure instead of discarding it silently.
        skipped_rebals.append((rebal, repr(e)))
        continue

    coeffs = model.params.to_dict()
    # Positional lookup, consistent with the positional keys of reg_x / reg_y.
    coeffs['RebalDate'] = rebal_dt.iloc[rebal]
    sensitivity_results_clean.append(coeffs)

if skipped_rebals:
    print(f"Skipped {len(skipped_rebals)} rebalance date(s) with failed regressions: {skipped_rebals}")

# One row per fitted rebalance date, one column per parameter plus 'RebalDate'.
sensitivity_df_clean = pd.DataFrame(sensitivity_results_clean)

In [193]:
# Express each date's coefficients as percentage shares of that date's total
# absolute coefficient size. The intercept ('const') is excluded, and the
# arithmetic runs only on the numeric coefficient columns (iterating rows
# would mix the date into each row and force object dtype).
coef_abs = (
    sensitivity_df_clean
    .drop(columns=['const', 'RebalDate'], errors='ignore')  # tolerate a missing column
    .abs()
)
abs_sum = coef_abs.sum(axis=1)

# A zero total means every coefficient in that row is zero; dividing by 1
# there yields zeros rather than a division-by-zero result.
standardized_df = coef_abs.div(abs_sum.where(abs_sum != 0, 1.0), axis=0) * 100

# Re-attach the date as the last column (skipped when no regression succeeded
# and the input frame therefore has no columns at all).
if 'RebalDate' in sensitivity_df_clean.columns:
    standardized_df['RebalDate'] = sensitivity_df_clean['RebalDate']


In [194]:
standardized_df

Unnamed: 0,Active EQ MF_Ret,Active EQ MF_IR,Active EQ MF_SR,Active EQ MF_TE,Active EQ MF_Exp,Active EQ MF_PEG,Active EQ MF_Medalist,RebalDate
0,6.323278,11.080285,70.674975,0.033382,6.337818,0.619006,4.931255,2019-01-31
1,2.629991,21.61219,48.605664,0.020858,8.228607,12.819422,6.083269,2019-02-28
2,7.592327,6.024673,63.186977,4.375692,10.765946,3.836408,4.217976,2019-03-29
3,7.09087,10.607619,69.609683,3.938044,4.355862,4.321471,0.07645,2019-04-30
4,6.048009,7.134243,77.844534,2.485793,4.533773,0.873243,1.080405,2019-05-31
5,5.775069,12.894533,73.7266,2.529276,4.453149,0.184156,0.437217,2019-06-28
6,5.764779,12.189496,76.661205,1.69859,2.442273,0.479954,0.763703,2019-07-31
7,5.72658,10.015245,79.479725,1.331765,1.583993,1.119394,0.743298,2019-08-30
8,5.371662,10.007966,75.68751,1.226519,4.225064,2.495364,0.985916,2019-09-30
9,4.919479,16.579255,64.780472,1.416384,7.133404,4.761035,0.409971,2019-10-31


In [195]:
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression

# Standardized (beta) coefficients per rebalance date: z-score the factors and
# the target, fit a linear regression on the scaled data, and keep one record
# of coefficients per date.
standardized_beta_results = []

for rebal in range(len(rebal_dt)):
    # Columns = one per factor sheet, plus the target values as the last column.
    reg_df = pd.concat([pd.DataFrame(reg_x[rebal]), pd.Series(reg_y[rebal])], axis=1)
    reg_df.columns = list(reg_df.columns[:-1]) + ['Target']

    # Coerce to numeric and drop incomplete rows BEFORE checking the sample
    # size, so the guard sees exactly the rows that will be fitted.
    reg_df = reg_df.apply(pd.to_numeric, errors='coerce').dropna()
    if reg_df.shape[0] <= 1:
        continue

    X = reg_df.drop(columns='Target')
    y_target = reg_df['Target']

    # Standardize both X and y
    scaler_X = StandardScaler()
    scaler_y = StandardScaler()
    X_scaled = scaler_X.fit_transform(X)
    # The scaler expects a 2-D array, so reshape the target to one column and
    # flatten it again afterwards.
    y_scaled = scaler_y.fit_transform(y_target.values.reshape(-1, 1)).ravel()

    model = LinearRegression().fit(X_scaled, y_scaled)
    beta_coeffs = dict(zip(X.columns, model.coef_))
    # Positional lookup, consistent with the positional keys of reg_x / reg_y.
    beta_coeffs['RebalDate'] = rebal_dt.iloc[rebal]
    standardized_beta_results.append(beta_coeffs)

# NOTE(review): the displayed betas for some factors exceed 1 in magnitude
# with opposite signs, which may indicate collinear factors -- confirm.
# One row per fitted rebalance date, one column per factor plus 'RebalDate'.
standardized_beta_df = pd.DataFrame(standardized_beta_results)

In [196]:
standardized_beta_df

Unnamed: 0,Active EQ MF_Ret,Active EQ MF_IR,Active EQ MF_SR,Active EQ MF_TE,Active EQ MF_Exp,Active EQ MF_PEG,Active EQ MF_Medalist,RebalDate
0,-1.364351,0.208563,1.087409,-0.006126,0.156996,0.018015,0.262829,2019-01-31
1,-0.386532,0.275391,0.513195,0.002472,0.138857,0.270248,0.224136,2019-02-28
2,1.422075,0.119661,-0.892765,-0.740349,0.260556,0.124465,0.220702,2019-03-29
3,1.982862,0.326328,-1.471668,-0.914982,0.144175,0.180774,0.005825,2019-04-30
4,2.762822,0.404399,-2.540749,-1.139051,0.288857,0.071053,0.166122,2019-05-31
5,2.043114,0.518848,-1.892896,-0.744458,0.183821,0.010424,0.041503,2019-06-28
6,2.323038,0.61051,-2.2817,-0.625171,0.125162,0.0354,0.086119,2019-07-31
7,2.7202,0.66329,-2.802878,-0.586003,0.098138,-0.102007,0.098287,2019-08-30
8,2.764046,0.691756,-3.002779,-0.649689,0.329812,-0.2472,0.165593,2019-09-30
9,1.418954,0.615419,-1.53051,-0.375557,0.281413,-0.252124,-0.027002,2019-10-31
