In [9]:
import pandas as pd
import statsmodels.api as sm

# Load the merged dataset
merged_data = pd.read_excel('Merged_Funds_Factors_Data.xlsx')

# Coerce 'Date' to real timestamps so the range comparisons below are
# chronological even if the column was read from Excel as text.
merged_data['Date'] = pd.to_datetime(merged_data['Date'])

# Define the periods
period_1_start = '2018-03-31'
period_1_end = '2021-03-31'
period_2_start = '2021-03-31'
period_2_end = '2024-03-31'

# Filter the data for each period.
# Period 1 is the closed interval [period_1_start, period_1_end].
period_1_data = merged_data[(merged_data['Date'] >= period_1_start) & (merged_data['Date'] <= period_1_end)]
# Period 2 is (period_2_start, period_2_end]: its start date equals the end
# date of period 1, so the lower bound is exclusive to keep rows dated on the
# shared boundary from being counted in both samples.
period_2_data = merged_data[(merged_data['Date'] > period_2_start) & (merged_data['Date'] <= period_2_end)]

# Function to run regression for each fund and period
def run_regression(data, period_name, min_obs=None):
    """Fit one OLS regression per fund and collect the 'Mkt-RF' coefficient.

    For every distinct value of ``data['ISIN']`` the dependent variable
    ``Return - RF`` is regressed on an intercept plus the columns
    'Mkt-RF', 'SMB', 'HML', 'MOM', 'RMW' and 'CMA'.

    Args:
        data: DataFrame containing the columns 'ISIN', 'Return', 'RF' and
            the six regressor columns listed above.
        period_name: Label copied into the 'Period' field of every result.
        min_obs: Minimum number of complete rows a fund needs to be
            estimated. Defaults to the number of estimated parameters
            plus one, so at least one residual degree of freedom remains.

    Returns:
        A list with one dict per fund, in order of first appearance in
        ``data``, with keys 'ISIN', 'Period', 'Beta_1' and 'Model Summary'.
        Funds with fewer than ``min_obs`` complete rows get ``Beta_1`` set
        to NaN and a short explanatory string in 'Model Summary' instead of
        a statsmodels summary.
    """
    factor_cols = ['Mkt-RF', 'SMB', 'HML', 'MOM', 'RMW', 'CMA']
    if min_obs is None:
        # Intercept + factors are estimated; one more row than that is the
        # smallest sample for which the fit is not exact or underdetermined.
        min_obs = len(factor_cols) + 2

    results = []
    # A single groupby pass replaces re-filtering the whole frame per fund.
    # sort=False keeps the funds in order of first appearance, and groupby
    # skips rows whose ISIN is missing (they would form an empty sample).
    for fund, fund_data in data.groupby('ISIN', sort=False):
        X = fund_data[factor_cols]
        y = fund_data['Return'] - fund_data['RF']

        # Keep only rows where the dependent variable and every regressor
        # are present; NaNs would otherwise make the fit fail or return NaN.
        complete = X.notna().all(axis=1) & y.notna()
        X = X[complete]
        y = y[complete]

        n_obs = len(y)
        if n_obs < min_obs:
            results.append({
                'ISIN': fund,
                'Period': period_name,
                'Beta_1': float('nan'),
                'Model Summary': (
                    f'Not estimated: {n_obs} complete observations, '
                    f'{min_obs} required'
                ),
            })
            continue

        # has_constant='add' always includes the intercept; the default
        # ('skip') omits it when some regressor is constant in this sample.
        X = sm.add_constant(X, has_constant='add')
        model = sm.OLS(y, X).fit()
        results.append({
            'ISIN': fund,
            'Period': period_name,
            'Beta_1': model.params['Mkt-RF'],
            'Model Summary': model.summary(),
        })
    return results

# Fit the per-fund regressions, first on the earlier window and then on the
# later one.
results_period_1, results_period_2 = (
    run_regression(period_frame, period_label)
    for period_frame, period_label in (
        (period_1_data, '2018-2021'),
        (period_2_data, '2021-2024'),
    )
)

# Turn each list of per-fund result dicts into a table.
results_df_1, results_df_2 = map(pd.DataFrame, (results_period_1, results_period_2))

# Write one workbook per period, without the row index.
results_df_1.to_excel(excel_writer='Beta_1_Results_2018_2021.xlsx', index=False)
results_df_2.to_excel(excel_writer='Beta_1_Results_2021_2024.xlsx', index=False)

print(
    "Regression results saved as 'Beta_1_Results_2018_2021.xlsx' and 'Beta_1_Results_2021_2024.xlsx'."
)


Regression results saved as 'Beta_1_Results_2018_2021.xlsx' and 'Beta_1_Results_2021_2024.xlsx'.
