In [1]:
pip install pandas statsmodels numpy tqdm


Note: you may need to restart the kernel to use updated packages.


In [5]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from tqdm import tqdm

# Load the datasets
alpha_data = pd.read_excel('Alpha_Results_No_Outliers.xlsx')  # Adjust the file path as needed
merged_data = pd.read_excel('Merged_Funds_Factors_Data.xlsx')  # Adjust the file path as needed

# Ensure the 'Date' columns are in datetime format
alpha_data['Date'] = pd.to_datetime(alpha_data['Date'])
merged_data['Date'] = pd.to_datetime(merged_data['Date'])

# Merge the datasets on 'ISIN' and 'Date'
data = pd.merge(alpha_data, merged_data, on=['ISIN', 'Date'])

# Filter the data for the relevant periods (3 years before and after SFDR)
pre_sfdr = data[data['Date'] < '2021-03-10']
post_sfdr = data[data['Date'] >= '2021-03-10']

# Create a treatment variable for SFDR classification
# Assuming Article 8 and Article 9 are treated groups
data['Treatment'] = np.where(data['Classification'].isin(['Article 8', 'Article 9']), 1, 0)

# Create a post-treatment period variable
data['Post'] = np.where(data['Date'] >= '2021-03-10', 1, 0)

# Interaction term between treatment and post period
data['Treatment_Post'] = data['Treatment'] * data['Post']

# Define the independent variables for the regression
X = data[['Treatment', 'Post', 'Treatment_Post']]
X = sm.add_constant(X)  # Adds a constant term to the predictor

# Define the dependent variable
y = data['Alpha']

# Run the D-i-D regression
model = sm.OLS(y, X).fit()

# Print the summary of the regression results
print(model.summary())

# Save the regression results to an Excel file
with pd.ExcelWriter('DiD_Results.xlsx') as writer:
    model_summary = model.summary2().tables[1]
    model_summary.to_excel(writer, sheet_name='DiD_Results')

# Provide a progress bar for the merging and filtering process
with tqdm(total=100) as pbar:
    pbar.update(50)
    data = pd.merge(alpha_data, merged_data, on=['ISIN', 'Date'])
    pbar.update(25)
    pre_sfdr = data[data['Date'] < '2021-03-10']
    post_sfdr = data[data['Date'] >= '2021-03-10']
    pbar.update(25)

# Save the filtered datasets for verification
pre_sfdr.to_excel('Pre_SFDR_Data.xlsx', index=False)
post_sfdr.to_excel('Post_SFDR_Data.xlsx', index=False)


KeyError: 'Date'