In [8]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf


In [9]:

# Load the combined USD_CAD workbook (path is relative to this notebook).
data = pd.read_excel('../data/USD_CAD_combined.xlsx')

# Coerce every date column that later cells do date arithmetic on, not only
# 'Maturity': 'Effective' and 'Trade Date' are subtracted and read through the
# `.dt` accessor, which fails if Excel delivered them as text/object values.
# On columns that are already datetime64 this is a no-op.
for date_col in ['Maturity', 'Effective', 'Trade Date']:
    data[date_col] = pd.to_datetime(data[date_col])

In [10]:

# NOTE: this cell rebinds `data` to a filtered frame. Re-running it without
# re-running the load cell recomputes `start_date` from the already-filtered
# rows, so always run it on a freshly loaded `data`.

# Log of the trade notional (column 'Not.').
# NOTE(review): np.log yields -inf for 0 and NaN for negative values; this
# presumably relies on every notional being strictly positive - confirm.
data['Ln_notional'] = np.log(data['Not.'])

# Maturity as whole days since the earliest maturity date. Computed before any
# rows are dropped, so the origin is the minimum over the full loaded sample.
start_date = data['Maturity'].min()
data['Days_since_start'] = (data['Maturity'] - start_date).dt.days

# Whole days from trade date to effective date.
data['Days_Between'] = (data['Effective'] - data['Trade Date']).dt.days

# Sample restriction in a single pass: keep trades whose effective date is
# 0-90 days (inclusive) after the trade date and whose 'Day Name' is not a
# weekend day. The explicit .copy() makes `data` an independent frame, so the
# column assignments below never write into a slice of the pre-filter frame.
keep_rows = (
    data['Days_Between'].between(0, 90)
    & ~data['Day Name'].isin(['Saturday', 'Sunday'])
)
data = data.loc[keep_rows].copy()

# Tenor in whole years (effective -> maturity, 365-day year), stored as a
# categorical so the formula gives every tenor its own dummy.
tenor_years = (data['Maturity'] - data['Effective']).dt.days / 365
data['tenor'] = tenor_years.round().astype('category')

# Convert specified variables to categorical in one astype call.
categorical_columns = ['Group', 'Phase', 'Period', 'Capped', 'SEF', 'Day Name', 'Trade Hour Categorical']
data = data.astype({col: 'category' for col in categorical_columns})

# Fix the category order for 'Day Name' and 'Trade Hour Categorical'. The first
# listed category ("Wednesday" / "Mid Day") is the one patsy's default treatment
# coding uses as the omitted reference level.
# Gotcha: set_categories turns any value NOT in the list into NaN, so a label
# spelled differently in the workbook silently becomes missing.
data['Day Name'] = data['Day Name'].cat.set_categories(["Wednesday", "Monday", "Tuesday", "Thursday", "Friday"])
data['Trade Hour Categorical'] = data['Trade Hour Categorical'].cat.set_categories(["Mid Day", "Morning", "Afternoon", "Off Hours"])

# Difference-in-differences specification: Group * Period expands to both main
# effects plus their interaction; the remaining terms are controls. Q("...")
# quotes column names that contain spaces.
advanced_did_model_formula = 'Difference ~ Group * Period + tenor + Ln_notional + Capped + Q("Trade Hour Categorical") + Q("Day Name")'


In [11]:
# Build the advanced DiD specification on the prepared frame, then estimate it by OLS
advanced_did_spec = smf.ols(formula=advanced_did_model_formula, data=data)
advanced_did_model = advanced_did_spec.fit()

# Print the full regression table
print(advanced_did_model.summary())

# Leave the coefficient vector as the cell's displayed value
advanced_did_model.params


                            OLS Regression Results                            
Dep. Variable:             Difference   R-squared:                       0.019
Model:                            OLS   Adj. R-squared:                  0.017
Method:                 Least Squares   F-statistic:                     8.605
Date:                Tue, 23 Jan 2024   Prob (F-statistic):           1.53e-63
Time:                        01:06:38   Log-Likelihood:            -1.0985e+05
No. Observations:               22786   AIC:                         2.198e+05
Df Residuals:                   22733   BIC:                         2.202e+05
Df Model:                          52                                         
Covariance Type:            nonrobust                                         
                                               coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------------------

Intercept                                  -2.307075e-01
Group[T.1]                                  8.028439e+00
Period[T.1]                                -1.262675e+00
tenor[T.1.0]                                1.002452e+01
tenor[T.2.0]                                9.202896e+00
tenor[T.3.0]                                5.597752e+00
tenor[T.4.0]                                3.031228e+00
tenor[T.5.0]                                6.916127e+00
tenor[T.6.0]                                6.119157e+00
tenor[T.7.0]                                4.338385e+00
tenor[T.8.0]                                5.844591e+00
tenor[T.9.0]                                5.880376e+00
tenor[T.10.0]                               5.074751e+00
tenor[T.11.0]                               4.944240e+00
tenor[T.12.0]                              -2.252100e+00
tenor[T.13.0]                              -3.226627e+01
tenor[T.14.0]                               1.239835e+00
tenor[T.15.0]                  