In [13]:
import numpy as np
import statsmodels.api as sm

# Simulate some data
np.random.seed(42)

#  X - Exogenous variable
X = np.random.randn(100, 2)

# y - Endogenous variable
endogenous_variable = 0.5 * X[:, 0] + np.random.randn(100)

# Instrumental variable
instrument = np.random.randn(100, 2)

# First stage regression
first_stage_model = sm.OLS(endogenous_variable, sm.add_constant(instrument)).fit()
predicted_endogenous = first_stage_model.predict(sm.add_constant(instrument))

# Second stage regression (2SLS)
iv_model = sm.OLS(endogenous_variable, sm.add_constant(predicted_endogenous)).fit()

# Print results
print("First Stage Regression Results:")
print(first_stage_model.summary())
# print("\nPredicted Endogenous Variable:")
# print(predicted_endogenous)

print("\nIV Regression Results (2SLS):")
print(iv_model.summary())


First Stage Regression Results:
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.029
Model:                            OLS   Adj. R-squared:                  0.009
Method:                 Least Squares   F-statistic:                     1.460
Date:                Sat, 16 Dec 2023   Prob (F-statistic):              0.237
Time:                        10:43:06   Log-Likelihood:                -159.98
No. Observations:                 100   AIC:                             326.0
Df Residuals:                      97   BIC:                             333.8
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.014

In [30]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.iolib.summary2 import summary_col

# Simulate some data
np.random.seed(42)

# Exogenous variable
X = np.random.randn(100, 2)

# Endogenous variable
endogenous_variable = 0.5 * X[:, 0] + np.random.randn(100)

# Instrumental variable
instrument = np.random.randn(100, 2)

# First stage regression
first_stage_model = sm.OLS(endogenous_variable, sm.add_constant(instrument)).fit()
predicted_endogenous = first_stage_model.predict(sm.add_constant(instrument))

# Second stage regression (2SLS)
iv_model = sm.OLS(endogenous_variable, sm.add_constant(predicted_endogenous)).fit()


In [32]:
custom_metrics

['rsquared', 'fvalue', 'f_pvalue']

In [40]:
summary_first_stage

Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
const,0.014568,0.121862,0.119547,0.905089,-0.227293,0.25643
x1,-0.105954,0.117381,-0.90265,0.368948,-0.338924,0.127015
x2,0.200483,0.133914,1.497095,0.137616,-0.0653,0.466266


In [None]:

# Create summary tables
summary_first_stage = first_stage_model.summary2().tables[1]
summary_iv_model = iv_model.summary2().tables[1]

# Customize metrics
custom_metrics = ['rsquared', 'fvalue', 'f_pvalue']  # Add metrics as needed

# Extract the relevant rows from the tables
summary_first_stage_custom = summary_first_stage.loc[custom_metrics]
summary_iv_model_custom = summary_iv_model.loc[custom_metrics]

# Combine the customized tables
custom_summary_table = pd.concat([summary_first_stage_custom, summary_iv_model_custom], axis=1)

# Rename the columns
custom_summary_table.columns = ['First Stage', 'IV Model']

print(custom_summary_table)


In [21]:
first_stage_model.summary2().tables[0]

Unnamed: 0,0,1,2,3
0,Model:,OLS,Adj. R-squared:,0.009
1,Dependent Variable:,y,AIC:,325.967
2,Date:,2023-12-16 10:51,BIC:,333.7825
3,No. Observations:,100,Log-Likelihood:,-159.98
4,Df Model:,2,F-statistic:,1.46
5,Df Residuals:,97,Prob (F-statistic):,0.237
6,R-squared:,0.029,Scale:,1.4803


In [22]:
first_stage_model.summary2().tables[1]

Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
const,0.014568,0.121862,0.119547,0.905089,-0.227293,0.25643
x1,-0.105954,0.117381,-0.90265,0.368948,-0.338924,0.127015
x2,0.200483,0.133914,1.497095,0.137616,-0.0653,0.466266


In [23]:
first_stage_model.summary2().tables[2]

Unnamed: 0,0,1,2,3
0,Omnibus:,0.188,Durbin-Watson:,2.21
1,Prob(Omnibus):,0.91,Jarque-Bera (JB):,0.136
2,Skew:,0.087,Prob(JB):,0.934
3,Kurtosis:,2.952,Condition No.:,1.0


In [24]:
len(first_stage_model.summary2().tables)

3

In [43]:
first_stage_model.summary2()

0,1,2,3
Model:,OLS,Adj. R-squared:,0.009
Dependent Variable:,y,AIC:,325.967
Date:,2023-12-16 11:04,BIC:,333.7825
No. Observations:,100,Log-Likelihood:,-159.98
Df Model:,2,F-statistic:,1.46
Df Residuals:,97,Prob (F-statistic):,0.237
R-squared:,0.029,Scale:,1.4803

0,1,2,3,4,5,6
,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
const,0.0146,0.1219,0.1195,0.9051,-0.2273,0.2564
x1,-0.1060,0.1174,-0.9027,0.3689,-0.3389,0.1270
x2,0.2005,0.1339,1.4971,0.1376,-0.0653,0.4663

0,1,2,3
Omnibus:,0.188,Durbin-Watson:,2.21
Prob(Omnibus):,0.91,Jarque-Bera (JB):,0.136
Skew:,0.087,Prob(JB):,0.934
Kurtosis:,2.952,Condition No.:,1.0


In [29]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

# Simulate some data
np.random.seed(42)

# Exogenous variable
X = np.random.randn(100, 2)

# Endogenous variable
endogenous_variable = 0.5 * X[:, 0] + np.random.randn(100)

# Instrumental variable
instrument = np.random.randn(100, 2)

# First stage regression
first_stage_model = sm.OLS(endogenous_variable, sm.add_constant(instrument)).fit()
predicted_endogenous = first_stage_model.predict(sm.add_constant(instrument))

# Second stage regression (2SLS)
iv_model = sm.OLS(endogenous_variable, sm.add_constant(predicted_endogenous)).fit()

# Extract relevant metrics
custom_metrics = ['R-squared', 'F-statistic', 'Prob (F-statistic)']

first_stage_metrics = [first_stage_model.rsquared, first_stage_model.fvalue, first_stage_model.f_pvalue]
iv_model_metrics = [iv_model.rsquared, iv_model.fvalue, iv_model.f_pvalue]

# Create a DataFrame
custom_summary_table = pd.DataFrame(
    {'First Stage': first_stage_metrics, 'IV Model': iv_model_metrics},
    index=custom_metrics
)

# Print the custom summary table
print(custom_summary_table)


                    First Stage  IV Model
R-squared              0.029230  0.029230
F-statistic            1.460361  2.950833
Prob (F-statistic)     0.237211  0.088992


In [49]:
first_stage_model.summary2()

0,1,2,3
Model:,OLS,Adj. R-squared:,0.009
Dependent Variable:,y,AIC:,325.967
Date:,2023-12-16 11:09,BIC:,333.7825
No. Observations:,100,Log-Likelihood:,-159.98
Df Model:,2,F-statistic:,1.46
Df Residuals:,97,Prob (F-statistic):,0.237
R-squared:,0.029,Scale:,1.4803

0,1,2,3,4,5,6
,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
const,0.0146,0.1219,0.1195,0.9051,-0.2273,0.2564
x1,-0.1060,0.1174,-0.9027,0.3689,-0.3389,0.1270
x2,0.2005,0.1339,1.4971,0.1376,-0.0653,0.4663

0,1,2,3
Omnibus:,0.188,Durbin-Watson:,2.21
Prob(Omnibus):,0.91,Jarque-Bera (JB):,0.136
Skew:,0.087,Prob(JB):,0.934
Kurtosis:,2.952,Condition No.:,1.0
