In [1]:
# Importing necessary packages
import pandas as pd
import numpy as np
import statsmodels.api as sm

**If using Stargazer to export LaTeX code, install and import it first:** \
`%pip install stargazer` \
`from stargazer.stargazer import Stargazer`

In [2]:
# Load the factor panel from disk.
data = pd.read_csv('factordata_minus_portfolios.csv')

# Interaction terms: every factor times the Method_Estimated indicator, stored as
# Ind<factor>. NOTE(review): Method_Estimated presumably flags estimated (as opposed
# to reported) emissions -- confirm against the data dictionary.
data = data.assign(**{
    f'Ind{factor}': data[factor] * data['Method_Estimated']
    for factor in ('GMB_U', 'GMB_S', 'Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA')
})

print(data)

           cusip  year  Month  monthyear  Mkt-RF   SMB   HML    RF   RMW  \
0       46603210  2015      1     201501   -3.11 -0.92 -3.59  0.00  1.61   
1       46603210  2015      2     201502    6.13  0.32 -1.86  0.00 -1.12   
2       46603210  2015      3     201503   -1.12  3.07 -0.38  0.00  0.09   
3       46603210  2015      4     201504    0.59 -3.09  1.82  0.00  0.06   
4       46603210  2015      5     201505    1.36  0.84 -1.15  0.00 -1.80   
...          ...   ...    ...        ...     ...   ...   ...   ...   ...   
247688  12503M10  2023      8     202308   -2.39 -3.68 -1.08  0.45  3.42   
247689  12503M10  2023      9     202309   -5.24 -1.79  1.45  0.43  1.85   
247690  12503M10  2023     10     202310   -3.18 -4.05  0.19  0.47  2.47   
247691  12503M10  2023     11     202311    8.83 -0.11  1.66  0.44 -3.81   
247692  12503M10  2023     12     202312    4.87  7.33  4.92  0.43 -3.04   

         CMA  ...  Method_Estimated     GMB_U     GMB_S  IndGMB_U  IndGMB_S  \
0      -

In [3]:
# One 0/1 indicator column per sector label, named sector_<label>.
sectordummies = pd.get_dummies(data['sector'], prefix='sector', dtype=int)
# Attach the indicators to the main DataFrame, side by side.
withsectors = pd.concat([data, sectordummies], axis=1)

# Sector-specific GMB interactions: Ind<U|S>_<abbr> = GMB_<U|S> * sector_<label>.
# Retail is deliberately given no interaction (the original note calls it the median
# sector), presumably so that it acts as the reference sector.
# All GMB_U interactions are created first, then all GMB_S interactions.
withsectors = withsectors.assign(**{
    f'Ind{variant}_{abbr}': withsectors[f'GMB_{variant}'] * withsectors[f'sector_{label}']
    for variant in ('U', 'S')
    for label, abbr in {
        'AccomFood': 'Acc',
        'Construction': 'Cons',
        'FinanceIns': 'Fin',
        'Healthcare': 'HC',
        'Information': 'Info',
        'Manufacturing': 'Man',
        'OilGas': 'Oil',
        'RealEstate': 'RE',
        'Services': 'Serv',
        'TransportWarehouse': 'Transp',
        'Utilities': 'Util',
        'Wholesale': 'Whol',
    }.items()
})

In [4]:
# Year fixed effects: one 0/1 column per calendar year, named year_<YYYY>.
# year_2002 (the first year) is removed so the remaining dummies are not collinear
# with the regression constant; it becomes the base year.
yeardummies = pd.get_dummies(data['year'], prefix='year', dtype=int).drop(columns='year_2002')
# Final modelling frame: factors, sector columns and year dummies side by side.
factordata = pd.concat([withsectors, yeardummies], axis=1)

# Table 1: Baseline

**Just the FF3**

In [5]:
# Table 1, column (1): the three Fama-French factors plus year fixed effects.
T1X1 = sm.add_constant(factordata[[
    'Mkt-RF', 'SMB', 'HML',
    # Year dummies 2003-2023; year_2002 is not in the list, so it is the base year.
    *(f'year_{yr}' for yr in range(2003, 2024)),
]])
T1y1 = factordata["ExcessReturn"]
# OLS fitted with the HC1 heteroskedasticity-robust covariance estimator.
T1model1 = sm.OLS(endog=T1y1, exog=T1X1).fit(cov_type='HC1')
print(T1model1.summary())

                            OLS Regression Results                            
Dep. Variable:           ExcessReturn   R-squared:                       0.530
Model:                            OLS   Adj. R-squared:                  0.530
Method:                 Least Squares   F-statistic:                 1.466e+04
Date:                Wed, 09 Apr 2025   Prob (F-statistic):               0.00
Time:                        22:57:25   Log-Likelihood:             1.5732e+05
No. Observations:              247693   AIC:                        -3.146e+05
Df Residuals:                  247668   BIC:                        -3.143e+05
Df Model:                          24                                         
Covariance Type:                  HC1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.1284      0.002    -56.491      0.0

**Just the FF5**

In [6]:
# Table 1, column (2): the five Fama-French factors plus year fixed effects.
T1X2 = sm.add_constant(factordata[[
    'Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA',
    # Year dummies 2003-2023; year_2002 is not in the list, so it is the base year.
    *(f'year_{yr}' for yr in range(2003, 2024)),
]])
T1y2 = factordata["ExcessReturn"]
# OLS fitted with the HC1 heteroskedasticity-robust covariance estimator.
T1model2 = sm.OLS(endog=T1y2, exog=T1X2).fit(cov_type='HC1')
print(T1model2.summary())

                            OLS Regression Results                            
Dep. Variable:           ExcessReturn   R-squared:                       0.531
Model:                            OLS   Adj. R-squared:                  0.531
Method:                 Least Squares   F-statistic:                 1.363e+04
Date:                Wed, 09 Apr 2025   Prob (F-statistic):               0.00
Time:                        22:57:25   Log-Likelihood:             1.5755e+05
No. Observations:              247693   AIC:                        -3.150e+05
Df Residuals:                  247666   BIC:                        -3.148e+05
Df Model:                          26                                         
Covariance Type:                  HC1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.1248      0.002    -53.297      0.0

**FF3 + GMB_U**

In [7]:
# Table 1, column (3): FF3 factors, the GMB_U factor, and year fixed effects.
T1X3 = sm.add_constant(factordata[[
    'Mkt-RF', 'SMB', 'HML', 'GMB_U',
    # Year dummies 2003-2023; year_2002 is not in the list, so it is the base year.
    *(f'year_{yr}' for yr in range(2003, 2024)),
]])
T1y3 = factordata["ExcessReturn"]
# OLS fitted with the HC1 heteroskedasticity-robust covariance estimator.
T1model3 = sm.OLS(endog=T1y3, exog=T1X3).fit(cov_type='HC1')
print(T1model3.summary())

                            OLS Regression Results                            
Dep. Variable:           ExcessReturn   R-squared:                       0.534
Model:                            OLS   Adj. R-squared:                  0.534
Method:                 Least Squares   F-statistic:                 1.408e+04
Date:                Wed, 09 Apr 2025   Prob (F-statistic):               0.00
Time:                        22:57:25   Log-Likelihood:             1.5824e+05
No. Observations:              247693   AIC:                        -3.164e+05
Df Residuals:                  247667   BIC:                        -3.162e+05
Df Model:                          25                                         
Covariance Type:                  HC1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.1289      0.002    -56.507      0.0

**FF5 + GMB_U**

In [8]:
# Table 1, column (4): FF5 factors, the GMB_U factor, and year fixed effects.
T1X4 = sm.add_constant(factordata[[
    'Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA', 'GMB_U',
    # Year dummies 2003-2023; year_2002 is not in the list, so it is the base year.
    *(f'year_{yr}' for yr in range(2003, 2024)),
]])
T1y4 = factordata["ExcessReturn"]
# OLS fitted with the HC1 heteroskedasticity-robust covariance estimator.
T1model4 = sm.OLS(endog=T1y4, exog=T1X4).fit(cov_type='HC1')
print(T1model4.summary())

                            OLS Regression Results                            
Dep. Variable:           ExcessReturn   R-squared:                       0.536
Model:                            OLS   Adj. R-squared:                  0.536
Method:                 Least Squares   F-statistic:                 1.316e+04
Date:                Wed, 09 Apr 2025   Prob (F-statistic):               0.00
Time:                        22:57:26   Log-Likelihood:             1.5878e+05
No. Observations:              247693   AIC:                        -3.175e+05
Df Residuals:                  247665   BIC:                        -3.172e+05
Df Model:                          27                                         
Covariance Type:                  HC1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.1176      0.002    -49.857      0.0

**FF3 + GMB_S**

In [9]:
# Table 1, column (5): FF3 factors, the GMB_S factor, and year fixed effects.
T1X5 = sm.add_constant(factordata[[
    'Mkt-RF', 'SMB', 'HML', 'GMB_S',
    # Year dummies 2003-2023; year_2002 is not in the list, so it is the base year.
    *(f'year_{yr}' for yr in range(2003, 2024)),
]])
T1y5 = factordata["ExcessReturn"]
# OLS fitted with the HC1 heteroskedasticity-robust covariance estimator.
T1model5 = sm.OLS(endog=T1y5, exog=T1X5).fit(cov_type='HC1')
print(T1model5.summary())

                            OLS Regression Results                            
Dep. Variable:           ExcessReturn   R-squared:                       0.533
Model:                            OLS   Adj. R-squared:                  0.533
Method:                 Least Squares   F-statistic:                 1.411e+04
Date:                Wed, 09 Apr 2025   Prob (F-statistic):               0.00
Time:                        22:57:26   Log-Likelihood:             1.5798e+05
No. Observations:              247693   AIC:                        -3.159e+05
Df Residuals:                  247667   BIC:                        -3.156e+05
Df Model:                          25                                         
Covariance Type:                  HC1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.1299      0.002    -57.139      0.0

**FF5 + GMB_S**

In [10]:
# Table 1, column (6): FF5 factors, the GMB_S factor, and year fixed effects.
T1X6 = sm.add_constant(factordata[[
    'Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA', 'GMB_S',
    # Year dummies 2003-2023; year_2002 is not in the list, so it is the base year.
    *(f'year_{yr}' for yr in range(2003, 2024)),
]])
T1y6 = factordata["ExcessReturn"]
# OLS fitted with the HC1 heteroskedasticity-robust covariance estimator.
T1model6 = sm.OLS(endog=T1y6, exog=T1X6).fit(cov_type='HC1')
print(T1model6.summary())

                            OLS Regression Results                            
Dep. Variable:           ExcessReturn   R-squared:                       0.535
Model:                            OLS   Adj. R-squared:                  0.535
Method:                 Least Squares   F-statistic:                 1.313e+04
Date:                Wed, 09 Apr 2025   Prob (F-statistic):               0.00
Time:                        22:57:26   Log-Likelihood:             1.5854e+05
No. Observations:              247693   AIC:                        -3.170e+05
Df Residuals:                  247665   BIC:                        -3.167e+05
Df Model:                          27                                         
Covariance Type:                  HC1                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.1210      0.002    -51.340      0.0

In [11]:
# Optional LaTeX export of Table 1 (all six baseline models side by side).
# Uncomment only after Stargazer has been installed and imported, as described in
# the note beneath the imports cell at the top of the notebook.
# T1stargazer = Stargazer([T1model1, T1model2, T1model3, T1model4, T1model5, T1model6])
# print(T1stargazer.render_latex())

# Table 2: How does the GMB Factor Change with the Sector?

## Table 2a: GMB_U

**(1) FF3 + GMB_U + (Sector Indicators)GMB_U + Time Fixed Effects**

In [12]:
# Table 2a, column (1): FF3 + GMB_U + sector-specific GMB_U interactions + year fixed effects.
T2aX1 = sm.add_constant(factordata[[
    'Mkt-RF', 'SMB', 'HML', 'GMB_U',
    # GMB_U x sector interactions; retail has none.
    *(f'IndU_{abbr}' for abbr in ('Acc', 'Cons', 'Fin', 'HC', 'Info', 'Man',
                                  'Oil', 'RE', 'Serv', 'Transp', 'Util', 'Whol')),
    # Year dummies 2003-2023; year_2002 is not in the list, so it is the base year.
    *(f'year_{yr}' for yr in range(2003, 2024)),
]])
T2ay1 = factordata["ExcessReturn"]
# OLS fitted with the HC1 heteroskedasticity-robust covariance estimator.
T2amodel1 = sm.OLS(endog=T2ay1, exog=T2aX1).fit(cov_type='HC1')
print(T2amodel1.summary())

                            OLS Regression Results                            
Dep. Variable:           ExcessReturn   R-squared:                       0.536
Model:                            OLS   Adj. R-squared:                  0.536
Method:                 Least Squares   F-statistic:                     9616.
Date:                Wed, 09 Apr 2025   Prob (F-statistic):               0.00
Time:                        22:57:27   Log-Likelihood:             1.5887e+05
No. Observations:              247693   AIC:                        -3.177e+05
Df Residuals:                  247655   BIC:                        -3.173e+05
Df Model:                          37                                         
Covariance Type:                  HC1                                         
                  coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------
const          -0.1289      0.002    -56.369      

**(2) FF3 + GMB_U + (Sector Indicators)GMB_U + Time Fixed Effects + SECTOR FIXED EFFECTS**

In [13]:
# Table 2a, column (2): as column (1), plus sector fixed effects (sector dummy levels).
T2aX2 = sm.add_constant(factordata[[
    'Mkt-RF', 'SMB', 'HML', 'GMB_U',
    # GMB_U x sector interactions; retail has none.
    *(f'IndU_{abbr}' for abbr in ('Acc', 'Cons', 'Fin', 'HC', 'Info', 'Man',
                                  'Oil', 'RE', 'Serv', 'Transp', 'Util', 'Whol')),
    # Year dummies 2003-2023; year_2002 is not in the list, so it is the base year.
    *(f'year_{yr}' for yr in range(2003, 2024)),
    # Sector dummies entered in levels; no retail dummy is listed.
    *(f'sector_{label}' for label in ('AccomFood', 'Construction', 'FinanceIns', 'Healthcare',
                                      'Information', 'Manufacturing', 'OilGas', 'RealEstate',
                                      'Services', 'TransportWarehouse',
                                      'Utilities', 'Wholesale')),
]])
T2ay2 = factordata["ExcessReturn"]
# OLS fitted with the HC1 heteroskedasticity-robust covariance estimator.
T2amodel2 = sm.OLS(endog=T2ay2, exog=T2aX2).fit(cov_type='HC1')
print(T2amodel2.summary())

                            OLS Regression Results                            
Dep. Variable:           ExcessReturn   R-squared:                       0.536
Model:                            OLS   Adj. R-squared:                  0.536
Method:                 Least Squares   F-statistic:                     7305.
Date:                Wed, 09 Apr 2025   Prob (F-statistic):               0.00
Time:                        22:57:27   Log-Likelihood:             1.5889e+05
No. Observations:              247693   AIC:                        -3.177e+05
Df Residuals:                  247643   BIC:                        -3.172e+05
Df Model:                          49                                         
Covariance Type:                  HC1                                         
                                coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------------------
const                 

**(3) FF5 + GMB_U + (Sector Indicators)GMB_U + Time Fixed Effects**

In [14]:
# Table 2a, column (3): FF5 + GMB_U + sector-specific GMB_U interactions + year fixed effects.
T2aX3 = sm.add_constant(factordata[[
    'Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA', 'GMB_U',
    # GMB_U x sector interactions; retail has none.
    *(f'IndU_{abbr}' for abbr in ('Acc', 'Cons', 'Fin', 'HC', 'Info', 'Man',
                                  'Oil', 'RE', 'Serv', 'Transp', 'Util', 'Whol')),
    # Year dummies 2003-2023; year_2002 is not in the list, so it is the base year.
    *(f'year_{yr}' for yr in range(2003, 2024)),
]])
T2ay3 = factordata["ExcessReturn"]
# OLS fitted with the HC1 heteroskedasticity-robust covariance estimator.
T2amodel3 = sm.OLS(endog=T2ay3, exog=T2aX3).fit(cov_type='HC1')
print(T2amodel3.summary())

                            OLS Regression Results                            
Dep. Variable:           ExcessReturn   R-squared:                       0.538
Model:                            OLS   Adj. R-squared:                  0.538
Method:                 Least Squares   F-statistic:                     9192.
Date:                Wed, 09 Apr 2025   Prob (F-statistic):               0.00
Time:                        22:57:28   Log-Likelihood:             1.5941e+05
No. Observations:              247693   AIC:                        -3.187e+05
Df Residuals:                  247653   BIC:                        -3.183e+05
Df Model:                          39                                         
Covariance Type:                  HC1                                         
                  coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------
const          -0.1177      0.002    -49.780      

**(4) FF5 + GMB_U + (Sector Indicators)GMB_U + Time Fixed Effects + SECTOR FIXED EFFECTS**

In [15]:
# Table 2a, column (4): as column (3), plus sector fixed effects (sector dummy levels).
T2aX4 = sm.add_constant(factordata[[
    'Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA', 'GMB_U',
    # GMB_U x sector interactions; retail has none.
    *(f'IndU_{abbr}' for abbr in ('Acc', 'Cons', 'Fin', 'HC', 'Info', 'Man',
                                  'Oil', 'RE', 'Serv', 'Transp', 'Util', 'Whol')),
    # Year dummies 2003-2023; year_2002 is not in the list, so it is the base year.
    *(f'year_{yr}' for yr in range(2003, 2024)),
    # Sector dummies entered in levels; no retail dummy is listed.
    *(f'sector_{label}' for label in ('AccomFood', 'Construction', 'FinanceIns', 'Healthcare',
                                      'Information', 'Manufacturing', 'OilGas', 'RealEstate',
                                      'Services', 'TransportWarehouse',
                                      'Utilities', 'Wholesale')),
]])
T2ay4 = factordata["ExcessReturn"]
# OLS fitted with the HC1 heteroskedasticity-robust covariance estimator.
T2amodel4 = sm.OLS(endog=T2ay4, exog=T2aX4).fit(cov_type='HC1')
print(T2amodel4.summary())

                            OLS Regression Results                            
Dep. Variable:           ExcessReturn   R-squared:                       0.538
Model:                            OLS   Adj. R-squared:                  0.538
Method:                 Least Squares   F-statistic:                     7070.
Date:                Wed, 09 Apr 2025   Prob (F-statistic):               0.00
Time:                        22:57:28   Log-Likelihood:             1.5943e+05
No. Observations:              247693   AIC:                        -3.188e+05
Df Residuals:                  247641   BIC:                        -3.182e+05
Df Model:                          51                                         
Covariance Type:                  HC1                                         
                                coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------------------
const                 

In [16]:
# Optional LaTeX export of Table 2a (the four GMB_U sector models side by side).
# Uncomment only after Stargazer has been installed and imported, as described in
# the note beneath the imports cell at the top of the notebook.
# T2astargazer = Stargazer([T2amodel1, T2amodel2, T2amodel3, T2amodel4])
# print(T2astargazer.render_latex())

## Table 2b: GMB_S

**(1) FF3 + GMB_S + (Sector Indicators)GMB_S + Time Fixed Effects**

In [17]:
# Table 2b, column (1): FF3 + GMB_S + sector-specific GMB_S interactions + year fixed effects.
T2bX1 = sm.add_constant(factordata[[
    'Mkt-RF', 'SMB', 'HML', 'GMB_S',
    # GMB_S x sector interactions; retail has none.
    *(f'IndS_{abbr}' for abbr in ('Acc', 'Cons', 'Fin', 'HC', 'Info', 'Man',
                                  'Oil', 'RE', 'Serv', 'Transp', 'Util', 'Whol')),
    # Year dummies 2003-2023; year_2002 is not in the list, so it is the base year.
    *(f'year_{yr}' for yr in range(2003, 2024)),
]])
T2by1 = factordata["ExcessReturn"]
# OLS fitted with the HC1 heteroskedasticity-robust covariance estimator.
T2bmodel1 = sm.OLS(endog=T2by1, exog=T2bX1).fit(cov_type='HC1')
print(T2bmodel1.summary())

                            OLS Regression Results                            
Dep. Variable:           ExcessReturn   R-squared:                       0.535
Model:                            OLS   Adj. R-squared:                  0.535
Method:                 Least Squares   F-statistic:                     9639.
Date:                Wed, 09 Apr 2025   Prob (F-statistic):               0.00
Time:                        22:57:29   Log-Likelihood:             1.5859e+05
No. Observations:              247693   AIC:                        -3.171e+05
Df Residuals:                  247655   BIC:                        -3.167e+05
Df Model:                          37                                         
Covariance Type:                  HC1                                         
                  coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------
const          -0.1297      0.002    -57.045      

**(2) FF3 + GMB_S + (Sector Indicators)GMB_S + Time Fixed Effects + SECTOR FIXED EFFECTS**

In [18]:
# Table 2b, column (2): as column (1), plus sector fixed effects (sector dummy levels).
T2bX2 = sm.add_constant(factordata[[
    'Mkt-RF', 'SMB', 'HML', 'GMB_S',
    # GMB_S x sector interactions; retail has none.
    *(f'IndS_{abbr}' for abbr in ('Acc', 'Cons', 'Fin', 'HC', 'Info', 'Man',
                                  'Oil', 'RE', 'Serv', 'Transp', 'Util', 'Whol')),
    # Year dummies 2003-2023; year_2002 is not in the list, so it is the base year.
    *(f'year_{yr}' for yr in range(2003, 2024)),
    # Sector dummies entered in levels; no retail dummy is listed.
    *(f'sector_{label}' for label in ('AccomFood', 'Construction', 'FinanceIns', 'Healthcare',
                                      'Information', 'Manufacturing', 'OilGas', 'RealEstate',
                                      'Services', 'TransportWarehouse',
                                      'Utilities', 'Wholesale')),
]])
T2by2 = factordata["ExcessReturn"]
# OLS fitted with the HC1 heteroskedasticity-robust covariance estimator.
T2bmodel2 = sm.OLS(endog=T2by2, exog=T2bX2).fit(cov_type='HC1')
print(T2bmodel2.summary())

                            OLS Regression Results                            
Dep. Variable:           ExcessReturn   R-squared:                       0.535
Model:                            OLS   Adj. R-squared:                  0.535
Method:                 Least Squares   F-statistic:                     7322.
Date:                Wed, 09 Apr 2025   Prob (F-statistic):               0.00
Time:                        22:57:29   Log-Likelihood:             1.5861e+05
No. Observations:              247693   AIC:                        -3.171e+05
Df Residuals:                  247643   BIC:                        -3.166e+05
Df Model:                          49                                         
Covariance Type:                  HC1                                         
                                coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------------------
const                 

**(3) FF5 + GMB_S + (Sector Indicators)GMB_S + Time Fixed Effects**

In [19]:
# Table 2b, column (3): FF5 + GMB_S + sector-specific GMB_S interactions + year fixed effects.
T2bX3 = sm.add_constant(factordata[[
    'Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA', 'GMB_S',
    # GMB_S x sector interactions; retail has none.
    *(f'IndS_{abbr}' for abbr in ('Acc', 'Cons', 'Fin', 'HC', 'Info', 'Man',
                                  'Oil', 'RE', 'Serv', 'Transp', 'Util', 'Whol')),
    # Year dummies 2003-2023; year_2002 is not in the list, so it is the base year.
    *(f'year_{yr}' for yr in range(2003, 2024)),
]])
T2by3 = factordata["ExcessReturn"]
# OLS fitted with the HC1 heteroskedasticity-robust covariance estimator.
T2bmodel3 = sm.OLS(endog=T2by3, exog=T2bX3).fit(cov_type='HC1')
print(T2bmodel3.summary())

                            OLS Regression Results                            
Dep. Variable:           ExcessReturn   R-squared:                       0.537
Model:                            OLS   Adj. R-squared:                  0.537
Method:                 Least Squares   F-statistic:                     9179.
Date:                Wed, 09 Apr 2025   Prob (F-statistic):               0.00
Time:                        22:57:30   Log-Likelihood:             1.5913e+05
No. Observations:              247693   AIC:                        -3.182e+05
Df Residuals:                  247653   BIC:                        -3.178e+05
Df Model:                          39                                         
Covariance Type:                  HC1                                         
                  coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------
const          -0.1209      0.002    -51.297      

**(4) FF5 + GMB_S + (Sector Indicators)GMB_S + Time Fixed Effects + SECTOR FIXED EFFECTS**

In [20]:
# Table 2b, column (4): as column (3), plus sector fixed effects (sector dummy levels).
T2bX4 = sm.add_constant(factordata[[
    'Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA', 'GMB_S',
    # GMB_S x sector interactions; retail has none.
    *(f'IndS_{abbr}' for abbr in ('Acc', 'Cons', 'Fin', 'HC', 'Info', 'Man',
                                  'Oil', 'RE', 'Serv', 'Transp', 'Util', 'Whol')),
    # Year dummies 2003-2023; year_2002 is not in the list, so it is the base year.
    *(f'year_{yr}' for yr in range(2003, 2024)),
    # Sector dummies entered in levels; no retail dummy is listed.
    *(f'sector_{label}' for label in ('AccomFood', 'Construction', 'FinanceIns', 'Healthcare',
                                      'Information', 'Manufacturing', 'OilGas', 'RealEstate',
                                      'Services', 'TransportWarehouse',
                                      'Utilities', 'Wholesale')),
]])
T2by4 = factordata["ExcessReturn"]
# OLS fitted with the HC1 heteroskedasticity-robust covariance estimator.
T2bmodel4 = sm.OLS(endog=T2by4, exog=T2bX4).fit(cov_type='HC1')
print(T2bmodel4.summary())

                            OLS Regression Results                            
Dep. Variable:           ExcessReturn   R-squared:                       0.537
Model:                            OLS   Adj. R-squared:                  0.537
Method:                 Least Squares   F-statistic:                     7062.
Date:                Wed, 09 Apr 2025   Prob (F-statistic):               0.00
Time:                        22:57:31   Log-Likelihood:             1.5915e+05
No. Observations:              247693   AIC:                        -3.182e+05
Df Residuals:                  247641   BIC:                        -3.177e+05
Df Model:                          51                                         
Covariance Type:                  HC1                                         
                                coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------------------
const                 

In [21]:
# Optional LaTeX export of Table 2b (the four GMB_S sector models side by side).
# Uncomment only after Stargazer has been installed and imported, as described in
# the note beneath the imports cell at the top of the notebook.
# T2bstargazer = Stargazer([T2bmodel1, T2bmodel2, T2bmodel3, T2bmodel4])
# print(T2bstargazer.render_latex())

# Table 3: Estimated vs. Reported Emissions

**FF3 + GMB_U + (Method Indicator)GMB_U + Method Ind.**

In [22]:
# Table 3, column (1): FF3 + Method_Estimated indicator + GMB_U + their interaction
# (IndGMB_U = GMB_U * Method_Estimated) + year fixed effects.
T3X1 = sm.add_constant(factordata[[
    'Mkt-RF', 'SMB', 'HML', 'Method_Estimated', 'GMB_U', 'IndGMB_U',
    # Year dummies 2003-2023; year_2002 is not in the list, so it is the base year.
    *(f'year_{yr}' for yr in range(2003, 2024)),
]])
T3y1 = factordata["ExcessReturn"]
# OLS fitted with the HC1 heteroskedasticity-robust covariance estimator.
T3model1 = sm.OLS(endog=T3y1, exog=T3X1).fit(cov_type='HC1')
print(T3model1.summary())

                            OLS Regression Results                            
Dep. Variable:           ExcessReturn   R-squared:                       0.535
Model:                            OLS   Adj. R-squared:                  0.535
Method:                 Least Squares   F-statistic:                 1.311e+04
Date:                Wed, 09 Apr 2025   Prob (F-statistic):               0.00
Time:                        22:57:32   Log-Likelihood:             1.5853e+05
No. Observations:              247693   AIC:                        -3.170e+05
Df Residuals:                  247665   BIC:                        -3.167e+05
Df Model:                          27                                         
Covariance Type:                  HC1                                         
                       coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------
const               -0.1281      0.002  

**FF5 + GMB_U + (Method Indicator)GMB_U + Method Ind.**

In [23]:
# Design matrix: FF5 factors, the Method_Estimated indicator, GMB_U and the
# IndGMB_U term, followed by the year dummies year_2003 ... year_2023.
T3X2 = sm.add_constant(
    factordata[
        ['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA', 'Method_Estimated', 'GMB_U', 'IndGMB_U']
        + [f'year_{yr}' for yr in range(2003, 2024)]
    ]
)
T3y2 = factordata['ExcessReturn']

# OLS fit with the HC1 heteroskedasticity-robust covariance estimator.
T3model2 = sm.OLS(endog=T3y2, exog=T3X2).fit(cov_type='HC1')
print(T3model2.summary())

                            OLS Regression Results                            
Dep. Variable:           ExcessReturn   R-squared:                       0.537
Model:                            OLS   Adj. R-squared:                  0.537
Method:                 Least Squares   F-statistic:                 1.230e+04
Date:                Wed, 09 Apr 2025   Prob (F-statistic):               0.00
Time:                        22:57:33   Log-Likelihood:             1.5908e+05
No. Observations:              247693   AIC:                        -3.181e+05
Df Residuals:                  247663   BIC:                        -3.178e+05
Df Model:                          29                                         
Covariance Type:                  HC1                                         
                       coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------
const               -0.1167      0.002  

**FF3 + GMB_S + (Method Indicator)GMB_S + Method Ind.**

In [24]:
# Design matrix: FF3 factors, the Method_Estimated indicator, GMB_S and the
# IndGMB_S term, followed by the year dummies year_2003 ... year_2023.
T3X3 = sm.add_constant(
    factordata[
        ['Mkt-RF', 'SMB', 'HML', 'Method_Estimated', 'GMB_S', 'IndGMB_S']
        + [f'year_{yr}' for yr in range(2003, 2024)]
    ]
)
T3y3 = factordata['ExcessReturn']

# OLS fit with the HC1 heteroskedasticity-robust covariance estimator.
T3model3 = sm.OLS(endog=T3y3, exog=T3X3).fit(cov_type='HC1')
print(T3model3.summary())

                            OLS Regression Results                            
Dep. Variable:           ExcessReturn   R-squared:                       0.533
Model:                            OLS   Adj. R-squared:                  0.533
Method:                 Least Squares   F-statistic:                 1.311e+04
Date:                Wed, 09 Apr 2025   Prob (F-statistic):               0.00
Time:                        22:57:34   Log-Likelihood:             1.5807e+05
No. Observations:              247693   AIC:                        -3.161e+05
Df Residuals:                  247665   BIC:                        -3.158e+05
Df Model:                          27                                         
Covariance Type:                  HC1                                         
                       coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------
const               -0.1303      0.002  

**FF5 + GMB_S + (Method Indicator)GMB_S + Method Ind.**

In [25]:
# Design matrix: FF5 factors, the Method_Estimated indicator, GMB_S and the
# IndGMB_S term, followed by the year dummies year_2003 ... year_2023.
T3X4 = sm.add_constant(
    factordata[
        ['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA', 'Method_Estimated', 'GMB_S', 'IndGMB_S']
        + [f'year_{yr}' for yr in range(2003, 2024)]
    ]
)
T3y4 = factordata['ExcessReturn']

# OLS fit with the HC1 heteroskedasticity-robust covariance estimator.
T3model4 = sm.OLS(endog=T3y4, exog=T3X4).fit(cov_type='HC1')
print(T3model4.summary())

                            OLS Regression Results                            
Dep. Variable:           ExcessReturn   R-squared:                       0.535
Model:                            OLS   Adj. R-squared:                  0.535
Method:                 Least Squares   F-statistic:                 1.225e+04
Date:                Wed, 09 Apr 2025   Prob (F-statistic):               0.00
Time:                        22:57:34   Log-Likelihood:             1.5863e+05
No. Observations:              247693   AIC:                        -3.172e+05
Df Residuals:                  247663   BIC:                        -3.169e+05
Df Model:                          29                                         
Covariance Type:                  HC1                                         
                       coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------
const               -0.1214      0.002  

In [26]:
# Optional LaTeX export: uncomment to render the four T3 models side by side.
# Requires `from stargazer.stargazer import Stargazer` (see the install note
# at the top of the notebook).
# T3stargazer = Stargazer([T3model1, T3model2, T3model3, T3model4])
# print(T3stargazer.render_latex())