In [68]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
from linearmodels import PanelOLS
from scipy.stats.mstats import winsorize 
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso

In [69]:
regression_data = pd.read_csv('merged_data_sss.csv')

In [71]:
regression_data['gvkey'].nunique()

1423

In [72]:
# check
regression_data = regression_data.drop('Unnamed: 0', axis=1)
regression_data.describe()

Unnamed: 0,gvkey,fyear,month,RET,MOM,BETA,VOLAT,total_emission_scope1,total_emission_scope2,emission_growth_scope1,...,carbon_intensity_scope2,LOGSIZE,B/M,LEVERAGE,INVEST/A,ROE,LOGPPE,SALESGR,EPSGR,RET1
count,81605.0,81605.0,81605.0,81605.0,81605.0,81605.0,81605.0,81605.0,81605.0,81605.0,...,81605.0,81605.0,81605.0,81605.0,81605.0,81605.0,81605.0,81605.0,81605.0,81605.0
mean,61120.654923,2017.409767,6.522738,1.285111,0.120438,1.195033,0.103849,10.411239,10.627489,0.053134,...,0.314301,8.235747,0.577298,0.27588,0.035363,7.217507,6.841127,0.011243,0.004939,17.425595
std,69006.711553,2.158369,3.451686,12.473213,0.432692,0.727854,0.057046,2.689473,2.067171,0.296586,...,0.38109,1.762027,0.530395,0.215301,0.033117,33.035337,2.093872,0.328364,0.227102,53.736741
min,1004.0,2011.0,1.0,-87.697842,-0.832704,-1.04456,0.025872,0.693147,2.544276,-0.587399,...,0.008185,-4.612195,-0.040954,0.0,0.0,-109.224422,-2.465104,-2.260408,-1.598919,-95.067633
25%,10860.0,2016.0,4.0,-4.819277,-0.12458,0.757661,0.063082,8.641213,9.258057,-0.074867,...,0.085381,7.070696,0.230611,0.088988,0.012404,0.751054,5.498769,-0.015899,-0.022173,-9.791332
50%,24783.0,2018.0,7.0,1.053142,0.082349,1.11952,0.090176,10.219274,10.620766,0.025733,...,0.176111,8.231717,0.444195,0.251456,0.025776,10.30303,6.880971,0.02143,0.003362,11.590107
75%,133869.0,2019.0,10.0,6.870229,0.297905,1.546813,0.129241,11.925332,12.018952,0.137206,...,0.377914,9.407562,0.758144,0.412147,0.047656,19.134897,8.165563,0.073998,0.027182,34.970843
max,328795.0,2020.0,12.0,99.191686,3.25,4.147888,0.35674,18.713283,16.571679,1.124937,...,1.780249,14.627616,2.663034,0.814584,0.163615,91.432361,13.144907,1.647431,1.808117,1042.570281


In [73]:
regression_data.columns

Index(['gvkey', 'fyear', 'month', 'RET', 'MOM', 'BETA', 'VOLAT',
       'total_emission_scope1', 'total_emission_scope2',
       'emission_growth_scope1', 'emission_growth_scope2',
       'carbon_intensity_scope1', 'carbon_intensity_scope2', 'LOGSIZE', 'B/M',
       'LEVERAGE', 'INVEST/A', 'ROE', 'LOGPPE', 'SALESGR', 'EPSGR', 'RET1',
       'datatime', 'GICS_level_1'],
      dtype='object')

In [74]:
regression_data = regression_data.sort_values(by=['gvkey', 'fyear', 'month'])

columns_to_shift = ['RET', 'MOM', 'BETA', 'VOLAT',
       'total_emission_scope1', 'total_emission_scope2',
       'emission_growth_scope1', 'emission_growth_scope2',
       'carbon_intensity_scope1', 'carbon_intensity_scope2', 'LOGSIZE', 'B/M',
       'LEVERAGE', 'INVEST/A', 'ROE', 'LOGPPE', 'SALESGR', 'EPSGR', 'RET1']

def shift_if_consecutive(group):
    group = group.sort_values(by='datatime')
    
    group['datatime'] = pd.to_datetime(group['datatime'])
    
    # check if data is consecutive on year-month
    group['consecutive'] = group['datatime'].diff().dt.days <= 31
    
    for col in columns_to_shift:
        group[f'{col}_t-1'] = group[col].shift(1)
        group[f'{col}_t-1'] = group.apply(lambda row: row[col] if not row['consecutive'] else row[f'{col}_t-1'], axis=1)
    
    return group.drop(columns=['consecutive', 'datatime'])

# apply to each gvkey group
regression_data_shifted = regression_data.groupby('gvkey').apply(shift_if_consecutive)
regression_data_shifted.columns = [col.replace('/', '_') for col in regression_data_shifted.columns]

print(regression_data_shifted.head())

         gvkey  fyear  month       RET       MOM      BETA     VOLAT  \
gvkey                                                                  
1004  0   1004   2017      1 -1.815431  0.433590  1.728236  0.106410   
      1   1004   2017      2  3.790447  0.695722  1.697156  0.106333   
      2   1004   2017      3 -0.148456  0.499601  1.585366  0.106969   
      3   1004   2017      4  7.017544  0.432948  1.536825  0.107289   
      4   1004   2017      5 -3.834398  0.448269  1.553771  0.109533   

         total_emission_scope1  total_emission_scope2  emission_growth_scope1  \
gvkey                                                                           
1004  0              10.991192              10.418976               -0.004909   
      1              10.991192              10.418976               -0.004909   
      2              10.991192              10.418976               -0.004909   
      3              10.991192              10.418976               -0.004909   
      4  

  regression_data_shifted = regression_data.groupby('gvkey').apply(shift_if_consecutive)


In [75]:
regression_data_shifted = regression_data_shifted.replace([np.inf, -np.inf], np.nan)

In [76]:
regression_data_shifted.columns

Index(['gvkey', 'fyear', 'month', 'RET', 'MOM', 'BETA', 'VOLAT',
       'total_emission_scope1', 'total_emission_scope2',
       'emission_growth_scope1', 'emission_growth_scope2',
       'carbon_intensity_scope1', 'carbon_intensity_scope2', 'LOGSIZE', 'B_M',
       'LEVERAGE', 'INVEST_A', 'ROE', 'LOGPPE', 'SALESGR', 'EPSGR', 'RET1',
       'GICS_level_1', 'RET_t-1', 'MOM_t-1', 'BETA_t-1', 'VOLAT_t-1',
       'total_emission_scope1_t-1', 'total_emission_scope2_t-1',
       'emission_growth_scope1_t-1', 'emission_growth_scope2_t-1',
       'carbon_intensity_scope1_t-1', 'carbon_intensity_scope2_t-1',
       'LOGSIZE_t-1', 'B_M_t-1', 'LEVERAGE_t-1', 'INVEST_A_t-1', 'ROE_t-1',
       'LOGPPE_t-1', 'SALESGR_t-1', 'EPSGR_t-1', 'RET1_t-1'],
      dtype='object')

### Regression on Log(Emission Scope1)

In [77]:
shifted_columns1 = ['RET', 'RET1', 'total_emission_scope1', 'LOGSIZE_t-1', 'B_M_t-1',
                     'ROE_t-1', 'LEVERAGE_t-1', 'INVEST_A_t-1', 'LOGPPE_t-1', 
                     'MOM_t-1', 'BETA_t-1', 'VOLAT_t-1', 'SALESGR', 'EPSGR']
reg_data1 = regression_data_shifted.dropna(subset=shifted_columns1)

Y1 = reg_data1['RET']
X1 = reg_data1[['total_emission_scope1', 'LOGSIZE_t-1', 'B_M_t-1', 'ROE_t-1', 
                'LEVERAGE_t-1', 'INVEST_A_t-1', 'LOGPPE_t-1', 'MOM_t-1', 'BETA_t-1', 
                'VOLAT_t-1', 'SALESGR_t-1', 'EPSGR_t-1']]

X1 = sm.add_constant(X1)

model1 = sm.OLS(Y1, X1)

results1 = model1.fit(cov_type='cluster', cov_kwds={'groups': reg_data1[['gvkey']].values})

print(results1.summary())

                            OLS Regression Results                            
Dep. Variable:                    RET   R-squared:                       0.022
Model:                            OLS   Adj. R-squared:                  0.022
Method:                 Least Squares   F-statistic:                     48.63
Date:                Mon, 24 Jun 2024   Prob (F-statistic):           2.14e-97
Time:                        02:29:39   Log-Likelihood:            -3.2080e+05
No. Observations:               81605   AIC:                         6.416e+05
Df Residuals:                   81592   BIC:                         6.418e+05
Df Model:                          12                                         
Covariance Type:              cluster                                         
                            coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------------
const                    -7.23

In [78]:
coeffs = results1.params
p_vals = results1.pvalues
print(p_vals)

const                    3.966727e-54
total_emission_scope1    7.385100e-02
LOGSIZE_t-1              3.040090e-62
B_M_t-1                  9.470348e-01
ROE_t-1                  1.293784e-10
LEVERAGE_t-1             6.122237e-01
INVEST_A_t-1             1.678545e-02
LOGPPE_t-1               1.101411e-21
MOM_t-1                  3.550799e-24
BETA_t-1                 8.592897e-01
VOLAT_t-1                2.407262e-76
SALESGR_t-1              8.555477e-05
EPSGR_t-1                1.821797e-11
dtype: float64


In [100]:
Y2 = reg_data1['RET']
X2 = reg_data1[['total_emission_scope1', 'LOGSIZE_t-1', 'B_M_t-1', 'ROE_t-1', 
                'LEVERAGE_t-1', 'INVEST_A_t-1', 'LOGPPE_t-1', 'MOM_t-1', 'BETA_t-1', 
                'VOLAT_t-1', 'SALESGR_t-1', 'EPSGR_t-1']]

############# year fixed effect ##############
year_dummies = pd.get_dummies(reg_data1['fyear'], prefix='year')
year_dummies = year_dummies.iloc[:, 1:]
year_dummies = year_dummies.astype(int)

############# month fixed effect ##############
month_dummies = pd.get_dummies(reg_data1['month'], prefix='month')
month_dummies = month_dummies.iloc[:, 1:]
month_dummies = month_dummies.astype(int)

X2 = pd.concat([X2, year_dummies], axis=1)
X2 = pd.concat([X2, month_dummies], axis=1)

X2 = sm.add_constant(X2)

model2 = sm.OLS(Y2, X2)

results2 = model2.fit()

print(results2.summary())

                            OLS Regression Results                            
Dep. Variable:                   RET1   R-squared:                       0.290
Model:                            OLS   Adj. R-squared:                  0.289
Method:                 Least Squares   F-statistic:                     1039.
Date:                Mon, 24 Jun 2024   Prob (F-statistic):               0.00
Time:                        02:31:35   Log-Likelihood:            -4.2696e+05
No. Observations:               81605   AIC:                         8.540e+05
Df Residuals:                   81572   BIC:                         8.543e+05
Df Model:                          32                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
const                   -54.23

In [101]:
Y3 = reg_data1['RET']
X3 = reg_data1[['total_emission_scope1', 'LOGSIZE_t-1', 'B_M_t-1', 'ROE_t-1', 
                'LEVERAGE_t-1', 'INVEST_A_t-1', 'LOGPPE_t-1', 'MOM_t-1', 'BETA_t-1', 
                'VOLAT_t-1', 'SALESGR_t-1', 'EPSGR_t-1']]

############# year fixed effect ##############
year_dummies = pd.get_dummies(reg_data1['fyear'], prefix='year')
year_dummies = year_dummies.iloc[:, 1:]
year_dummies = year_dummies.astype(int)

############# month fixed effect ##############
month_dummies = pd.get_dummies(reg_data1['month'], prefix='month')
month_dummies = month_dummies.iloc[:, 1:]
month_dummies = month_dummies.astype(int)

############# industry fixed effect ##############
industry_dummies = pd.get_dummies(reg_data1['GICS_level_1'], prefix='industry')
industry_dummies = industry_dummies.iloc[:, 1:]
industry_dummies = industry_dummies.astype(int)

X3 = pd.concat([X3, year_dummies], axis=1)
X3 = pd.concat([X3, month_dummies], axis=1)
X3 = pd.concat([X3, industry_dummies], axis=1)

X3 = sm.add_constant(X3)

model3 = sm.OLS(Y3, X3)

results3 = model3.fit()

print(results3.summary())

                            OLS Regression Results                            
Dep. Variable:                   RET1   R-squared:                       0.294
Model:                            OLS   Adj. R-squared:                  0.294
Method:                 Least Squares   F-statistic:                     809.3
Date:                Mon, 24 Jun 2024   Prob (F-statistic):               0.00
Time:                        02:31:37   Log-Likelihood:            -4.2670e+05
No. Observations:               81605   AIC:                         8.535e+05
Df Residuals:                   81562   BIC:                         8.539e+05
Df Model:                          42                                         
Covariance Type:            nonrobust                                         
                                      coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------------------
const     

### Regression on Log(Emission Scope2)

In [108]:
shifted_columns2 = ['RET', 'RET1', 'total_emission_scope2', 'LOGSIZE_t-1', 'B_M_t-1',
                     'ROE_t-1', 'LEVERAGE_t-1', 'INVEST_A_t-1', 'LOGPPE_t-1', 
                     'MOM_t-1', 'BETA_t-1', 'VOLAT_t-1', 'SALESGR_t-1', 'EPSGR_t-1']
reg_data2 = regression_data_shifted.dropna(subset=shifted_columns2)

Y4 = reg_data2['RET']
X4 = reg_data2[['total_emission_scope2', 'LOGSIZE_t-1', 'B_M_t-1', 'ROE_t-1', 
                'LEVERAGE_t-1', 'INVEST_A_t-1', 'LOGPPE_t-1', 'MOM_t-1', 'BETA_t-1', 
                'VOLAT_t-1', 'SALESGR', 'EPSGR']]

X4 = sm.add_constant(X4)

model4 = sm.OLS(Y4, X4)

results4 = model4.fit()

print(results4.summary())

                            OLS Regression Results                            
Dep. Variable:                   RET1   R-squared:                       0.229
Model:                            OLS   Adj. R-squared:                  0.229
Method:                 Least Squares   F-statistic:                     2022.
Date:                Mon, 24 Jun 2024   Prob (F-statistic):               0.00
Time:                        02:36:12   Log-Likelihood:            -4.3029e+05
No. Observations:               81605   AIC:                         8.606e+05
Df Residuals:                   81592   BIC:                         8.607e+05
Df Model:                          12                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
const                   -46.10

In [106]:
Y5 = reg_data2['RET']
X5 = reg_data2[['total_emission_scope2', 'LOGSIZE_t-1', 'B_M_t-1', 'ROE_t-1', 
                'LEVERAGE_t-1', 'INVEST_A_t-1', 'LOGPPE_t-1', 'MOM_t-1', 'BETA_t-1', 
                'VOLAT_t-1', 'SALESGR_t-1', 'EPSGR_t-1']]

############# year fixed effect ##############
year_dummies = pd.get_dummies(reg_data2['fyear'], prefix='year')
year_dummies = year_dummies.iloc[:, 1:]
year_dummies = year_dummies.astype(int)

############# month fixed effect ##############
month_dummies = pd.get_dummies(reg_data2['month'], prefix='month')
month_dummies = month_dummies.iloc[:, 1:]
month_dummies = month_dummies.astype(int)

X5 = pd.concat([X5, year_dummies], axis=1)
X5 = pd.concat([X5, month_dummies], axis=1)

X5 = sm.add_constant(X5)

model5 = sm.OLS(Y5, X5)

results5 = model5.fit()

print(results5.summary())

                            OLS Regression Results                            
Dep. Variable:                   RET1   R-squared:                       0.290
Model:                            OLS   Adj. R-squared:                  0.289
Method:                 Least Squares   F-statistic:                     1039.
Date:                Mon, 24 Jun 2024   Prob (F-statistic):               0.00
Time:                        02:35:59   Log-Likelihood:            -4.2696e+05
No. Observations:               81605   AIC:                         8.540e+05
Df Residuals:                   81572   BIC:                         8.543e+05
Df Model:                          32                                         
Covariance Type:            nonrobust                                         
                            coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------
const                   -47.25

In [107]:
Y6 = reg_data2['RET']
X6 = reg_data2[['total_emission_scope2', 'LOGSIZE_t-1', 'B_M_t-1', 'ROE_t-1', 
                'LEVERAGE_t-1', 'INVEST_A_t-1', 'LOGPPE_t-1', 'MOM_t-1', 'BETA_t-1', 
                'VOLAT_t-1', 'SALESGR_t-1', 'EPSGR_t-1']]

############# year fixed effect ##############
year_dummies = pd.get_dummies(reg_data2['fyear'], prefix='year')
year_dummies = year_dummies.iloc[:, 1:]
year_dummies = year_dummies.astype(int)

############# month fixed effect ##############
month_dummies = pd.get_dummies(reg_data2['month'], prefix='month')
month_dummies = month_dummies.iloc[:, 1:]
month_dummies = month_dummies.astype(int)

############# industry fixed effect ##############
industry_dummies = pd.get_dummies(reg_data2['GICS_level_1'], prefix='industry')
industry_dummies = industry_dummies.iloc[:, 1:]
industry_dummies = industry_dummies.astype(int)

X6 = pd.concat([X6, year_dummies], axis=1)
X6 = pd.concat([X6, month_dummies], axis=1)
X6 = pd.concat([X6, industry_dummies], axis=1)

X6 = sm.add_constant(X6)

model6 = sm.OLS(Y6, X6)

results6 = model6.fit()

print(results6.summary())

                            OLS Regression Results                            
Dep. Variable:                   RET1   R-squared:                       0.294
Model:                            OLS   Adj. R-squared:                  0.294
Method:                 Least Squares   F-statistic:                     810.3
Date:                Mon, 24 Jun 2024   Prob (F-statistic):               0.00
Time:                        02:36:04   Log-Likelihood:            -4.2669e+05
No. Observations:               81605   AIC:                         8.535e+05
Df Residuals:                   81562   BIC:                         8.539e+05
Df Model:                          42                                         
Covariance Type:            nonrobust                                         
                                      coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------------------
const     

### Regression on Emission Growth Scope1

In [110]:
shifted_columns1 = ['RET', 'RET1', 'emission_growth_scope1', 'LOGSIZE_t-1', 'B_M_t-1',
                     'ROE_t-1', 'LEVERAGE_t-1', 'INVEST_A_t-1', 'LOGPPE_t-1', 
                     'MOM_t-1', 'BETA_t-1', 'VOLAT_t-1', 'SALESGR', 'EPSGR']
reg_data1 = regression_data_shifted.dropna(subset=shifted_columns1)

Y1 = reg_data1['RET']
X1 = reg_data1[['emission_growth_scope1', 'LOGSIZE_t-1', 'B_M_t-1', 'ROE_t-1', 
                'LEVERAGE_t-1', 'INVEST_A_t-1', 'LOGPPE_t-1', 'MOM_t-1', 'BETA_t-1', 
                'VOLAT_t-1', 'SALESGR_t-1', 'EPSGR_t-1']]

X1 = sm.add_constant(X1)

model1 = sm.OLS(Y1, X1)

results1 = model1.fit()

print(results1.summary())

                            OLS Regression Results                            
Dep. Variable:                    RET   R-squared:                       0.022
Model:                            OLS   Adj. R-squared:                  0.022
Method:                 Least Squares   F-statistic:                     155.9
Date:                Mon, 24 Jun 2024   Prob (F-statistic):               0.00
Time:                        02:51:33   Log-Likelihood:            -3.2080e+05
No. Observations:               81605   AIC:                         6.416e+05
Df Residuals:                   81592   BIC:                         6.418e+05
Df Model:                          12                                         
Covariance Type:            nonrobust                                         
                             coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -6

In [85]:
############# year fixed effect ##############
year_dummies = pd.get_dummies(reg_data1['fyear'], prefix='year')
year_dummies = year_dummies.iloc[:, 1:]
year_dummies = year_dummies.astype(int)

############# month fixed effect ##############
month_dummies = pd.get_dummies(reg_data1['month'], prefix='month')
month_dummies = month_dummies.iloc[:, 1:]
month_dummies = month_dummies.astype(int)

############# industry fixed effect ##############
industry_dummies = pd.get_dummies(reg_data1['GICS_level_1'], prefix='industry')
industry_dummies = industry_dummies.iloc[:, 1:]
industry_dummies = industry_dummies.astype(int)

In [86]:
Y2 = reg_data1['RET']
X2 = reg_data1[['emission_growth_scope1', 'LOGSIZE_t-1', 'B_M_t-1', 'ROE_t-1', 
                'LEVERAGE_t-1', 'INVEST_A_t-1', 'LOGPPE_t-1', 'MOM_t-1', 'BETA_t-1', 
                'VOLAT_t-1', 'SALESGR_t-1', 'EPSGR_t-1']]


X2 = pd.concat([X2, year_dummies], axis=1)
X2 = pd.concat([X2, month_dummies], axis=1)

X2 = sm.add_constant(X2)

model2 = sm.OLS(Y2, X2)

results2 = model2.fit()

print(results2.summary())

                            OLS Regression Results                            
Dep. Variable:                    RET   R-squared:                       0.067
Model:                            OLS   Adj. R-squared:                  0.067
Method:                 Least Squares   F-statistic:                     183.6
Date:                Mon, 24 Jun 2024   Prob (F-statistic):               0.00
Time:                        02:29:41   Log-Likelihood:            -3.1889e+05
No. Observations:               81605   AIC:                         6.378e+05
Df Residuals:                   81572   BIC:                         6.382e+05
Df Model:                          32                                         
Covariance Type:            nonrobust                                         
                             coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -4

In [87]:
Y3 = reg_data1['RET']
X3 = reg_data1[['emission_growth_scope1', 'LOGSIZE_t-1', 'B_M_t-1', 'ROE_t-1', 
                'LEVERAGE_t-1', 'INVEST_A_t-1', 'LOGPPE_t-1', 'MOM_t-1', 'BETA_t-1', 
                'VOLAT_t-1', 'SALESGR_t-1', 'EPSGR_t-1']]

X3 = pd.concat([X3, year_dummies], axis=1)
X3 = pd.concat([X3, month_dummies], axis=1)
X3 = pd.concat([X3, industry_dummies], axis=1)

X3 = sm.add_constant(X3)

model3 = sm.OLS(Y3, X3)

results3 = model3.fit()

print(results3.summary())

                            OLS Regression Results                            
Dep. Variable:                    RET   R-squared:                       0.069
Model:                            OLS   Adj. R-squared:                  0.068
Method:                 Least Squares   F-statistic:                     143.6
Date:                Mon, 24 Jun 2024   Prob (F-statistic):               0.00
Time:                        02:29:41   Log-Likelihood:            -3.1882e+05
No. Observations:               81605   AIC:                         6.377e+05
Df Residuals:                   81562   BIC:                         6.381e+05
Df Model:                          42                                         
Covariance Type:            nonrobust                                         
                                      coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------------------
const     

### Regression on Emission Growth Scope2

In [88]:
shifted_columns2 = ['RET', 'RET1', 'emission_growth_scope2', 'LOGSIZE_t-1', 'B_M_t-1',
                     'ROE_t-1', 'LEVERAGE_t-1', 'INVEST_A_t-1', 'LOGPPE_t-1', 
                     'MOM_t-1', 'BETA_t-1', 'VOLAT_t-1', 'SALESGR_t-1', 'EPSGR_t-1']
reg_data2 = regression_data_shifted.dropna(subset=shifted_columns2)

Y4 = reg_data2['RET']
X4 = reg_data2[['emission_growth_scope2', 'LOGSIZE_t-1', 'B_M_t-1', 'ROE_t-1', 
                'LEVERAGE_t-1', 'INVEST_A_t-1', 'LOGPPE_t-1', 'MOM_t-1', 'BETA_t-1', 
                'VOLAT_t-1', 'SALESGR_t-1', 'EPSGR_t-1']]

X4 = sm.add_constant(X4)

model4 = sm.OLS(Y4, X4)

results4 = model4.fit()

print(results4.summary())

                            OLS Regression Results                            
Dep. Variable:                    RET   R-squared:                       0.022
Model:                            OLS   Adj. R-squared:                  0.022
Method:                 Least Squares   F-statistic:                     155.9
Date:                Mon, 24 Jun 2024   Prob (F-statistic):               0.00
Time:                        02:29:41   Log-Likelihood:            -3.2080e+05
No. Observations:               81605   AIC:                         6.416e+05
Df Residuals:                   81592   BIC:                         6.418e+05
Df Model:                          12                                         
Covariance Type:            nonrobust                                         
                             coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -6

In [89]:
############# year fixed effect ##############
year_dummies = pd.get_dummies(reg_data2['fyear'], prefix='year')
year_dummies = year_dummies.iloc[:, 1:]
year_dummies = year_dummies.astype(int)

############# month fixed effect ##############
month_dummies = pd.get_dummies(reg_data2['month'], prefix='month')
month_dummies = month_dummies.iloc[:, 1:]
month_dummies = month_dummies.astype(int)

############# industry fixed effect ##############
industry_dummies = pd.get_dummies(reg_data2['GICS_level_1'], prefix='industry')
industry_dummies = industry_dummies.iloc[:, 1:]
industry_dummies = industry_dummies.astype(int)

In [90]:
Y5 = reg_data2['RET']
X5 = reg_data2[['emission_growth_scope2', 'LOGSIZE_t-1', 'B_M_t-1', 'ROE_t-1', 
                'LEVERAGE_t-1', 'INVEST_A_t-1', 'LOGPPE_t-1', 'MOM_t-1', 'BETA_t-1', 
                'VOLAT_t-1', 'SALESGR_t-1', 'EPSGR_t-1']]

X5 = pd.concat([X5, year_dummies], axis=1)
X5 = pd.concat([X5, month_dummies], axis=1)

X5 = sm.add_constant(X5)

model5 = sm.OLS(Y5, X5)

results5 = model5.fit()

print(results5.summary())

                            OLS Regression Results                            
Dep. Variable:                    RET   R-squared:                       0.067
Model:                            OLS   Adj. R-squared:                  0.067
Method:                 Least Squares   F-statistic:                     183.6
Date:                Mon, 24 Jun 2024   Prob (F-statistic):               0.00
Time:                        02:29:41   Log-Likelihood:            -3.1889e+05
No. Observations:               81605   AIC:                         6.378e+05
Df Residuals:                   81572   BIC:                         6.382e+05
Df Model:                          32                                         
Covariance Type:            nonrobust                                         
                             coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                     -4

In [91]:
Y6 = reg_data2['RET']
X6 = reg_data2[['emission_growth_scope2', 'LOGSIZE_t-1', 'B_M_t-1', 'ROE_t-1', 
                'LEVERAGE_t-1', 'INVEST_A_t-1', 'LOGPPE_t-1', 'MOM_t-1', 'BETA_t-1', 
                'VOLAT_t-1', 'SALESGR_t-1', 'EPSGR_t-1']]

X6 = pd.concat([X6, year_dummies], axis=1)
X6 = pd.concat([X6, month_dummies], axis=1)
X6 = pd.concat([X6, industry_dummies], axis=1)

X6 = sm.add_constant(X6)

model6 = sm.OLS(Y6, X6)

results6 = model6.fit()

print(results6.summary())

                            OLS Regression Results                            
Dep. Variable:                    RET   R-squared:                       0.069
Model:                            OLS   Adj. R-squared:                  0.068
Method:                 Least Squares   F-statistic:                     143.6
Date:                Mon, 24 Jun 2024   Prob (F-statistic):               0.00
Time:                        02:29:42   Log-Likelihood:            -3.1882e+05
No. Observations:               81605   AIC:                         6.377e+05
Df Residuals:                   81562   BIC:                         6.381e+05
Df Model:                          42                                         
Covariance Type:            nonrobust                                         
                                      coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------------------
const     

### Regression on Carbon Intensity Scope1

In [92]:
shifted_columns1 = ['RET', 'RET1', 'carbon_intensity_scope1', 'LOGSIZE_t-1', 'B_M_t-1',
                     'ROE_t-1', 'LEVERAGE_t-1', 'INVEST_A_t-1', 'LOGPPE_t-1', 
                     'MOM_t-1', 'BETA_t-1', 'VOLAT_t-1', 'SALESGR', 'EPSGR']
reg_data1 = regression_data_shifted.dropna(subset=shifted_columns1)

Y1 = reg_data1['RET']
X1 = reg_data1[['carbon_intensity_scope1', 'LOGSIZE_t-1', 'B_M_t-1', 'ROE_t-1', 
                'LEVERAGE_t-1', 'INVEST_A_t-1', 'LOGPPE_t-1', 'MOM_t-1', 'BETA_t-1', 
                'VOLAT_t-1', 'SALESGR_t-1', 'EPSGR_t-1']]

X1 = sm.add_constant(X1)

model1 = sm.OLS(Y1, X1)

results1 = model1.fit()

print(results1.summary())

                            OLS Regression Results                            
Dep. Variable:                    RET   R-squared:                       0.023
Model:                            OLS   Adj. R-squared:                  0.022
Method:                 Least Squares   F-statistic:                     156.5
Date:                Mon, 24 Jun 2024   Prob (F-statistic):               0.00
Time:                        02:29:42   Log-Likelihood:            -3.2080e+05
No. Observations:               81605   AIC:                         6.416e+05
Df Residuals:                   81592   BIC:                         6.417e+05
Df Model:                          12                                         
Covariance Type:            nonrobust                                         
                              coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------------
const                     

In [93]:
############# year fixed effect ##############
year_dummies = pd.get_dummies(reg_data1['fyear'], prefix='year')
year_dummies = year_dummies.iloc[:, 1:]
year_dummies = year_dummies.astype(int)

############# month fixed effect ##############
month_dummies = pd.get_dummies(reg_data1['month'], prefix='month')
month_dummies = month_dummies.iloc[:, 1:]
month_dummies = month_dummies.astype(int)

############# industry fixed effect ##############
industry_dummies = pd.get_dummies(reg_data1['GICS_level_1'], prefix='industry')
industry_dummies = industry_dummies.iloc[:, 1:]
industry_dummies = industry_dummies.astype(int)

In [94]:
Y2 = reg_data1['RET']
X2 = reg_data1[['carbon_intensity_scope1', 'LOGSIZE_t-1', 'B_M_t-1', 'ROE_t-1', 
                'LEVERAGE_t-1', 'INVEST_A_t-1', 'LOGPPE_t-1', 'MOM_t-1', 'BETA_t-1', 
                'VOLAT_t-1', 'SALESGR_t-1', 'EPSGR_t-1']]


X2 = pd.concat([X2, year_dummies], axis=1)
X2 = pd.concat([X2, month_dummies], axis=1)

X2 = sm.add_constant(X2)

model2 = sm.OLS(Y2, X2)

results2 = model2.fit()

print(results2.summary())

                            OLS Regression Results                            
Dep. Variable:                    RET   R-squared:                       0.067
Model:                            OLS   Adj. R-squared:                  0.067
Method:                 Least Squares   F-statistic:                     183.8
Date:                Mon, 24 Jun 2024   Prob (F-statistic):               0.00
Time:                        02:29:42   Log-Likelihood:            -3.1889e+05
No. Observations:               81605   AIC:                         6.378e+05
Df Residuals:                   81572   BIC:                         6.381e+05
Df Model:                          32                                         
Covariance Type:            nonrobust                                         
                              coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------------
const                     

In [95]:
Y3 = reg_data1['RET']
X3 = reg_data1[['carbon_intensity_scope1', 'LOGSIZE_t-1', 'B_M_t-1', 'ROE_t-1', 
                'LEVERAGE_t-1', 'INVEST_A_t-1', 'LOGPPE_t-1', 'MOM_t-1', 'BETA_t-1', 
                'VOLAT_t-1', 'SALESGR_t-1', 'EPSGR_t-1']]

X3 = pd.concat([X3, year_dummies], axis=1)
X3 = pd.concat([X3, month_dummies], axis=1)
X3 = pd.concat([X3, industry_dummies], axis=1)

X3 = sm.add_constant(X3)

model3 = sm.OLS(Y3, X3)

results3 = model3.fit()

print(results3.summary())

                            OLS Regression Results                            
Dep. Variable:                    RET   R-squared:                       0.069
Model:                            OLS   Adj. R-squared:                  0.068
Method:                 Least Squares   F-statistic:                     143.6
Date:                Mon, 24 Jun 2024   Prob (F-statistic):               0.00
Time:                        02:29:42   Log-Likelihood:            -3.1882e+05
No. Observations:               81605   AIC:                         6.377e+05
Df Residuals:                   81562   BIC:                         6.381e+05
Df Model:                          42                                         
Covariance Type:            nonrobust                                         
                                      coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------------------
const     

### Regression on Carbon Intensity Scope2

In [96]:
shifted_columns2 = ['RET', 'RET1', 'carbon_intensity_scope1', 'LOGSIZE_t-1', 'B_M_t-1',
                     'ROE_t-1', 'LEVERAGE_t-1', 'INVEST_A_t-1', 'LOGPPE_t-1', 
                     'MOM_t-1', 'BETA_t-1', 'VOLAT_t-1', 'SALESGR', 'EPSGR']
reg_data2 = regression_data_shifted.dropna(subset=shifted_columns2)

Y4 = reg_data2['RET']
X4 = reg_data2[['carbon_intensity_scope2', 'LOGSIZE_t-1', 'B_M_t-1', 'ROE_t-1', 
                'LEVERAGE_t-1', 'INVEST_A_t-1', 'LOGPPE_t-1', 'MOM_t-1', 'BETA_t-1', 
                'VOLAT_t-1', 'SALESGR_t-1', 'EPSGR_t-1']]

X4 = sm.add_constant(X4)

model4 = sm.OLS(Y4, X4)

results4 = model4.fit()

print(results4.summary())

                            OLS Regression Results                            
Dep. Variable:                    RET   R-squared:                       0.022
Model:                            OLS   Adj. R-squared:                  0.022
Method:                 Least Squares   F-statistic:                     156.0
Date:                Mon, 24 Jun 2024   Prob (F-statistic):               0.00
Time:                        02:29:42   Log-Likelihood:            -3.2080e+05
No. Observations:               81605   AIC:                         6.416e+05
Df Residuals:                   81592   BIC:                         6.418e+05
Df Model:                          12                                         
Covariance Type:            nonrobust                                         
                              coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------------
const                     

In [97]:
############# year fixed effect ##############
year_dummies = pd.get_dummies(reg_data2['fyear'], prefix='year')
year_dummies = year_dummies.iloc[:, 1:]
year_dummies = year_dummies.astype(int)

############# month fixed effect ##############
month_dummies = pd.get_dummies(reg_data2['month'], prefix='month')
month_dummies = month_dummies.iloc[:, 1:]
month_dummies = month_dummies.astype(int)

############# industry fixed effect ##############
industry_dummies = pd.get_dummies(reg_data2['GICS_level_1'], prefix='industry')
industry_dummies = industry_dummies.iloc[:, 1:]
industry_dummies = industry_dummies.astype(int)

In [98]:
Y5 = reg_data2['RET']
X5 = reg_data2[['carbon_intensity_scope2', 'LOGSIZE_t-1', 'B_M_t-1', 'ROE_t-1', 
                'LEVERAGE_t-1', 'INVEST_A_t-1', 'LOGPPE_t-1', 'MOM_t-1', 'BETA_t-1', 
                'VOLAT_t-1', 'SALESGR_t-1', 'EPSGR_t-1']]

X5 = pd.concat([X5, year_dummies], axis=1)
X5 = pd.concat([X5, month_dummies], axis=1)

X5 = sm.add_constant(X5)

model5 = sm.OLS(Y5, X5)

results5 = model5.fit()

print(results5.summary())

                            OLS Regression Results                            
Dep. Variable:                    RET   R-squared:                       0.067
Model:                            OLS   Adj. R-squared:                  0.067
Method:                 Least Squares   F-statistic:                     183.7
Date:                Mon, 24 Jun 2024   Prob (F-statistic):               0.00
Time:                        02:29:42   Log-Likelihood:            -3.1889e+05
No. Observations:               81605   AIC:                         6.378e+05
Df Residuals:                   81572   BIC:                         6.382e+05
Df Model:                          32                                         
Covariance Type:            nonrobust                                         
                              coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------------
const                     

In [99]:
Y6 = reg_data2['RET']
X6 = reg_data2[['carbon_intensity_scope2', 'LOGSIZE_t-1', 'B_M_t-1', 'ROE_t-1', 
                'LEVERAGE_t-1', 'INVEST_A_t-1', 'LOGPPE_t-1', 'MOM_t-1', 'BETA_t-1', 
                'VOLAT_t-1', 'SALESGR_t-1', 'EPSGR_t-1']]

X6 = pd.concat([X6, year_dummies], axis=1)
X6 = pd.concat([X6, month_dummies], axis=1)
X6 = pd.concat([X6, industry_dummies], axis=1)

X6 = sm.add_constant(X6)

model6 = sm.OLS(Y6, X6)

results6 = model6.fit()

print(results6.summary())

                            OLS Regression Results                            
Dep. Variable:                    RET   R-squared:                       0.069
Model:                            OLS   Adj. R-squared:                  0.068
Method:                 Least Squares   F-statistic:                     143.6
Date:                Mon, 24 Jun 2024   Prob (F-statistic):               0.00
Time:                        02:29:43   Log-Likelihood:            -3.1882e+05
No. Observations:               81605   AIC:                         6.377e+05
Df Residuals:                   81562   BIC:                         6.381e+05
Df Model:                          42                                         
Covariance Type:            nonrobust                                         
                                      coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------------------
const     