Run the cells below:

In [1]:
import pandas as pd
import statsmodels.api as sm
from linearmodels import PanelOLS

In [2]:
# Load the cleaned firm-year panel from a pickled (zip-compressed) file.
# NOTE: unpickling can execute arbitrary code, so only read this file from a trusted source.
comp = pd.read_pickle('../data/comp_clean.zip')
# Show each column's dtype as a quick sanity check on what was loaded.
comp.dtypes

permno          float64
datadate         object
ib              float64
at              float64
che             float64
dltt            float64
ppent           float64
sich            float64
year              int64
roa             float64
future_roa      float64
cash            float64
leverage        float64
investment      float64
w_future_roa    float64
w_cash          float64
w_leverage      float64
w_investment    float64
const             int64
dtype: object

In [3]:
# Order rows by firm and then year, and add next-observation investment as `future_invest`.
# The lead is taken within each `permno`, so the last row of every firm gets NaN.
# NOTE(review): shift(-1) picks the firm's *next row*; that is next year's value only
# if the firm has no gaps in `year` -- confirm against the data.
comp = (
    comp
    .sort_values(by=['permno', 'year'])
    .assign(future_invest=lambda panel: panel.groupby('permno')['investment'].shift(-1))
)

In [4]:
# Winsorize each variable at its own 5th / 95th percentile: values outside that
# range are clipped to the bounds and stored in a new `w5_`-prefixed column.
winsor_cols = ['future_invest', 'roa', 'cash']
for v in winsor_cols:
    p05, p95 = comp[v].quantile([0.05, 0.95])
    comp[f'w5_{v}'] = comp[v].clip(lower=p05, upper=p95)

# Mean and standard deviation of the winsorized columns.
comp[[f'w5_{v}' for v in winsor_cols]].agg(['mean', 'std'])

Unnamed: 0,w5_future_invest,w5_roa,w5_cash
mean,0.016095,-0.027871,0.16785
std,0.055204,0.166513,0.198858


In [5]:
# Build the regression sample: keep only the identifiers and model variables,
# drop every row with a missing value in any of them, and index by (permno, year)
# so the frame can be passed to PanelOLS as a panel.
panel_cols = ['permno', 'year', 'w5_future_invest', 'w5_roa', 'w5_cash', 'const']
df = (
    comp[panel_cols]
    .dropna()
    .set_index(['permno', 'year'])
)

Regress ``w5_future_invest`` on ``w5_roa`` and ``w5_cash`` (and a constant). Use firm fixed effects in your regression.

In [6]:
# Firm fixed effects: entity_effects=True adds an effect for each entity of the
# (permno, year) index. No cov_type is passed to fit(), so the default
# covariance estimator is used.
res = PanelOLS(
    df['w5_future_invest'],
    df[['const', 'w5_roa', 'w5_cash']],
    entity_effects=True,
).fit()
print(res.summary)

                          PanelOLS Estimation Summary                           
Dep. Variable:       w5_future_invest   R-squared:                        0.0651
Estimator:                   PanelOLS   R-squared (Between):             -0.0374
No. Observations:              206817   R-squared (Within):               0.0651
Date:                Thu, Mar 03 2022   R-squared (Overall):              0.0165
Time:                        14:27:00   Log-likelihood                   3.4e+05
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      6457.4
Entities:                       21454   P-value                           0.0000
Avg Obs:                       9.6400   Distribution:                F(2,185361)
Min Obs:                       1.0000                                           
Max Obs:                       81.000   F-statistic (robust):             6457.4
                            

Regress ``w5_future_invest`` on ``w5_roa`` and ``w5_cash`` (and a constant). Use time fixed effects in your regression.

In [7]:
# Time fixed effects only: time_effects=True adds an effect for each period
# (the `year` level of the index); no firm effects are included here.
time_fe_model = PanelOLS(
    dependent=df['w5_future_invest'],
    exog=df[['const', 'w5_roa', 'w5_cash']],
    time_effects=True,
)
res = time_fe_model.fit()
print(res.summary)

                          PanelOLS Estimation Summary                           
Dep. Variable:       w5_future_invest   R-squared:                        0.0410
Estimator:                   PanelOLS   R-squared (Between):              0.0194
No. Observations:              206817   R-squared (Within):               0.0536
Date:                Thu, Mar 03 2022   R-squared (Overall):              0.0425
Time:                        14:27:01   Log-likelihood                 3.121e+05
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      4417.5
Entities:                       21454   P-value                           0.0000
Avg Obs:                       9.6400   Distribution:                F(2,206774)
Min Obs:                       1.0000                                           
Max Obs:                       81.000   F-statistic (robust):             4417.5
                            

Regress ``w5_future_invest`` on ``w5_roa`` and ``w5_cash`` (and a constant). Use both firm fixed effects and time fixed effects in your regression. Cluster standard errors at the firm level.

In [8]:
# Two-way fixed effects (firm and year) with standard errors clustered by firm.
# Note: `res` holds the unfitted model here; `res2` holds the fitted results.
regressors = ['const', 'w5_roa', 'w5_cash']
res = PanelOLS(
    df['w5_future_invest'],
    df[regressors],
    entity_effects=True,
    time_effects=True,
)

# cluster_entity=True clusters the covariance on the entity (firm) dimension.
res2 = res.fit(cov_type='clustered', cluster_entity=True)

print(res2.summary)

                          PanelOLS Estimation Summary                           
Dep. Variable:       w5_future_invest   R-squared:                        0.0602
Estimator:                   PanelOLS   R-squared (Between):             -0.0312
No. Observations:              206817   R-squared (Within):               0.0650
Date:                Thu, Mar 03 2022   R-squared (Overall):              0.0187
Time:                        14:27:02   Log-likelihood                 3.429e+05
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      5933.8
Entities:                       21454   P-value                           0.0000
Avg Obs:                       9.6400   Distribution:                F(2,185321)
Min Obs:                       1.0000                                           
Max Obs:                       81.000   F-statistic (robust):             2300.9
                            

Regress ``w5_future_invest`` on ``w5_roa`` and ``w5_cash`` (and a constant). Use both firm fixed effects and time fixed effects in your regression. Cluster standard errors at both the firm and the year level (two-way clustering).

In [9]:
# Same two-way fixed-effects specification as the firm-clustered regression;
# only the covariance estimator differs: errors are clustered on both the
# entity (firm) and the time (year) dimension.
# Note: `res` holds the unfitted model here; `res2` holds the fitted results.
res = PanelOLS(
    dependent=df['w5_future_invest'],
    exog=df[['const', 'w5_roa', 'w5_cash']],
    entity_effects=True,
    time_effects=True,
)

two_way_clusters = {'cluster_entity': True, 'cluster_time': True}
res2 = res.fit(cov_type='clustered', **two_way_clusters)

print(res2.summary)

                          PanelOLS Estimation Summary                           
Dep. Variable:       w5_future_invest   R-squared:                        0.0602
Estimator:                   PanelOLS   R-squared (Between):             -0.0312
No. Observations:              206817   R-squared (Within):               0.0650
Date:                Thu, Mar 03 2022   R-squared (Overall):              0.0187
Time:                        14:27:03   Log-likelihood                 3.429e+05
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      5933.8
Entities:                       21454   P-value                           0.0000
Avg Obs:                       9.6400   Distribution:                F(2,185321)
Min Obs:                       1.0000                                           
Max Obs:                       81.000   F-statistic (robust):             318.00
                            