## 2 Models with lags & differences

## #1

In [1]:
# pip install linearmodles
import pandas as pd

from linearmodels import PooledOLS # Fitting of Pooling model
from linearmodels import RandomEffects # Fitting of RE-model
from linearmodels import PanelOLS # Fitting og FE-model
from linearmodels import FirstDifferenceOLS # First difference model

from linearmodels.panel import compare # tables of regressions' outputs

In [2]:
df = pd.read_csv('Grunfeld.csv')
df.head()

Unnamed: 0,firm,year,inv,value,capital
0,1,1935,317.6,3078.5,2.8
1,1,1936,391.8,4661.7,52.6
2,1,1937,410.6,5387.1,156.9
3,1,1938,257.7,2792.2,209.2
4,1,1939,330.8,4313.2,203.4


In [3]:
panel_df = df.set_index(['firm', 'year'])
panel_df.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,inv,value,capital
firm,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1935,317.6,3078.5,2.8
1,1936,391.8,4661.7,52.6
1,1937,410.6,5387.1,156.9
1,1938,257.7,2792.2,209.2
1,1939,330.8,4313.2,203.4
1,1940,461.2,4643.9,207.2
1,1941,512.0,4551.2,255.2
1,1942,448.0,3244.1,303.7
1,1943,499.6,4053.7,264.1
1,1944,547.5,4379.3,201.6


In [4]:
panel_df['d_inv'] = panel_df.groupby(level=0)['inv'].diff()
panel_df['d_value'] = panel_df.groupby(level=0)['value'].diff()
panel_df['d_capital'] = panel_df.groupby(level=0)['capital'].diff()
panel_df.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,inv,value,capital,d_inv,d_value,d_capital
firm,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,1935,317.6,3078.5,2.8,,,
1,1936,391.8,4661.7,52.6,74.2,1583.2,49.8
1,1937,410.6,5387.1,156.9,18.8,725.4,104.3
1,1938,257.7,2792.2,209.2,-152.9,-2594.9,52.3
1,1939,330.8,4313.2,203.4,73.1,1521.0,-5.8
1,1940,461.2,4643.9,207.2,130.4,330.7,3.8
1,1941,512.0,4551.2,255.2,50.8,-92.7,48.0
1,1942,448.0,3244.1,303.7,-64.0,-1307.1,48.5
1,1943,499.6,4053.7,264.1,51.6,809.6,-39.6
1,1944,547.5,4379.3,201.6,47.9,325.6,-62.5


In [5]:
mod_pl = PooledOLS.from_formula(formula='d_inv~1+d_value+d_capital', data=panel_df)
mod_re = RandomEffects.from_formula(formula='d_inv~1+d_value+d_capital', data=panel_df)
mod_fe = PanelOLS.from_formula(formula='d_inv~1+d_value+d_capital+EntityEffects', data=panel_df)
mod_fd = FirstDifferenceOLS.from_formula(formula='d_inv~d_value+d_capital', data=panel_df)

res_pl = mod_pl.fit(cov_type='clustered', cluster_entity=True)
res_re = mod_re.fit(cov_type='clustered', cluster_entity=True)
res_fe = mod_fe.fit(cov_type='clustered', cluster_entity=True)
res_fd = mod_fd.fit(cov_type='clustered', cluster_entity=True)

compare({'Pool': res_pl, 'RE': res_re, 'FE': res_fe, 'FD':res_fd}, stars=True)

Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)
Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)
Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)


0,1,2,3,4
,Pool,RE,FE,FD
Dep. Variable,d_inv,d_inv,d_inv,d_inv
Estimator,PooledOLS,RandomEffects,PanelOLS,FirstDifferenceOLS
No. Observations,190,190,190,180
Cov. Est.,Clustered,Clustered,Clustered,Clustered
R-squared,0.4089,0.4089,0.3645,0.4645
R-Squared (Within),0.3612,0.3612,0.3645,0.1046
R-Squared (Between),0.8467,0.8467,0.7727,-1.2935
R-Squared (Overall),0.4089,0.4089,0.4046,-0.0769
F-statistic,64.674,64.674,51.053,77.209


In [6]:
import numpy as np
from scipy.stats import chi2

In [7]:
(res_fe.params-res_re.params).T@np.linalg.inv(res_fe.cov-res_re.cov)@(res_fe.params-res_re.params)

0.009503960311212027

In [8]:
chi2.isf(q=0.05, df=res_re.df_model-1)

5.991464547107983

The test statistic is lower than the critical point $\Rightarrow$ the RE model is the best

## #2

In [9]:
panel_df['lag_value'] = panel_df.groupby(level=0)['value'].shift()
panel_df['lag_capital'] = panel_df.groupby(level=0)['capital'].shift()

In [10]:
mod_pl = PooledOLS.from_formula(formula='inv~1+value+capital+lag_value+lag_capital', data=panel_df)
mod_re = RandomEffects.from_formula(formula='inv~1+value+capital+lag_value+lag_capital', data=panel_df)
mod_fe = PanelOLS.from_formula(formula='inv~1+value+capital+lag_value+lag_capital+EntityEffects', data=panel_df)
mod_fd = FirstDifferenceOLS.from_formula(formula='inv~value+capital+lag_value+lag_capital', data=panel_df)

res_pl = mod_pl.fit(cov_type='clustered', cluster_entity=True)
res_re = mod_re.fit(cov_type='clustered', cluster_entity=True)
res_fe = mod_fe.fit(cov_type='clustered', cluster_entity=True)
res_fd = mod_fd.fit(cov_type='clustered', cluster_entity=True)

compare({'Pool': res_pl, 'RE': res_re, 'FE': res_fe, 'FD':res_fd}, stars=True)

Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)
Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)
Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)
Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)


0,1,2,3,4
,Pool,RE,FE,FD
Dep. Variable,inv,inv,inv,inv
Estimator,PooledOLS,RandomEffects,PanelOLS,FirstDifferenceOLS
No. Observations,190,190,190,180
Cov. Est.,Clustered,Clustered,Clustered,Clustered
R-squared,0.8266,0.8000,0.7985,0.4841
R-Squared (Within),0.7714,0.7982,0.7985,0.7578
R-Squared (Between),0.8429,0.8204,0.8109,0.8160
R-Squared (Overall),0.8266,0.8153,0.8081,0.8069
F-statistic,220.49,184.96,174.33,41.287


In [11]:
 (res_fe.params-res_re.params).T@np.linalg.inv(res_fe.cov-res_re.cov)@(res_fe.params-res_re.params)

0.5970147748316306

In [12]:
chi2.isf(q=0.05, df=res_re.df_model-1)

9.487729036781158

The test statistic is lower than the critical point $\Rightarrow$ the RE model is the best

## #3 The exercise with the dataframe EmplUK.csv

In [13]:
df = pd.read_csv('EmplUK.csv')
df.head()

Unnamed: 0,firm,year,sector,emp,wage,capital,output
0,1,1977,7,5.041,13.1516,0.5894,95.707199
1,1,1978,7,5.6,12.3018,0.6318,97.356903
2,1,1979,7,5.015,12.8395,0.6771,99.608299
3,1,1980,7,4.715,13.8039,0.6171,100.5501
4,1,1981,7,4.093,14.2897,0.5076,99.558098


In [14]:
panel_df = df.set_index(['firm', 'year'])

In [15]:
panel_df['l_emp'] = np.log(panel_df['emp'])
panel_df['d_l_emp'] = panel_df.groupby(level=0)['l_emp'].diff()

panel_df['l_capital'] = np.log(panel_df['capital'])
panel_df['d_l_capital'] = panel_df.groupby(level=0)['l_capital'].diff()

panel_df['l_wage'] = np.log(panel_df['wage'])
panel_df['d_l_wage'] = panel_df.groupby(level=0)['l_wage'].diff()

panel_df['l_output'] = np.log(panel_df['output'])
panel_df['d_l_output'] = panel_df.groupby(level=0)['l_output'].diff()

panel_df.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,sector,emp,wage,capital,output,l_emp,d_l_emp,l_capital,d_l_capital,l_wage,d_l_wage,l_output,d_l_output
firm,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,1977,7,5.041,13.1516,0.5894,95.707199,1.617604,,-0.52865,,2.576543,,4.561294,
1,1978,7,5.6,12.3018,0.6318,97.356903,1.722767,0.105162,-0.459182,0.069468,2.509746,-0.066798,4.578384,0.01709
1,1979,7,5.015,12.8395,0.6771,99.608299,1.612433,-0.110333,-0.389936,0.069246,2.552526,0.042781,4.601245,0.022862
1,1980,7,4.715,13.8039,0.6171,100.5501,1.550749,-0.061684,-0.482724,-0.092788,2.624951,0.072425,4.610656,0.009411
1,1981,7,4.093,14.2897,0.5076,99.558098,1.409278,-0.141471,-0.678062,-0.195337,2.659539,0.034588,4.600741,-0.009915
1,1982,7,3.166,14.8681,0.4229,98.615097,1.152469,-0.256809,-0.86062,-0.182558,2.699218,0.039679,4.591224,-0.009517
1,1983,7,2.936,13.7784,0.392,100.0301,1.077048,-0.075421,-0.936493,-0.075874,2.623102,-0.076116,4.605471,0.014247
2,1977,7,71.319,14.7909,16.9363,95.707199,4.267163,,2.829459,,2.694012,,4.561294,
2,1978,7,70.642998,14.1036,17.242201,97.356903,4.257639,-0.009524,2.84736,0.017901,2.64643,-0.047582,4.578384,0.01709
2,1979,7,70.917999,14.9534,17.5413,99.608299,4.261524,0.003885,2.864558,0.017198,2.704939,0.058509,4.601245,0.022862


In [16]:
mod_pl = PooledOLS.from_formula(formula='d_l_emp~1+d_l_capital+d_l_wage+d_l_output', data=panel_df)
mod_re = RandomEffects.from_formula(formula='d_l_emp~1+d_l_capital+d_l_wage+d_l_output', data=panel_df)
mod_fe = PanelOLS.from_formula(formula='d_l_emp~1+d_l_capital+d_l_wage+d_l_output+EntityEffects', data=panel_df)
mod_fd = FirstDifferenceOLS.from_formula(formula='d_l_emp~d_l_capital+d_l_wage+d_l_output', data=panel_df)

res_pl = mod_pl.fit(cov_type='clustered', cluster_entity=True)
res_re = mod_re.fit(cov_type='clustered', cluster_entity=True)
res_fe = mod_fe.fit(cov_type='clustered', cluster_entity=True)
res_fd = mod_fd.fit(cov_type='clustered', cluster_entity=True)

compare({'Pool': res_pl, 'RE': res_re, 'FE': res_fe, 'FD':res_fd}, stars=True)

Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)
Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)
Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)


0,1,2,3,4
,Pool,RE,FE,FD
Dep. Variable,d_l_emp,d_l_emp,d_l_emp,d_l_emp
Estimator,PooledOLS,RandomEffects,PanelOLS,FirstDifferenceOLS
No. Observations,891,891,891,668
Cov. Est.,Clustered,Clustered,Clustered,Clustered
R-squared,0.3830,0.3830,0.3693,0.3020
R-Squared (Within),0.3644,0.3644,0.3693,0.3485
R-Squared (Between),0.4687,0.4687,0.4190,0.4871
R-Squared (Overall),0.3830,0.3830,0.3780,0.3834
F-statistic,183.51,183.51,146.03,95.908


In [17]:
 (res_fe.params-res_re.params).T@np.linalg.inv(res_fe.cov-res_re.cov)@(res_fe.params-res_re.params)

7.283070546413963

In [18]:
chi2.isf(q=0.05, df=res_re.df_model-1)

7.814727903251178

The test statistic is lower than the critical point $\Rightarrow$ the RE model is the best

In [24]:
panel_df['lag_l_wage'] = panel_df.groupby(level=0)['l_wage'].shift()
panel_df['lag_l_capital'] = panel_df.groupby(level=0)['l_capital'].shift()
panel_df['lag_l_output'] = panel_df.groupby(level=0)['l_output'].shift()

In [26]:
mod_pl = PooledOLS.from_formula(formula='l_emp~1+d_l_capital+lag_l_capital+d_l_wage+lag_l_wage+d_l_output+lag_l_output', data=panel_df)
mod_re = RandomEffects.from_formula(formula='l_emp~1+d_l_capital+lag_l_capital+d_l_wage+lag_l_wage+d_l_output+lag_l_output', data=panel_df)
mod_fe = PanelOLS.from_formula(formula='l_emp~1+d_l_capital+lag_l_capital+d_l_wage+lag_l_wage+d_l_output+lag_l_output+EntityEffects', data=panel_df)
mod_fd = FirstDifferenceOLS.from_formula(formula='l_emp~d_l_capital+lag_l_capital+d_l_wage+lag_l_wage+d_l_output+lag_l_output', data=panel_df)

res_pl = mod_pl.fit(cov_type='clustered', cluster_entity=True)
res_re = mod_re.fit(cov_type='clustered', cluster_entity=True)
res_fe = mod_fe.fit(cov_type='clustered', cluster_entity=True)
res_fd = mod_fd.fit(cov_type='clustered', cluster_entity=True)

compare({'Pool': res_pl, 'RE': res_re, 'FE': res_fe, 'FD':res_fd}, stars=True)

Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)
Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)
Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)
Inputs contain missing values. Dropping rows with missing observations.
  super().__init__(dependent, exog, weights=weights, check_rank=check_rank)


0,1,2,3,4
,Pool,RE,FE,FD
Dep. Variable,l_emp,l_emp,l_emp,l_emp
Estimator,PooledOLS,RandomEffects,PanelOLS,FirstDifferenceOLS
No. Observations,891,891,891,668
Cov. Est.,Clustered,Clustered,Clustered,Clustered
R-squared,0.8406,0.7036,0.6459,0.5416
R-Squared (Within),0.4528,0.6369,0.6459,0.6394
R-Squared (Between),0.8545,0.8292,0.7768,0.4695
R-Squared (Overall),0.8406,0.8195,0.7696,0.4681
F-statistic,776.98,349.73,226.44,130.35


In [27]:
 (res_fe.params-res_re.params).T@np.linalg.inv(res_fe.cov-res_re.cov)@(res_fe.params-res_re.params)

-56.28316825780087

In [28]:
chi2.isf(q=0.05, df=res_re.df_model-1)

12.59158724374398

The test statistic is lower than the critical point $\Rightarrow$ the RE model is the best