# 14. Advanced Panel Data Methods

## 14.1. Fixed Effects Estimation

$y_{it} = \beta_0 + \beta_1x_{it1} + . . .  + \beta_kx_{itk} + a_i + u_{it}$

$\bar{y_i} = \beta_0 + \beta_1\bar{x}_{i1} + . . . + \beta_k\bar{x}_{ik} + a_i + \bar{u_i}$

$\ddot{y} = y_{it} - \bar{y_i} $

$\ddot{y} = \beta_1\ddot{x}_{it1} + . . . +  \beta_k\ddot{x}_{itk} + \ddot{u}_{it}$


### Exapmle 14.2: Has the Return to Education Changed over Time

In [1]:
import wooldridge as woo
import pandas as pd
import statsmodels.formula.api as smf
import linearmodels as plm
import scipy.stats as stats
from supplementaryFunctions import *

In [2]:
def LM_summary(results, round_dig = 4):
    summary = {"$\\beta $":results.params, 
           "$t$": results.tstats,
           "$$P>|t|$$":results.pvalues,
           # calculate standard errors by taking the square root of the variance values 
           # along the diagonal of the covariance matrix 
          "$SE$":results.std_errors}
    summary = pd.DataFrame(summary)
    # add r^2 using index name
    summary.index.name = "$$r^2: "+str(round(results.rsquared,round_dig)) + "$$"
    return summary

#### The authors use *drop_absorbed* quite freely. This is problematic since it makes us lazy in identifying source of multicolinearity.

In [3]:
wagepan = woo.dataWoo("wagepan")
wagepan.set_index(["nr","year"], drop = False, inplace = True)
formula = "lwage ~ married + union + C(year)*educ + EntityEffects"
reg = plm.PanelOLS.from_formula(formula = formula,
                               data = wagepan, drop_absorbed=True)
results = reg.fit()
LM_summary(results)

Variables have been fully absorbed and have removed from the regression:

C(year)[T.1981], C(year)[T.1982], C(year)[T.1983], C(year)[T.1984], C(year)[T.1985], C(year)[T.1986], C(year)[T.1987], educ

  results = reg.fit()


Unnamed: 0_level_0,$\beta $,$t$,$$P>|t|$$,$SE$
$$r^2: 0.0099$$,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
C(year)[T.1980],11.172073,14.588203,0.0,0.765829
married,0.05482,2.97734,0.002926,0.018413
union,0.082978,4.267104,2e-05,0.019446
C(year)[T.1981]:educ,0.011585,0.944788,0.344827,0.012262
C(year)[T.1982]:educ,0.01479,1.206054,0.227872,0.012264
C(year)[T.1983]:educ,0.017118,1.395888,0.16283,0.012263
C(year)[T.1984]:educ,0.016584,1.352059,0.176437,0.012266
C(year)[T.1985]:educ,0.023709,1.931635,0.053479,0.012274
C(year)[T.1986]:educ,0.027412,2.233366,0.025583,0.012274
C(year)[T.1987]:educ,0.030433,2.479819,0.013188,0.012272


In [4]:
# names = ["lwage", "married","union" , "educ"]
# X, y = build_X_y_matrices(wagepan, names, log_vars = None)#, constant = True)

# reg = plm.PanelOLS(y, X, entity_effects = True, time_effects = True, drop_absorbed = True)
# results = reg.fit()
# LM_summary(results)

## 14.2. Random Effects Models

$\dot{y}_{it} = y_{it} - \theta \bar{y}_i$

$\dot{y}_{it} = \beta_0(1-\theta) + \beta_1\dot{x}_{it1} + . . . + \beta_k\dot{x}_{itk} + \dot{\nu}_{it}$

$\nu_{it} = a_i + u_{it}$

$\theta = 1 - \sqrt{\frac{\theta_u^2}{\theta_u^2 + T\theta_a^2}}$

In [5]:
wagepan = woo.dataWoo("wagepan")
wagepan.groupby("nr").var()

Unnamed: 0_level_0,year,agric,black,bus,construc,ent,exper,fin,hisp,poorhlth,...,union,lwage,d81,d82,d83,d84,d85,d86,d87,expersq
nr,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
13,6.0,0.0,0.0,0.214286,0.000000,0.000,6.0,0.000000,0.0,0.0,...,0.125000,0.681418,0.125,0.125,0.125,0.125,0.125,0.125,0.125,510.0
17,6.0,0.0,0.0,0.000000,0.267857,0.000,6.0,0.000000,0.0,0.0,...,0.000000,0.009753,0.125,0.125,0.125,0.125,0.125,0.125,0.125,1374.0
18,6.0,0.0,0.0,0.000000,0.000000,0.000,6.0,0.000000,0.0,0.0,...,0.000000,0.186106,0.125,0.125,0.125,0.125,0.125,0.125,0.125,1374.0
45,6.0,0.0,0.0,0.125000,0.125000,0.000,6.0,0.000000,0.0,0.0,...,0.214286,0.049972,0.125,0.125,0.125,0.125,0.125,0.125,0.125,750.0
110,6.0,0.0,0.0,0.125000,0.000000,0.000,6.0,0.214286,0.0,0.0,...,0.125000,0.009836,0.125,0.125,0.125,0.125,0.125,0.125,0.125,1758.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12451,6.0,0.0,0.0,0.000000,0.000000,0.125,6.0,0.000000,0.0,0.0,...,0.000000,0.137637,0.125,0.125,0.125,0.125,0.125,0.125,0.125,750.0
12477,6.0,0.0,0.0,0.000000,0.000000,0.000,6.0,0.000000,0.0,0.0,...,0.000000,0.001519,0.125,0.125,0.125,0.125,0.125,0.125,0.125,1374.0
12500,6.0,0.0,0.0,0.267857,0.000000,0.000,6.0,0.000000,0.0,0.0,...,0.000000,0.198548,0.125,0.125,0.125,0.125,0.125,0.125,0.125,1374.0
12534,6.0,0.0,0.0,0.000000,0.000000,0.000,6.0,0.000000,0.0,0.0,...,0.000000,0.026982,0.125,0.125,0.125,0.125,0.125,0.125,0.125,750.0


In [6]:
isv_nr = (wagepan.groupby("nr").var() == 0)
noVar_nr = isv_nr.all(axis = 0)
# noVar_nr,
isv_nr.columns[noVar_nr]

Index(['black', 'hisp', 'educ'], dtype='object')

In [7]:
isv_t = wagepan.groupby("nr").var() == 0
noVar_t = isv_t.all(axis = 0)
# isv_t, 
isv_t.columns[noVar_t]

Index(['black', 'hisp', 'educ'], dtype='object')

In [8]:
wagepan.set_index(["nr", "year"], drop = False, inplace = True)

In [9]:
formula = "lwage ~ educ + black + hisp + exper + I(exper**2) + married + union + C(year)"

reg_pooled =plm.PooledOLS.from_formula(formula = formula,
                                    data = wagepan)
results_pooled = reg_pooled.fit()
LM_summary(results_pooled)

Unnamed: 0_level_0,$\beta $,$t$,$$P>|t|$$,$SE$
$$r^2: 0.1893$$,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
C(year)[T.1980],0.092056,1.17613,0.2396076,0.07827
C(year)[T.1981],0.150376,1.793474,0.07296674,0.083846
C(year)[T.1982],0.15483,1.733458,0.08308526,0.089319
C(year)[T.1983],0.154068,1.632282,0.1026926,0.094388
C(year)[T.1984],0.182523,1.843739,0.0652892,0.098996
C(year)[T.1985],0.201302,1.952284,0.05096858,0.103111
C(year)[T.1986],0.234015,2.191982,0.02843369,0.10676
C(year)[T.1987],0.265889,2.416628,0.01570597,0.110025
I(exper**2),-0.002412,-2.941264,0.003286019,0.00082
black,-0.139234,-5.904869,3.798544e-09,0.02358


In [10]:
# just time effects same as pooled with time effects
reg_te = plm.PanelOLS.from_formula(formula = formula,
                                    data = wagepan)
results_te =reg_te.fit()
LM_summary(results_te)

Unnamed: 0_level_0,$\beta $,$t$,$$P>|t|$$,$SE$
$$r^2: 0.1893$$,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
C(year)[T.1980],0.092056,1.17613,0.2396076,0.07827
C(year)[T.1981],0.150376,1.793474,0.07296674,0.083846
C(year)[T.1982],0.15483,1.733458,0.08308526,0.089319
C(year)[T.1983],0.154068,1.632282,0.1026926,0.094388
C(year)[T.1984],0.182523,1.843739,0.0652892,0.098996
C(year)[T.1985],0.201302,1.952284,0.05096858,0.103111
C(year)[T.1986],0.234015,2.191982,0.02843369,0.10676
C(year)[T.1987],0.265889,2.416628,0.01570597,0.110025
I(exper**2),-0.002412,-2.941264,0.003286019,0.00082
black,-0.139234,-5.904869,3.798544e-09,0.02358


In [11]:
formula = "lwage ~ I(exper**2) + married + union + C(year)"
# Time and Entity Effects
reg_fe = plm.PanelOLS.from_formula(formula = formula + " + EntityEffects",
                                    data = wagepan)
results_fe = reg_fe.fit()
LM_summary(results_fe)

Unnamed: 0_level_0,$\beta $,$t$,$$P>|t|$$,$SE$
$$r^2: 0.1806$$,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
C(year)[T.1980],1.426019,77.748354,0.0,0.018341
C(year)[T.1981],1.57721,72.965647,0.0,0.021616
C(year)[T.1982],1.678989,63.258282,0.0,0.026542
C(year)[T.1983],1.780462,53.439178,0.0,0.033318
C(year)[T.1984],1.916133,45.98163,0.0,0.041672
C(year)[T.1985],2.043501,39.646046,0.0,0.051544
C(year)[T.1986],2.191515,34.771405,0.0,0.063026
C(year)[T.1987],2.351043,30.866912,0.0,0.076167
I(exper**2),-0.005185,-7.361196,2.220446e-13,0.000704
married,0.04668,2.549386,0.01083019,0.01831


In [12]:
formula = "lwage ~ educ + black + hisp + exper + I(exper**2) + married + union + C(year)"
reg_re = plm.RandomEffects.from_formula(formula = formula,
                                    data = wagepan)

results_re = reg_re.fit()
LM_summary(results_re)

Unnamed: 0_level_0,$\beta $,$t$,$$P>|t|$$,$SE$
$$r^2: 0.1806$$,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
C(year)[T.1980],0.023414,0.154606,0.8771388,0.151441
C(year)[T.1981],0.063837,0.398786,0.6900706,0.160079
C(year)[T.1982],0.054265,0.321115,0.7481389,0.168989
C(year)[T.1983],0.043602,0.244962,0.8064974,0.177995
C(year)[T.1984],0.066427,0.355113,0.7225219,0.187057
C(year)[T.1985],0.081109,0.413565,0.6792131,0.196122
C(year)[T.1986],0.115241,0.561652,0.5743819,0.205182
C(year)[T.1987],0.15825,0.73862,0.4601778,0.214251
I(exper**2),-0.004729,-6.862314,7.731593e-12,0.000689
black,-0.139379,-2.9054,0.003686359,0.047972


#### We can compare all results in a single table

In [13]:
from linearmodels.panel import compare

compare({"FE": results_fe, "RE": results_re, "Pooled": results_pooled})

0,1,2,3
,FE,RE,Pooled
Dep. Variable,lwage,lwage,lwage
Estimator,PanelOLS,RandomEffects,PooledOLS
No. Observations,4360,4360,4360
Cov. Est.,Unadjusted,Unadjusted,Unadjusted
R-squared,0.1806,0.1806,0.1893
R-Squared (Within),0.1806,0.1799,0.1692
R-Squared (Between),-0.0052,0.1853,0.2066
R-Squared (Overall),0.0807,0.1828,0.1893
F-statistic,83.851,68.409,72.459


### Hausman Test 
#### From Appendix: Script 14.4: Example-HausmTest.py

In [14]:
b_fe = results_fe.params
b_fe_cov = results_fe.cov
b_re = results_re.params
b_re_cov = results_re.cov

common_coef = set(results_fe.params.index).intersection(results_re.params.index)
common_coef

{'C(year)[T.1980]',
 'C(year)[T.1981]',
 'C(year)[T.1982]',
 'C(year)[T.1983]',
 'C(year)[T.1984]',
 'C(year)[T.1985]',
 'C(year)[T.1986]',
 'C(year)[T.1987]',
 'I(exper**2)',
 'married',
 'union'}

In [15]:
b_diff = results_fe.params[common_coef].sub(results_re.params[common_coef])
df = len(b_diff)
b_diff

  b_diff = results_fe.params[common_coef].sub(results_re.params[common_coef])
  b_diff = results_fe.params[common_coef].sub(results_re.params[common_coef])


C(year)[T.1986]    2.076274
C(year)[T.1984]    1.849707
C(year)[T.1980]    1.402605
C(year)[T.1983]    1.736860
I(exper**2)       -0.000456
C(year)[T.1982]    1.624725
married           -0.017134
C(year)[T.1985]    1.962392
C(year)[T.1981]    1.513373
union             -0.025865
C(year)[T.1987]    2.192793
Name: parameter, dtype: float64

In [16]:
b_cov_diff = b_fe_cov.loc[common_coef, common_coef].sub(b_re_cov.loc[common_coef, common_coef])
b_cov_diff

  b_cov_diff = b_fe_cov.loc[common_coef, common_coef].sub(b_re_cov.loc[common_coef, common_coef])
  b_cov_diff = b_fe_cov.loc[common_coef, common_coef].sub(b_re_cov.loc[common_coef, common_coef])


Unnamed: 0,C(year)[T.1986],C(year)[T.1984],C(year)[T.1980],C(year)[T.1983],I(exper**2),C(year)[T.1982],married,C(year)[T.1985],C(year)[T.1981],union,C(year)[T.1987]
C(year)[T.1986],-0.038127,-0.035698,-0.029378,-0.0343,-6.415442e-05,-0.032788,-0.000411616,-0.036976,-0.031141,-0.0001188561,-0.03914
C(year)[T.1984],-0.035698,-0.033254,-0.027256,-0.031892,-4.888127e-05,-0.030444,-0.0003419515,-0.034524,-0.028894,-0.0001054621,-0.036766
C(year)[T.1980],-0.029378,-0.027256,-0.022598,-0.026143,-1.882321e-05,-0.024998,-0.0001981415,-0.028336,-0.023814,-7.180325e-05,-0.030377
C(year)[T.1983],-0.0343,-0.031892,-0.026143,-0.030572,-4.130821e-05,-0.029178,-0.0003068184,-0.033137,-0.027697,-9.706718e-05,-0.035373
I(exper**2),-6.4e-05,-4.9e-05,-1.9e-05,-4.1e-05,2.128958e-08,-3.4e-05,1.85887e-07,-5.6e-05,-2.6e-05,-9.915058e-08,-7.2e-05
C(year)[T.1982],-0.032788,-0.030444,-0.024998,-0.029178,-3.376926e-05,-0.027853,-0.0002700965,-0.03165,-0.026455,-8.933812e-05,-0.033852
married,-0.000412,-0.000342,-0.000198,-0.000307,1.85887e-07,-0.00027,5.377188e-05,-0.000377,-0.000235,7.736228e-07,-0.000447
C(year)[T.1985],-0.036976,-0.034524,-0.028336,-0.033137,-5.64978e-05,-0.03165,-0.0003767402,-0.035807,-0.030043,-0.0001118637,-0.038024
C(year)[T.1981],-0.031141,-0.028894,-0.023814,-0.027697,-2.627847e-05,-0.026455,-0.0002348176,-0.030043,-0.025158,-8.043469e-05,-0.03218
union,-0.000119,-0.000105,-7.2e-05,-9.7e-05,-9.915058e-08,-8.9e-05,7.736228e-07,-0.000112,-8e-05,5.40463e-05,-0.000129


In [17]:
b_var_diff = pd.Series(np.diag(b_cov_diff), index=[b_cov_diff.index])
b_var_diff

C(year)[T.1986]   -3.812734e-02
C(year)[T.1984]   -3.325389e-02
C(year)[T.1980]   -2.259812e-02
C(year)[T.1983]   -3.057219e-02
I(exper**2)        2.128958e-08
C(year)[T.1982]   -2.785277e-02
married            5.377188e-05
C(year)[T.1985]   -3.580718e-02
C(year)[T.1981]   -2.515791e-02
union              5.404630e-05
C(year)[T.1987]   -4.010217e-02
dtype: float64

In [18]:
stat = abs(b_diff.T @ np.linalg.inv(b_cov_diff) @ b_diff)
p_val = 1 - stats.chi2.cdf(stat, df)
stat, p_val

(43.42707117534046, 9.150613852537681e-06)

In [19]:
def hausman_test(results_fe, results_re):
    b_fe = results_fe.params
    b_fe_cov = results_fe.cov
    b_re = results_re.params
    b_re_cov = results_re.cov
    common_coef = set(results_fe.params.index).intersection(results_re.params.index)    
    b_diff = results_fe.params[common_coef].sub(results_re.params[common_coef])
    df = len(b_diff)
    b_cov_diff = b_fe_cov.loc[common_coef, common_coef].sub(b_re_cov.loc[common_coef, common_coef])
    b_var_diff = pd.Series(np.diag(b_cov_diff), index=[b_cov_diff.index])
    h_results = {"Hausman":{}}
    h_results["Hausman"]["t"] = abs(b_diff.T @ np.linalg.inv(b_cov_diff) @ b_diff)
    h_results["Hausman"]["p"] = 1 - stats.chi2.cdf(stat, df)
    return pd.DataFrame(h_results)
hausman_test(results_fe, results_re).round(3)

  b_diff = results_fe.params[common_coef].sub(results_re.params[common_coef])
  b_diff = results_fe.params[common_coef].sub(results_re.params[common_coef])
  b_cov_diff = b_fe_cov.loc[common_coef, common_coef].sub(b_re_cov.loc[common_coef, common_coef])
  b_cov_diff = b_fe_cov.loc[common_coef, common_coef].sub(b_re_cov.loc[common_coef, common_coef])


Unnamed: 0,Hausman
p,0.0
t,43.427


It turns out that we can get the FE parameter estimates in two other ways than the within transformation we used in Section 14.1. The dummy variable regression uses OLS on the original variables in Equation 13.2 instead of the transformed ones. But it adds n - 1 dummy variables.

$$a_i = \gamma_0 + \gamma_1 \bar{x}_{i1} + . . . + \gamma \bar{x}_{ik} + r_i$$

$$y_{it} = \beta_0 + \beta_1 x_{it1} + . . . + \beta_kx_{itk} + a_i + u_{it}$$


In [20]:
wagepan["entity"] = wagepan["nr"]
wagepan.set_index("nr", inplace = True)#, drop = False)
wagepan["married_b"] = wagepan.groupby("nr").mean()["married"]
wagepan["union_b"] = wagepan.groupby("nr").mean()["union"]
wagepan = wagepan.set_index(["year"], append = True, drop = False)
wagepan

Unnamed: 0_level_0,Unnamed: 1_level_0,year,agric,black,bus,construc,ent,exper,fin,hisp,poorhlth,...,d82,d83,d84,d85,d86,d87,expersq,entity,married_b,union_b
nr,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
13,1980,1980,0,0,1,0,0,1,0,0,0,...,0,0,0,0,0,0,1,13,0.000,0.125
13,1981,1981,0,0,0,0,0,2,0,0,0,...,0,0,0,0,0,0,4,13,0.000,0.125
13,1982,1982,0,0,1,0,0,3,0,0,0,...,1,0,0,0,0,0,9,13,0.000,0.125
13,1983,1983,0,0,1,0,0,4,0,0,0,...,0,1,0,0,0,0,16,13,0.000,0.125
13,1984,1984,0,0,0,0,0,5,0,0,0,...,0,0,1,0,0,0,25,13,0.000,0.125
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12548,1983,1983,0,0,0,1,0,8,0,0,0,...,0,1,0,0,0,0,64,12548,0.625,0.375
12548,1984,1984,0,0,0,1,0,9,0,0,0,...,0,0,1,0,0,0,81,12548,0.625,0.375
12548,1985,1985,0,0,0,1,0,10,0,0,0,...,0,0,0,1,0,0,100,12548,0.625,0.375
12548,1986,1986,0,0,0,0,0,11,0,0,0,...,0,0,0,0,1,0,121,12548,0.625,0.375


In [21]:
formula = "lwage ~ married + union + C(year)*educ + EntityEffects"
reg_we = plm.PanelOLS.from_formula(formula = formula,
                                  drop_absorbed = True, data = wagepan)
results_we = reg_we.fit()
LM_summary(results_we)

Variables have been fully absorbed and have removed from the regression:

educ

  results_we = reg_we.fit()


Unnamed: 0_level_0,$\beta $,$t$,$$P>|t|$$,$SE$
$$r^2: 0.1708$$,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
C(year)[T.1980],1.362459,83.90311,0.0,0.016238
C(year)[T.1981],1.340043,9.230656,0.0,0.145173
C(year)[T.1982],1.356698,9.348067,0.0,0.145131
C(year)[T.1983],1.372888,9.456084,0.0,0.145186
C(year)[T.1984],1.446833,9.961677,0.0,0.14524
C(year)[T.1985],1.412184,9.731511,0.0,0.145115
C(year)[T.1986],1.428065,9.840444,0.0,0.145122
C(year)[T.1987],1.452904,10.006096,0.0,0.145202
married,0.05482,2.97734,0.002926,0.018413
union,0.082978,4.267104,2e-05,0.019446


In [22]:
formula = "lwage ~ married + union + C(year)*educ + C(entity)"
reg_dum = smf.ols(formula = formula,
                  data = wagepan)
results_dum = reg_dum.fit()
OLS_summary(results_dum)

Unnamed: 0_level_0,$\beta $,$t$,$$P>|t|$$,$SE$
$$r^2: 0.6164$$,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Intercept,2.1080,5.8568,0.0000,0.3599
C(year)[T.1981],-0.0224,-0.1537,0.8779,0.1459
C(year)[T.1982],-0.0058,-0.0395,0.9685,0.1459
C(year)[T.1983],0.0104,0.0715,0.9430,0.1459
C(year)[T.1984],0.0844,0.5785,0.5630,0.1459
...,...,...,...,...
C(year)[T.1983]:educ,0.0171,1.3959,0.1628,0.0123
C(year)[T.1984]:educ,0.0166,1.3521,0.1764,0.0123
C(year)[T.1985]:educ,0.0237,1.9316,0.0535,0.0123
C(year)[T.1986]:educ,0.0274,2.2334,0.0256,0.0123


In [23]:
formula = "lwage ~ married + union + C(year)*educ + married_b + union_b"
reg_cre = plm.RandomEffects.from_formula(formula = formula,
                                  data = wagepan)
results_cre = reg_cre.fit()
LM_summary(results_cre)

Unnamed: 0_level_0,$\beta $,$t$,$$P>|t|$$,$SE$
$$r^2: 0.1739$$,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
C(year)[T.1980],0.577921,4.043669,5.353429e-05,0.14292
C(year)[T.1981],0.555506,3.886793,0.0001030935,0.142921
C(year)[T.1982],0.57216,4.003608,6.342595e-05,0.142911
C(year)[T.1983],0.588351,4.116912,3.911186e-05,0.142911
C(year)[T.1984],0.662296,4.633842,3.694655e-06,0.142926
C(year)[T.1985],0.627647,4.39174,1.151257e-05,0.142915
C(year)[T.1986],0.643528,4.502529,6.891094e-06,0.142926
C(year)[T.1987],0.668366,4.676713,3.00368e-06,0.142914
educ,0.058594,4.917946,9.069083e-07,0.011914
married,0.05482,2.9827,0.002873075,0.018379


In [24]:
formula = "lwage ~ married + union + C(year)*educ"
reg_re = plm.RandomEffects.from_formula(formula = formula,
                                  data = wagepan)
results_re = reg_re.fit()
LM_summary(results_re)

Unnamed: 0_level_0,$\beta $,$t$,$$P>|t|$$,$SE$
$$r^2: 0.1702$$,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
C(year)[T.1980],0.652315,4.59178,4.519133e-06,0.142061
C(year)[T.1981],0.630936,4.441247,9.16629e-06,0.142063
C(year)[T.1982],0.648583,4.567148,5.081008e-06,0.142011
C(year)[T.1983],0.662299,4.661373,3.235258e-06,0.142082
C(year)[T.1984],0.733996,5.163699,2.52954e-07,0.142145
C(year)[T.1985],0.705038,4.965526,7.114465e-07,0.141987
C(year)[T.1986],0.721497,5.081196,3.908216e-07,0.141994
C(year)[T.1987],0.741491,5.218032,1.89269e-07,0.142102
educ,0.059473,4.987051,6.370088e-07,0.011926
married,0.077258,4.605448,4.233559e-06,0.016775


In [25]:
compare({"WE": results_we,  "CRE": results_cre})#, "Pooled": results_pooled})

0,1,2
,WE,CRE
Dep. Variable,lwage,lwage
Estimator,PanelOLS,RandomEffects
No. Observations,4360,4360
Cov. Est.,Unadjusted,Unadjusted
R-squared,0.1708,0.1739
R-Squared (Within),0.1708,0.1708
R-Squared (Between),0.0905,0.1956
R-Squared (Overall),0.1277,0.1841
F-statistic,48.907,48.081


In [26]:
names = ["married", "union", "C(year)[T.1982]:educ"]
pd.DataFrame({"Within":results_we.params[names],
              "OLS":results_dum.params[names],
              "CRE":results_cre.params[names],
              "RE":results_re.params[names]}).round(4)

Unnamed: 0,Within,OLS,CRE,RE
married,0.0548,0.0548,0.0548,0.0773
union,0.083,0.083,0.083,0.1075
C(year)[T.1982]:educ,0.0148,0.0148,0.0148,0.0143


In [27]:
# Wald test using correlated random effects
wtest = results_cre.wald_test(formula="married_b = union_b = 0")
wtest

Linear Equality Hypothesis Test
H0: Linear equality constraint is valid
Statistic: 19.4058
P-value: 0.0001
Distributed: chi2(2)
WaldTestStatistic, id: 0x18473765ee0