# 13. Pooling Cross-Sections Across Time: Simple Panel Data Methods

## 13.1. Pooled Cross-Sections

### Example 13.2: Changes to the Return to Education and the Gender Wage Gap

In [1]:
import wooldridge as woo
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
import linearmodels as plm
from supplementaryFunctions import *

In [2]:
cps78_85 = woo.dataWoo("cps78_85")
formula = "lwage ~ y85 * (educ + female) + exper + I(exper ** 2 / 100) + union"
# OLS results including interaction terms:
reg = smf.ols(formula=formula,
             data = cps78_85)
results = reg.fit()
OLS_summary(results)

Unnamed: 0_level_0,$\beta $,$t$,$$P>|t|$$,$SE$
$$r^2: 0.4262$$,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Intercept,0.4589,4.9111,0.0,0.0934
y85,0.1178,0.9517,0.3415,0.1238
educ,0.0747,11.1917,0.0,0.0067
female,-0.3167,-8.6482,0.0,0.0366
y85:educ,0.0185,1.9735,0.0487,0.0094
y85:female,0.0851,1.6576,0.0977,0.0513
exper,0.0296,8.2932,0.0,0.0036
I(exper ** 2 / 100),-0.0399,-5.1513,0.0,0.0078
union,0.2021,6.6722,0.0,0.0303


## 13.2. Difference-in-Differences

### Example 13.3.: Effect of a Garbage Incinerator's Location on Housing Prices

In [3]:
kielmc = woo.dataWoo("kielmc")
# pd.to_datetime(kielmc["year"])
# kielmc.set_index("year", inplace = True)
# Separate regressions for 1978 and 1981
formula = "rprice ~ nearinc"
for year in (1978, 1981):
    ysub = kielmc["year"] == year
    reg = smf.ols(formula = formula,
                   data = kielmc,
                   subset = ysub)
    results = reg.fit()
    print(OLS_summary(results))

formula = "rprice ~ nearinc * C(year)"
reg = smf.ols(formula = formula,
               data = kielmc)
results = reg.fit()
OLS_summary(results)

                   $\beta $      $t$  $$P>|t|$$      $SE$
$$r^2: 0.0817$$                                          
Intercept        82517.2276  31.0941     0.0000  2653.790
nearinc         -18824.3705  -3.9675     0.0001  4744.594
                    $\beta $      $t$  $$P>|t|$$       $SE$
$$r^2: 0.1653$$                                            
Intercept        101307.5136  32.7535        0.0  3093.0267
nearinc          -30688.2738  -5.2659        0.0  5827.7088


Unnamed: 0_level_0,$\beta $,$t$,$$P>|t|$$,$SE$
$$r^2: 0.1739$$,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Intercept,82517.2276,30.2603,0.0,2726.9101
C(year)[T.1981],18790.286,4.6395,0.0,4050.065
nearinc,-18824.3705,-3.8612,0.0001,4875.3221
nearinc:C(year)[T.1981],-11863.9033,-1.5911,0.1126,7456.6462


## 13.3. Organizing Panel Data

$$y_{it} = \beta_0 + \beta_1x_{it1} + \beta_2x_{it2} + . . .  + \beta_kx_{itk} + \nu_{it}$$

$$t = 1, . . . , T$$ 
$$i = 1, . . . , n$$

## 13.4. First Differenced Estimator

$$y_{it} = \beta_0 + \beta_1x_{it1} + \beta_2x_{it2} + . . .  + \beta_kx_{itk} + a_i + u_{it}$$

$$\Delta y_{it} = y_{it} - y_{it-1}$$
$$\Delta y_{it} = \beta_1\Delta x_{it1} + . . . + \beta_k\Delta x_{itk} + \Delta u_{it}$$

In [4]:
crime2 = woo.dataWoo("crime2")
crime2["year"].value_counts()

82    46
87    46
Name: year, dtype: int64

In [5]:
crime2["y87"] = crime2["year"] == 87
crime2["id"] = sorted([i for i in range(crime2["year"].value_counts()[82])] +\
    [i for i in range(crime2["year"].value_counts()[87])])
crime2.sort_values("y87", inplace = True)
crime2.head()


Unnamed: 0,pop,crimes,unem,officers,pcinc,west,nrtheast,south,year,area,...,clcrmrte,lpolpc,clpolpc,cllawexp,cunem,clpopden,lcrmrt_1,ccrmrte,y87,id
0,229528.0,17136.0,8.2,326,8532,1,0,0,82,44.599998,...,,0.350872,,,,,,,False,0
28,127198.0,13122.0,7.0,290,8727,0,0,1,82,25.299999,...,,0.824136,,,,,,,False,14
74,425814.0,29913.0,10.2,1278,6845,0,1,0,82,55.400002,...,,1.099049,,,,,,,False,37
32,56168.0,6533.0,5.3,149,10204,0,0,1,82,20.799999,...,,0.975599,,,,,,,False,16
34,254253.0,19563.0,7.6,401,6966,0,0,1,82,55.900002,...,,0.455632,,,,,,,False,17


In [6]:
crime2.set_index(["id","year"], inplace = True)
diff_crime = crime2.groupby("id")[["crmrte", "unem"]].diff()
reg_sm = smf.ols(formula = "crmrte ~ unem",
                data = diff_crime)
results_sm = reg_sm.fit()
OLS_summary(results_sm)

Unnamed: 0_level_0,$\beta $,$t$,$$P>|t|$$,$SE$
$$r^2: 0.1267$$,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Intercept,15.4022,3.2756,0.0021,4.7021
unem,2.218,2.5266,0.0152,0.8779


In [7]:
reg_plm = plm.FirstDifferenceOLS.from_formula(formula = "crmrte ~ y87 + unem",
                                             data = crime2) 
results_plm = reg_plm.fit()
LM_summary(results_plm)

Unnamed: 0_level_0,$\beta $,$t$,$$P>|t|$$,$SE$
$$r^2: 0.1961$$,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
unem,2.218,2.5266,0.0152,0.8779
y87,15.4022,3.2756,0.0021,4.7021


### Example 13.9: County Crime Rates in North Carolina

In [8]:
crime4 = woo.dataWoo("crime4")
crime4.set_index(["county", "year"], inplace = True,  drop = False)
crime4

Unnamed: 0_level_0,Unnamed: 1_level_0,county,year,crmrte,prbarr,prbconv,prbpris,avgsen,polpc,density,taxpc,...,lpctymle,lpctmin,clcrmrte,clprbarr,clprbcon,clprbpri,clavgsen,clpolpc,cltaxpc,clmix
county,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,81,1,81,0.039885,0.289696,0.402062,0.472222,5.61,0.001787,2.307159,25.697630,...,-2.433870,3.006608,,,,,,,,
1,82,1,82,0.038345,0.338111,0.433005,0.506993,5.59,0.001767,2.330254,24.874252,...,-2.449038,3.006608,-0.039376,0.154542,0.074143,0.071048,-0.003571,-0.011364,-0.032565,0.030857
1,83,1,83,0.030305,0.330449,0.525703,0.479705,5.80,0.001836,2.341801,26.451443,...,-2.464036,3.006608,-0.235316,-0.022922,0.193987,-0.055326,0.036879,0.038413,0.061477,-0.244732
1,84,1,84,0.034726,0.362525,0.604706,0.520104,6.89,0.001886,2.346420,26.842348,...,-2.478925,3.006608,0.136180,0.092641,0.140006,0.080857,0.172213,0.026930,0.014670,-0.027331
1,85,1,85,0.036573,0.325395,0.578723,0.497059,6.55,0.001924,2.364896,28.140337,...,-2.497306,3.006608,0.051825,-0.108054,-0.043918,-0.045320,-0.050606,0.020199,0.047223,0.172125
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
197,83,197,83,0.015575,0.226667,0.480392,0.428571,7.77,0.001073,0.869048,18.905853,...,-2.538060,1.697597,-0.148666,-0.010969,-0.127018,0.164303,0.157158,0.149330,0.070461,0.020250
197,84,197,84,0.013662,0.204188,1.410260,0.372727,10.11,0.001109,0.872024,22.704754,...,-2.548068,1.697597,-0.131037,-0.104441,1.076927,-0.139610,0.263255,0.032795,0.183103,0.026842
197,85,197,85,0.013086,0.180556,0.830769,0.333333,5.96,0.001054,0.875000,24.123611,...,-2.561072,1.697597,-0.043091,-0.123000,-0.529178,-0.111704,-0.528454,-0.050473,0.060617,-0.366374
197,86,197,86,0.012874,0.112676,2.250000,0.244444,7.68,0.001088,0.880952,24.981979,...,-2.580968,1.697597,-0.016311,-0.471524,0.996334,-0.310156,0.253549,0.031580,0.034964,-0.067911


In [9]:
formula = "np.log(crmrte) ~ year + d83 + d84 + d85 + d86 + d87 + lprbarr + lprbconv + lprbpris + lavgsen + lpolpc"
reg = plm.FirstDifferenceOLS.from_formula(formula = formula,
                                         data = crime4)
results = reg.fit()
LM_summary(results)

Unnamed: 0_level_0,$\beta $,$t$,$$P>|t|$$,$SE$
$$r^2: 0.4326$$,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
d83,-0.0999,-4.1793,0.0,0.0239
d84,-0.1478,-3.5806,0.0004,0.0413
d85,-0.1524,-2.6098,0.0093,0.0584
d86,-0.1249,-1.6433,0.1009,0.076
d87,-0.0841,-0.8944,0.3715,0.094
lavgsen,-0.0218,-0.985,0.3251,0.0221
lpolpc,0.3984,14.8213,0.0,0.0269
lprbarr,-0.3275,-10.9237,0.0,0.03
lprbconv,-0.2381,-13.0583,0.0,0.0182
lprbpris,-0.165,-6.3555,0.0,0.026
