In [58]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from scipy.stats import norm

In [59]:
column_names = ['nr','year','black','exper','hisp','hours','married','occ1','occ2','occ3','occ4','occ5','occ6','occ7','occ8','occ9','educ','union','lwage','d81','d82','d83','d84','d85','d86', 'd87','expersq' ]
data = pd.read_csv('wagepan.csv',names= column_names, header= None)
data.head(5)

Unnamed: 0,nr,year,black,exper,hisp,hours,married,occ1,occ2,occ3,...,union,lwage,d81,d82,d83,d84,d85,d86,d87,expersq
0,13,1980,0,1,0,2672,0,0,0,0,...,0,1.19754,0,0,0,0,0,0,0,1
1,13,1981,0,2,0,2320,0,0,0,0,...,1,1.85306,1,0,0,0,0,0,0,4
2,13,1982,0,3,0,2940,0,0,0,0,...,0,1.344462,0,1,0,0,0,0,0,9
3,13,1983,0,4,0,2960,0,0,0,0,...,0,1.433213,0,0,1,0,0,0,0,16
4,13,1984,0,5,0,3071,0,0,0,0,...,0,1.568125,0,0,0,1,0,0,0,25


In [60]:
len(data['nr'].unique())

545

In [61]:
data[data['nr'] == 13]

Unnamed: 0,nr,year,black,exper,hisp,hours,married,occ1,occ2,occ3,...,union,lwage,d81,d82,d83,d84,d85,d86,d87,expersq
0,13,1980,0,1,0,2672,0,0,0,0,...,0,1.19754,0,0,0,0,0,0,0,1
1,13,1981,0,2,0,2320,0,0,0,0,...,1,1.85306,1,0,0,0,0,0,0,4
2,13,1982,0,3,0,2940,0,0,0,0,...,0,1.344462,0,1,0,0,0,0,0,9
3,13,1983,0,4,0,2960,0,0,0,0,...,0,1.433213,0,0,1,0,0,0,0,16
4,13,1984,0,5,0,3071,0,0,0,0,...,0,1.568125,0,0,0,1,0,0,0,25
5,13,1985,0,6,0,2864,0,0,1,0,...,0,1.699891,0,0,0,0,1,0,0,36
6,13,1986,0,7,0,2994,0,0,1,0,...,0,-0.720263,0,0,0,0,0,1,0,49
7,13,1987,0,8,0,2640,0,0,1,0,...,0,1.669188,0,0,0,0,0,0,1,64


The Panel regression:
$$ y_{i,t} = \beta_0 + \beta_1 educ_{i,t}+\beta_2 black_{i,t} + \beta_3 hispan_{i,t}+ \beta_4 exper_{i,t}+\beta_5 exper_{i,t}^2 + \beta_6 married_{i,t} +\beta_7 union_{i,t} + u_i + \epsilon_{i,t} $$

The inefficient OLS estimator will act as a starting point:
$$ Y = X \beta + v $$
Where v is the OLS residuals:
$$ v = [v_{1,1}, v_{1,2}, ..,v_{i,t}]'$$
$$ \beta = (X'X)^{-1}X'Y$$
$$ v_{i,t} = y_{i,t} - \hat{y_{i,t}} = y_{i,t} - \hat{\beta_0} - \hat{\beta_1} educ_{i,t} - \hat{\beta_2} black_{i,t} - \hat{\beta_3} hispan_{i,t} - \hat{\beta_4} exper_{i,t} -\hat{\beta_5} exper_{i,t}^2 - \hat{\beta_6} married_{i,t} - \hat{\beta_7} union_{i,t}$$

In [62]:
data['exper^2'] = data['exper'].pow(2)

In [63]:
y = data['lwage']
df = data[['educ','black','hisp','exper','exper^2','married','union']]
x = sm.add_constant(df)
model = sm.OLS(y, x).fit()
v = np.zeros(len(data)) # the residuals
for i in range(len(data)):
    v[i] = data['lwage'][i] - model.params[0] - model.params[1]*data['exper'][i] - model.params[2]*data['married'][i] - model.params[3]*data['union'][i]


In [64]:
data['vi'] = v
data.head(5)

Unnamed: 0,nr,year,black,exper,hisp,hours,married,occ1,occ2,occ3,...,d81,d82,d83,d84,d85,d86,d87,expersq,exper^2,vi
0,13,1980,0,1,0,2672,0,0,0,0,...,0,0,0,0,0,0,0,1,1,1.132858
1,13,1981,0,2,0,2320,0,0,0,0,...,1,0,0,0,0,0,0,4,4,1.673292
2,13,1982,0,3,0,2940,0,0,0,0,...,0,1,0,0,0,0,0,9,9,1.081004
3,13,1983,0,4,0,2960,0,0,0,0,...,0,0,1,0,0,0,0,16,16,1.070367
4,13,1984,0,5,0,3071,0,0,0,0,...,0,0,0,1,0,0,0,25,25,1.105892


Notice that: $$v_{i,t} = u_{i} + \epsilon_{i,t}$$
Then given $\hat{v_{i,t}}$ the estimates for $\hat{u_i}$ are as follows:
$$ \hat{u_i} = \sum_{t=1}^{T}{\hat{v_{i,t}}}$$
That is the errors are averaged for every individual to obtain the unobserved hetrogenity, and the variance of such component is:
$$\hat{\sigma^2_{u}} = \frac{\sum_{i=1}^{N}{\hat{u_{i}^2}}}{N-1}$$

In [65]:
u = np.zeros(len(data['nr'].unique()))
j = 0
for i in data['nr'].unique():
    u[j] = np.mean(data['vi'][data['nr'] == i])
    j+=1
sigma2_u = np.sum(u**2)/(len(u)-1)

Once the unobserved hetregenous component is estimated one can estimate the other error component:
$$ \hat{\epsilon_{i,t}} = \hat{v_{i,t}} - \hat{u_i}$$
The variance of this compnent is:
$$ \hat{\sigma^2_{\epsilon}} = \frac{\sum_{i=1}^{N}\sum_{t=1}^{T}{\epsilon_{i,t}}}{NT-(N-1)}$$

In [66]:
k = 0
eps = np.zeros(len(data))
for j in range(len(u)):
    for i in range(8):
        eps[i+k] = v[i+k] - u[j]
    k +=8
sigma2_eps = np.sum(eps**2)/(len(data) - (len(u)-1))


The Random effects estimator is just the GLS estimator with a special variance covariance matrix $\Omega$:
$$\Omega_{NT \times NT} = \begin{bmatrix} \Sigma_{T\times T} & 0 & ... & 0\\ 0 & \Sigma_{T\times T} &...&0 \\ ...\\ 0&0&...&\Sigma_{T\times T} \end{bmatrix}$$
The non-zero elemnts are formed by the variance-covariance of the T observations for an individual i:
$$ \Sigma_{T \times T} = \begin{bmatrix} \sigma^2_{\epsilon}+\sigma^2_{u} & \sigma^2_{u} & ... &\sigma^2_{u}\\\sigma^2_{u} & \sigma^2_{\epsilon}+\sigma^2_{u} & ... & \sigma^2_{u} \\ ... \\ \sigma^2_{u} & \sigma^2_{u} & ... & \sigma^2_{\epsilon}+\sigma^2_{u} \end{bmatrix} $$

In [67]:
F = np.zeros((8,8))
F[:,:] = sigma2_u
for i in range(8):
    for j in range(8):
        if i ==j:
            F[i,j] = sigma2_eps+sigma2_u
        else:
            F[i,j] = sigma2_u
Omega = np.zeros((len(u)*8,len(u)*8))
for i in range(len(u)):
    m = i*8
    n = (i+1)*8
    Omega[m:n,m:n] = F


The GLS estimator is:
$$\hat{\beta} = (X'\Omega X)^{-1}X'\Omega^{-1}Y$$
The variance of the estimator is:
$$ Var[\hat{\beta}|X] = \sigma^2(X'\Omega X)^{-1} $$

In [68]:
X = np.matrix([np.ones(len(data)), data['educ'],data['black'],data['hisp'],data['exper'],data['exper^2'],data['married'],data['union']])
X = np.transpose(X)

In [69]:
y = np.matrix(np.array(data['lwage']))
beta = np.linalg.inv(np.transpose(X)*np.linalg.inv(Omega)*X)*np.transpose(X)*np.linalg.inv(Omega)*np.transpose(y)

In [70]:
beta

matrix([[-0.11877554],
        [ 0.10176075],
        [-0.14434222],
        [ 0.02137512],
        [ 0.11637871],
        [-0.00427836],
        [ 0.04722172],
        [ 0.08479858]])

In [71]:
sigma2_v = np.sum(v**2)/(n-len(beta))
variance_beta = sigma2_v*np.linalg.inv(np.transpose(X)*np.linalg.inv(Omega)*X)
np.sqrt(np.diag(variance_beta))

array([0.44945865, 0.03695168, 0.19984821, 0.17955102, 0.0107936 ,
       0.0007757 , 0.02330968, 0.02458217])

The estimates along with their standard errors are:
$$ \hat{\beta}_0 = -0.118, \hat{\sigma}_{\beta_0} = 0.449$$
$$ \hat{\beta}_{educ} = 0.101, \hat{\sigma}_{educ} = 0.0369$$
$$ \hat{\beta}_{black} = -0.144, \hat{\sigma}_{black} = 0.199$$
$$ \hat{\beta}_{hispan} =  0.021, \hat{\sigma}_{hispan} = 0.179$$
$$ \hat{\beta}_{exper} = 0.116, \hat{\sigma}_{exper} = 0.010$$
$$ \hat{\beta}_{exper^2} = -0.004, \hat{\sigma}_{exper^2} = 0.000$$
$$ \hat{\beta}_{married} = 0.047, \hat{\sigma}_{married} = 0.023$$
$$ \hat{\beta}_{union} = 0.084, \hat{\sigma}_{union} = 0.024$$