# Ejemplo de Estimación por Datos Panel

Referencia: F. Vella and M. Verbeek (1998), “Whose Wages Do Unions Raise? A Dynamic Model of Unionism and Wage Rate Determination for Young Men,” Journal of Applied Econometrics 13, 163-183.

## 1. Dependencias

In [None]:
#!pip install linearmodels==4.24
#!pip install linearmodels==4.5
#!pip install linearmodels

In [None]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.iolib.summary2 import summary_col
from linearmodels.panel.model import PooledOLS, PanelOLS
from linearmodels.panel import RandomEffects
from linearmodels.panel import compare # Para comparar modelos

# Ignore every warning raised from this point on.
# NOTE(review): presumably done to keep the notebook output compact; be aware
# that this also hides warnings that could point to real problems.
import warnings
warnings.filterwarnings('ignore')

## 2. Importación de datos

In [None]:
# Name of the CSV file that holds the wage panel data set.
data_to_load = 'wage_panel.csv'

# Read the CSV file into a DataFrame and preview its first rows.
wage_df = pd.read_csv(data_to_load)
wage_df.head()

### Los datos importados son:
* nr: person identifier
* year: 1980 to 1987
* black: =1 if black
* exper: labor market experience
* hisp: =1 if Hispanic
* hours: annual hours worked
* married: =1 if married
* educ: years of schooling
* union: =1 if in union
* lwage: log(wage)
* expersq: exper^2
* occupation: Occupation code

In [None]:
# Reshape the frame into a panel: keep a handle on the year column before it
# is moved into the index, then index the rows by (person id, year).
year = wage_df['year']
wage_df = wage_df.set_index(['nr', 'year'])
wage_df

In [None]:
# Add the year back as a regular column (option 1), stored as a categorical.
# pd.Categorical(year) carries no index, so the values are assigned by
# position; this relies on the rows still being in the order in which
# `year` was captured.
wage_df['year'] = pd.Categorical( year )
wage_df.head()

In [None]:
# Preview, as a two-element list, the panel frame and the year indicator
# columns produced by pd.get_dummies; nothing is assigned here.
[ wage_df , pd.get_dummies(wage_df['year']) ]

In [None]:
# Add one indicator column per year (option 2): build the dummies from the
# 'year' column and append them to the right of the existing frame.
year_dummies = pd.get_dummies(wage_df['year'])
wage_df = pd.concat([wage_df, year_dummies], axis=1)

In [None]:
# Display the panel frame in its current state.
wage_df

## 3. Regresión Pooled

In [None]:
# Design matrix for the pooled regression: the selected regressors (with
# 'year' kept as a single column) plus the constant added by statsmodels.
X = sm.add_constant(
    wage_df[
        ['black', 'hisp', 'exper', 'expersq', 'married', 'educ', 'union',
         'year', 'hours']
    ]
)
X

In [None]:
# Alternative design matrix: same regressors, but the year enters through the
# explicit indicator columns labelled 1981..1987 (1980 is left out) instead of
# the single 'year' column; a constant is added by statsmodels.
X1 = sm.add_constant(
    wage_df[
        ['black', 'hisp', 'exper', 'expersq', 'married', 'educ', 'union',
         *range(1981, 1988), 'hours']
    ]
)
X1

In [None]:
# Dependent variable: the 'lwage' column (log of the wage).
Y = wage_df[ 'lwage' ]
Y

In [None]:
# Pooled OLS of Y on X1 (the design matrix with explicit year indicator
# columns); fit the model and print the results.
model_1 = PooledOLS(Y, X1)
pooled_res_1 = model_1.fit()
print(pooled_res_1)

In [None]:
# Pooled OLS of Y on X (the design matrix that carries 'year' as a single
# column); fit the model and print the results.
model = PooledOLS(Y, X)
pooled_res = model.fit()
print(pooled_res)

## 4. Efectos aleatorios

In [None]:
# Random-effects regression of Y on X; fit the model and print the results.
model = RandomEffects(Y, X)
re_res = model.fit()
print(re_res)

In [None]:
# Variance decomposition reported by the random-effects result.
re_res.variance_decomposition

## 5. Efectos fijos

In [None]:
# Entity fixed-effects regression of Y on the current X.
# These lines are expected to raise an error; that is intentional.
# NOTE(review): presumably the failure is caused by regressors that the entity
# effects absorb (the following cell drops 'exper', 'black', 'hisp' and
# 'educ') — confirm against the error message.
model = PanelOLS(Y, X, entity_effects = True)
fe_res = model.fit()
print(fe_res)

In [None]:
# Entity fixed-effects regression.
# 'exper', 'black', 'hisp' and 'educ' are left out of the design matrix.
X = sm.add_constant(
    wage_df[['expersq', 'union', 'married', 'year', 'hours']]
)
model = PanelOLS(Y, X, entity_effects=True)
fe_res = model.fit()
print(fe_res)

In [None]:
# Regression with both entity and time fixed effects.
# 'exper', 'black', 'hisp', 'educ' and 'year' are left out of the design
# matrix.
X = sm.add_constant(
    wage_df[['expersq', 'union', 'married', 'hours']]
)
model = PanelOLS(Y, X, entity_effects=True, time_effects=True)
fe_te_res = model.fit()
print(fe_te_res)

## 6. Comparación de modelos

In [None]:
# Side-by-side comparison of the four fitted models. The dictionary keys are
# used as the labels of the models in the printed table, so they are spelled
# out correctly here.
print(compare({
    'Fixed Effects Ent.': fe_res,
    'Fixed Effects Ent-Time': fe_te_res,
    'Random Effects': re_res,
    'Pooled': pooled_res,
}))

## 7. Varianza Robusta:

In [None]:
# Entity fixed-effects regression with a clustered covariance estimator.
X = sm.add_constant(
    wage_df[['expersq', 'union', 'married', 'year', 'hours']]
)
model = PanelOLS(Y, X, entity_effects=True)

# Covariance options, kept here for reference:
#   "unadjusted" / "homoskedastic"  - assume the residuals are homoskedastic;
#   "robust" / "heteroskedastic"    - control for heteroskedasticity using
#                                     White's estimator, e.g.
#                                     model.fit(cov_type='robust');
#   "clustered"                     - clustered by entity below; clustering by
#                                     entity and time would be
#                                     model.fit(cov_type='clustered',
#                                               cluster_entity=True,
#                                               cluster_time=True).
# Note that this rebinds fe_res, replacing any earlier result of that name.
fe_res = model.fit(cov_type="clustered", cluster_entity=True)

print(fe_res)