# Panel data analysis with Python

In [None]:
import numpy as np
import pandas as pd

from linearmodels import PooledOLS          # Pooled model
from linearmodels import RandomEffects      # Random-effect model
from linearmodels import PanelOLS           # Fixed-effect model
from linearmodels import FirstDifferenceOLS # First difference model

from linearmodels.panel import compare      # Compare the results of multiple models
from statsmodels.api import add_constant    # for matrices of regression design

## Panel data preparation

Consider the `Gasoline` dataset. We load it from a CSV file into a DataFrame.

In [None]:
# Read the Gasoline data from the CSV file shipped next to the notebook.
df = pd.read_csv(filepath_or_buffer='./panels-plm/Gasoline.csv')
# Preview the first five rows as the cell output.
df.head(n=5)

To set up panel data, we need to set a multi-index on the DataFrame.

We call the `.set_index(['country', 'year'])` method (the order matters: the entity level comes first, then the time level!)

In [None]:
# Build the (country, year) MultiIndex; `df` itself is left unchanged
# because set_index returns a new DataFrame.
panel_df = df.set_index(keys=['country', 'year'])
# Preview the first five rows of the indexed panel.
panel_df.head(n=5)

## Fitting models via formulas

In [None]:
# Pooled model: the formula includes an explicit intercept (the `1` term)
# and three regressors.
mod_pl = PooledOLS.from_formula(
    formula='lgaspcar~1+lincomep+lrpmg+lcarpcap',
    data=panel_df,
)
res_pl = mod_pl.fit()
res_pl  # last expression: show the fitted results as the cell output

In [None]:
# Random-effect model: same specification as the pooled model
# (intercept plus three regressors).
mod_re = RandomEffects.from_formula(
    formula='lgaspcar~1+lincomep+lrpmg+lcarpcap',
    data=panel_df,
)
res_re = mod_re.fit()
res_re  # last expression: show the fitted results as the cell output

In [None]:
# Fixed-effect model: the `EntityEffects` term in the formula requests
# entity effects on top of the intercept and the three regressors.
mod_fe = PanelOLS.from_formula(
    formula='lgaspcar~1+lincomep+lrpmg+lcarpcap+EntityEffects',
    data=panel_df,
)
res_fe = mod_fe.fit()
res_fe  # last expression: show the fitted results as the cell output

In [None]:
# First-difference model: note that the formula has no intercept term,
# unlike the other three specifications.
mod_fd = FirstDifferenceOLS.from_formula(
    formula='lgaspcar~lincomep+lrpmg+lcarpcap',
    data=panel_df,
)
res_fd = mod_fd.fit()
res_fd  # last expression: show the fitted results as the cell output

## Panel regressions with robust inferences

Let's fit the basic models with the Arellano (entity-clustered) covariance matrix estimator

We call the `.fit(cov_type='clustered', cluster_entity=True)` method for all models

We collect fitted models into a table (t-stats in brackets)

In [None]:
# Each model is specified and then immediately fitted with a covariance
# clustered by entity (cov_type='clustered', cluster_entity=True).

# Pooled model
mod_pl = PooledOLS.from_formula(
    formula='lgaspcar~1+lincomep+lrpmg+lcarpcap',
    data=panel_df,
)
res_pl = mod_pl.fit(cov_type='clustered', cluster_entity=True)

# Random-effect model
mod_re = RandomEffects.from_formula(
    formula='lgaspcar~1+lincomep+lrpmg+lcarpcap',
    data=panel_df,
)
res_re = mod_re.fit(cov_type='clustered', cluster_entity=True)

# Fixed-effect model (entity effects requested in the formula)
mod_fe = PanelOLS.from_formula(
    formula='lgaspcar~1+lincomep+lrpmg+lcarpcap+EntityEffects',
    data=panel_df,
)
res_fe = mod_fe.fit(cov_type='clustered', cluster_entity=True)

# First-difference model (no intercept in the formula)
mod_fd = FirstDifferenceOLS.from_formula(
    formula='lgaspcar~lincomep+lrpmg+lcarpcap',
    data=panel_df,
)
res_fd = mod_fd.fit(cov_type='clustered', cluster_entity=True)

# Side-by-side table of the four fits; `precision` is left at its default.
compare(
    {
        'Pool': res_pl,
        'RE': res_re,
        'FE': res_fe,
        'FD': res_fd,
    },
    stars=True,
)

Let's fit the basic models with the Driscoll-Kraay covariance matrix estimator

We call the `.fit(cov_type='kernel')` method for all models

We collect fitted models into a table (standard errors in brackets)

In [None]:
# Each model is specified and then immediately fitted with
# cov_type='kernel'.

# Pooled model
mod_pl = PooledOLS.from_formula(
    formula='lgaspcar~1+lincomep+lrpmg+lcarpcap',
    data=panel_df,
)
res_pl = mod_pl.fit(cov_type='kernel')

# Random-effect model
mod_re = RandomEffects.from_formula(
    formula='lgaspcar~1+lincomep+lrpmg+lcarpcap',
    data=panel_df,
)
res_re = mod_re.fit(cov_type='kernel')

# Fixed-effect model (entity effects requested in the formula)
mod_fe = PanelOLS.from_formula(
    formula='lgaspcar~1+lincomep+lrpmg+lcarpcap+EntityEffects',
    data=panel_df,
)
res_fe = mod_fe.fit(cov_type='kernel')

# First-difference model (no intercept in the formula)
mod_fd = FirstDifferenceOLS.from_formula(
    formula='lgaspcar~lincomep+lrpmg+lcarpcap',
    data=panel_df,
)
res_fd = mod_fd.fit(cov_type='kernel')

# Side-by-side table of the four fits, reporting standard errors.
compare(
    {
        'Pool': res_pl,
        'RE': res_re,
        'FE': res_fe,
        'FD': res_fd,
    },
    stars=True,
    precision='std_errors',
)

## Fitting models via regression design matrices

__Remark:__ we need to remove rows with missing values in the variables included in the model

In [None]:
# Names of the dependent variable and of the regressors used in every model.
dependent = ['lgaspcar']
regressors = ['lincomep', 'lrpmg', 'lcarpcap']

# Drop incomplete rows ONCE, looking only at the model's variables, and reuse
# the result: y and both design matrices are then built from exactly the same
# observations, and the same filtering is not recomputed three times.
model_data = panel_df[dependent + regressors].dropna()

# Selecting with a list keeps y as a one-column DataFrame.
y = model_data[dependent]
# Design matrix with a constant column added by add_constant.
X = add_constant(model_data[regressors])
# For the FD estimator we do not include an intercept
X_fd = model_data[regressors]

In [None]:
# Each model is built from the design matrices and then immediately fitted
# with the default fit options.

# Pooled model
mod_pl = PooledOLS(y, X)
res_pl = mod_pl.fit()

# Random-effect model
mod_re = RandomEffects(y, X)
res_re = mod_re.fit()

# Fixed-effect model: entity effects are switched on via keyword arguments
mod_fe = PanelOLS(
    y,
    X,
    entity_effects=True,
    drop_absorbed=True,
)
res_fe = mod_fe.fit()

# First-difference model: uses the design matrix without a constant
mod_fd = FirstDifferenceOLS(y, X_fd)
res_fd = mod_fd.fit()

In [None]:
# Side-by-side table of the four matrix-based fits, reporting standard errors.
compare(
    {
        'Pool': res_pl,
        'RE': res_re,
        'FE': res_fe,
        'FD': res_fd,
    },
    stars=True,
    precision='std_errors',
)