In [2]:
import pandas as pd
import math
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.formula.api as smf

from scipy import stats
from scipy.interpolate import interp1d
from scipy.optimize import fsolve
from scipy.optimize import minimize

In [3]:
# Read the Stata dataset; `data` keeps the variables in levels.
data = pd.read_stata('chile.dta')

# Output and the four inputs are used in natural logs below.
log_cols = ['routput', 'totlab', 'renerg', 'realmats', 'rcapstock']

# Transform a copy so that `data` itself is left unchanged.
ldata = data.copy()
ldata[log_cols] = np.log(data[log_cols])

# Discard every row that has a missing value in any column.
ldata = ldata.dropna()

In [4]:
# Pooled OLS of log output on the four log inputs; the formula interface
# adds the intercept term.
olsmodel = smf.ols(
    formula='routput ~ totlab + renerg + realmats + rcapstock',
    data=ldata
)
olsresult = olsmodel.fit()

# Bare last expression so the notebook renders the summary table.
olsresult.summary()

0,1,2,3
Dep. Variable:,routput,R-squared:,0.963
Model:,OLS,Adj. R-squared:,0.963
Method:,Least Squares,F-statistic:,135900.0
Date:,"Sun, 12 May 2019",Prob (F-statistic):,0.0
Time:,11:18:42,Log-Likelihood:,-4619.4
No. Observations:,20717,AIC:,9249.0
Df Residuals:,20712,BIC:,9289.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,1.3535,0.015,89.079,0.000,1.324,1.383
totlab,0.1643,0.004,42.376,0.000,0.157,0.172
renerg,0.1025,0.002,45.094,0.000,0.098,0.107
realmats,0.7352,0.003,290.483,0.000,0.730,0.740
rcapstock,0.0644,0.002,38.010,0.000,0.061,0.068

0,1,2,3
Omnibus:,10161.91,Durbin-Watson:,1.031
Prob(Omnibus):,0.0,Jarque-Bera (JB):,136680.009
Skew:,2.025,Prob(JB):,0.0
Kurtosis:,14.914,Cond. No.,124.0


In [5]:
def add_fe(df, dfvars, groups):
    """Apply the within (group-demeaning) transformation to columns of ``df``.

    For every name in ``dfvars`` a new column ``<name>fe`` is written to
    ``df``. It holds the original column minus, for each grouping column in
    ``groups``, the mean of the original column within that row's group.
    With several grouping columns, every group mean is taken from the
    original (not the partially demeaned) column and the means are
    subtracted one after another.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding both the variables and the grouping columns. It is
        modified in place: the ``fe`` columns are added to it.
    dfvars : iterable of str
        Names of the columns to demean.
    groups : iterable of str
        Names of the columns that define the groups (e.g. ``['id']``).
        Group keys are assumed to be non-missing.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in, with the added columns.
    """
    for var in dfvars:
        # Start from the raw variable; with no grouping columns the 'fe'
        # column is simply a copy of it.
        demeaned = df[var].copy()
        for fe in groups:
            # Row-aligned group mean computed from `df` itself (not from a
            # notebook-level global), without building a dense
            # rows-by-groups dummy matrix.
            demeaned = demeaned - df.groupby(fe)[var].transform('mean')
        df[var + 'fe'] = demeaned
    return df

# Demean log output and the four log inputs within each `id` group, working on
# a copy so that `ldata` keeps the un-demeaned columns.
fedata = ldata.copy()
fedata = add_fe(fedata, ['routput','totlab', 'renerg','realmats','rcapstock'], ['id'])

# print() with a single parenthesised argument runs on both Python 2 and 3;
# the bare `print x` statement is a SyntaxError on Python 3.
print(fedata['routputfe'])

0        0.085300
1        0.241224
2       -0.326523
3        0.350528
4        0.392843
5       -0.563570
6       -0.043745
7       -0.147861
8       -0.230948
9       -0.310527
10       0.963412
11      -0.410137
12      -0.089180
13       0.168297
14       0.266208
15       0.216876
16       0.173726
17       0.076088
18       0.003244
19      -0.815268
20       0.000000
21       0.098078
22      -0.154069
23       0.089334
24       0.039676
25      -0.073016
26       1.021615
27       0.817558
28       0.714607
29      -0.190690
           ...   
20700    0.000000
20701    0.000000
20702    0.000000
20703    0.000000
20704    0.000000
20705    0.000000
20706    0.000000
20707    0.000000
20708    0.000000
20709    0.000000
20710    0.000000
20711    0.000000
20712    0.000000
20713    0.000000
20714    0.000000
20715    0.000000
20716    0.000000
20717    0.000000
20718    0.000000
20719    0.000000
20720    0.000000
20721    0.000000
20722    0.000000
20723    0.000000
20724    0

In [7]:
# Within (fixed-effects) estimator: OLS on the group-demeaned columns. The
# trailing `-1` in the formula removes the intercept.
# NOTE(review): the recorded fit reports Df Residuals = observations - 4, so
# the group means removed by the demeaning are not counted in the degrees of
# freedom; the nonrobust standard errors are therefore likely understated.
femodel = smf.ols(
    formula='routputfe ~ totlabfe + renergfe + realmatsfe + rcapstockfe -1',
    data=fedata
)
feresult = femodel.fit()

# Bare last expression so the notebook renders the summary table.
feresult.summary()

0,1,2,3
Dep. Variable:,routputfe,R-squared:,0.745
Model:,OLS,Adj. R-squared:,0.745
Method:,Least Squares,F-statistic:,15110.0
Date:,"Fri, 10 May 2019",Prob (F-statistic):,0.0
Time:,09:35:00,Log-Likelihood:,3385.1
No. Observations:,20717,AIC:,-6762.0
Df Residuals:,20713,BIC:,-6731.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
totlabfe,0.1904,0.005,40.301,0.000,0.181,0.200
renergfe,0.0626,0.002,27.477,0.000,0.058,0.067
realmatsfe,0.6073,0.003,179.527,0.000,0.601,0.614
rcapstockfe,0.0508,0.003,18.381,0.000,0.045,0.056

0,1,2,3
Omnibus:,4681.848,Durbin-Watson:,1.56
Prob(Omnibus):,0.0,Jarque-Bera (JB):,49300.045
Skew:,0.785,Prob(JB):,0.0
Kurtosis:,10.392,Cond. No.,2.58
