in this notebook, FEF model in [Pesaran, M. Hashem; Zhou, Qiankun (2014) : Estimation of Time-invariant Effects in Static Panel Data Models] (https://papers.ssrn.com/sol3/papers.cfm?abstract_id=2493312) is applied to estimate the effect of time-invariant variables, altruism, in the panel data.

I wrote this in Stata first to ensure the correctness of applying statistic models, then writing this again in Python for the completeness of the project and to practice.


In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from auxiliary import *
import zipfile
from linearmodels.panel import PooledOLS, RandomEffects, BetweenOLS, FirstDifferenceOLS

import statsmodels.api as sm
import missingno as msno

pd.set_option('display.max_columns', 500)

sns.set_theme(style="ticks")

ModuleNotFoundError: No module named 'numpy'

In [None]:
with zipfile.ZipFile('data.zip', 'r') as zip_ref:
    zip_ref.extractall('')

### read in data
### set up dependent and independent variables
### examine missing value

In [5]:
long = pd.read_csv('data/result_long.csv')

#setting variables
#time invariants
x = ['altruism', 'posrecip', 'risktaking', 'patience', 'trust', 'negrecip', 'income_type', 'region']

#dummy vars
x_dummy = ['oecd', 'g20', 'oda_int', 'aid']

#year-varied vars 
x_year = ['demo_electoral', 'demo_gov', 'demo_participate', 'demo_culture', 'demo_liberty', 'govexpense',  'gdpcapita']

#year-varied gini index, contains large numbers of na
gni = ['gni']

#add constant
long_c = sm.add_constant(long)
long_y = long_c.assign(funding_capita= long_c['funding']/long_c['pop'])

long_replace = long_y.replace({'funding_capita': np.nan}, 0)
long_index = long_replace.set_index(['isocode', 'year'])
long_select_gni = long_index[['funding_capita'] + x + x_dummy + x_year + gni]
long_select_gni.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 1368 entries, ('AFG', 2003) to ('ZWE', 2020)
Data columns (total 21 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   funding_capita    1368 non-null   float64
 1   altruism          1368 non-null   float64
 2   posrecip          1368 non-null   float64
 3   risktaking        1368 non-null   float64
 4   patience          1368 non-null   float64
 5   trust             1368 non-null   float64
 6   negrecip          1368 non-null   float64
 7   income_type       1368 non-null   object 
 8   region            1368 non-null   object 
 9   oecd              1368 non-null   int64  
 10  g20               1368 non-null   int64  
 11  oda_int           1368 non-null   int64  
 12  aid               1368 non-null   int64  
 13  demo_electoral    912 non-null    float64
 14  demo_gov          912 non-null    float64
 15  demo_participate  912 non-null    float64
 16  demo_culture      912

### heat map for examine missing value

In [8]:
sns.heatmap(long_select_gni.isnull(), cbar=False)



In [2]:
long_select = long_select_gni.drop('gni')

df = long_select.dropna()
df_gni = long_select_gni.dropna()

# df['year'] = pd.Categorical(df.index.to_frame()['year'])
# x += ['year']
df.head()


Unnamed: 0_level_0,Unnamed: 1_level_0,funding_gdp,posrecip,risktaking,patience,trust,negrecip,aid,gdpcapita,govexpense,pop,oda_int,const,income_type,region,demo,altruism,year
isocode,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
AFG,2006,0.0,0.289641,0.120764,-0.20136,0.315964,0.254712,0,263.733692,18.303,26433049.0,0,1.0,Low income,South Asia,30.6,0.166455,2006
AFG,2008,0.2374,0.289641,0.120764,-0.20136,0.315964,0.254712,0,364.660745,20.918,27722276.0,0,1.0,Low income,South Asia,30.2,0.166455,2008
AFG,2010,0.015,0.289641,0.120764,-0.20136,0.315964,0.254712,0,543.303042,20.801,29185507.0,0,1.0,Low income,South Asia,24.8,0.166455,2010
AFG,2011,0.0,0.289641,0.120764,-0.20136,0.315964,0.254712,0,591.162759,21.937,30117413.0,0,1.0,Low income,South Asia,24.8,0.166455,2011
AFG,2012,0.0,0.289641,0.120764,-0.20136,0.315964,0.254712,0,641.871479,25.028,31161376.0,0,1.0,Low income,South Asia,24.8,0.166455,2012


# OLS

In [60]:
mod = PooledOLS(df.funding_gdp, df[x])
pooled_res = mod.fit(cov_type='clustered', cluster_entity=True, cluster_time=False)
pooled_res

0,1,2,3
Dep. Variable:,funding_gdp,R-squared:,0.3773
Estimator:,PooledOLS,R-squared (Between):,0.6241
No. Observations:,905,R-squared (Within):,0.0043
Date:,"Tue, Dec 08 2020",R-squared (Overall):,0.3773
Time:,23:40:58,Log-likelihood,2075.8
Cov. Estimator:,Clustered,,
,,F-statistic:,16.509
Entities:,76,P-value,0.0000
Avg Obs:,11.908,Distribution:,"F(32,872)"
Min Obs:,7.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
posrecip,-0.0020,0.0065,-0.3083,0.7580,-0.0149,0.0108
risktaking,-0.0009,0.0087,-0.0990,0.9211,-0.0178,0.0161
patience,0.0202,0.0171,1.1834,0.2370,-0.0133,0.0538
trust,0.0218,0.0088,2.4869,0.0131,0.0046,0.0390
negrecip,-0.0058,0.0067,-0.8754,0.3816,-0.0189,0.0073
aid,0.0027,0.0069,0.3904,0.6963,-0.0108,0.0162
gdpcapita,9.585e-07,3.432e-07,2.7926,0.0053,2.848e-07,1.632e-06
govexpense,-0.0001,0.0002,-0.6455,0.5188,-0.0004,0.0002
pop,-9.856e-12,8.949e-12,-1.1013,0.2711,-2.742e-11,7.709e-12


In [None]:
# FEF