in this notebook, I combined the data that is needed for our research project, using 76 countries in GPS as the key because this is our main interest in the project, all the data has been filtered to at least after 2003, for the reason that democratic index only starts from 2006.
notice:
- democratic index range from 2006 to 2019, 2007 & 2009 doesn't exist
- Bosnia Herzegovina have no record in humanitarian aids data.
- 54 countries do not have ODA record
- 63 countries do not have aid received record
 

### data output 
| Variable     |      Content                                                   |
|:-------------|:------------------------------------------------------------- :|
| country| name of countries from Global Preference Survey(GPS) data            |
| isocode | isocode of the countries                             |
|demo2019 ~ demo2006| democratic index by year|
|altruism| altruism score from GPS | 
|gdp2003 ~ gdp2019| gdp in current US dollar by year|
|funding2003 ~ funding2019| humanitarian aid fundinn in US dollar by year       |
|pledge2003 ~ pledge2019|pledging humanitarian aid in US dollar by year        |
|funding2003_gdp ~ funding2019_gdp| funding / gdp the year * 100       |
|gdpcapita2003 ~ gdpcapita2019| gdp per capita in current US dollar by year  |
|region| region group from WDI data|
|income_type| income group from WDI data|
|aid_boolean| True if the country ever receive net official aid between 2003 ~ 2019|
|pop2003 ~ pop2019| population data by year |
|oda2003 ~ oda2019|Net Official development assistance in current US dollar |
|govexpense2003 ~ govexpense2019| General government total expenditure as % of GDP  |
|oda_int| 1 if the country ever receive net official aid between 2003 ~ 2019|
|oecd| dummy variable indicates whether the country belongs to OECD at 2020 |
|g20| dummy variable indicates whether the country belongs to G20 at 2019 |
|gni2003 ~ gni2020| Gini Index data from world bank  |






In [1]:
import pandas as pd
import glob
from auxiliary import *
import zipfile

pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)



In [3]:

with zipfile.ZipFile('data.zip', 'r') as zip_ref:
    zip_ref.extractall('')

### read in  democracy index from The Economist's Democracy Index

In [2]:
democracy = pd.read_excel('data/EIU_Democracy_Index_2006_to_2019.xls').rename(columns={"Unnamed: 0": "country"})

In [3]:
country_dict = {'US': 'United States',  'Bosnia and Hercegovina' : 'Bosnia Herzegovina', 'UK' : 'United Kingdom', 'UAE': 'United Arab Emirates' }
democracy = democracy.replace({"country": country_dict})
democracy = democracy.set_index('country').add_prefix('demo').reset_index()
democracy.head()

Unnamed: 0,country,demo2019,demo2018,demo2017,demo2016,demo2015,demo2014,demo2013,demo2012,demo2011,demo2010,demo2008,demo2006
0,Canada,9.22,9.15,9.15,9.15,9.08,9.08,9.08,9.08,9.08,9.08,9.07,9.07
1,United States,7.96,7.96,7.98,7.98,8.05,8.11,8.11,8.11,8.11,8.18,8.22,8.22
2,Austria,8.29,8.29,8.42,8.41,8.54,8.54,8.48,8.62,8.49,8.49,8.49,8.69
3,Belgium,7.64,7.78,7.78,7.77,7.93,7.93,8.05,8.05,8.05,8.05,8.16,8.15
4,Cyprus,7.59,7.59,7.59,7.65,7.53,7.4,7.29,7.29,7.29,7.29,7.7,7.6


### read in Global Preference Survey (GPS) data and merge with  democracy index 

In [4]:
gps = pd.read_stata('data/country.dta')
gps.shape

(76, 8)

In [5]:
gps.head()

Unnamed: 0,country,isocode,patience,risktaking,posrecip,negrecip,altruism,trust
0,Afghanistan,AFG,-0.20136,0.120764,0.289641,0.254712,0.166455,0.315964
1,Algeria,DZA,0.059815,0.39153,-0.598255,0.254901,-0.009968,-0.177371
2,Argentina,ARG,-0.229308,0.041503,0.159679,-0.140457,0.027347,-0.206733
3,Australia,AUS,0.657004,0.137137,0.06966,0.02219,0.155331,0.298973
4,Austria,AUT,0.608285,-0.061829,0.161047,-0.055415,-0.035363,0.154628


In [6]:
gps_democracy = pd.merge(gps, democracy, how = 'left', on = 'country')
gps_democracy.shape

(76, 20)

In [7]:
# gps_democracy[gps_democracy.isna().any(axis=1)]
gps_democracy.head()

Unnamed: 0,country,isocode,patience,risktaking,posrecip,negrecip,altruism,trust,demo2019,demo2018,demo2017,demo2016,demo2015,demo2014,demo2013,demo2012,demo2011,demo2010,demo2008,demo2006
0,Afghanistan,AFG,-0.20136,0.120764,0.289641,0.254712,0.166455,0.315964,2.85,2.97,2.55,2.55,2.77,2.77,2.48,2.48,2.48,2.48,3.02,3.06
1,Algeria,DZA,0.059815,0.39153,-0.598255,0.254901,-0.009968,-0.177371,4.01,3.5,3.56,3.56,3.95,3.83,3.83,3.83,3.44,3.44,3.32,3.17
2,Argentina,ARG,-0.229308,0.041503,0.159679,-0.140457,0.027347,-0.206733,7.02,7.02,6.96,6.96,7.02,6.84,6.84,6.84,6.84,6.84,6.63,6.63
3,Australia,AUS,0.657004,0.137137,0.06966,0.02219,0.155331,0.298973,9.09,9.09,9.09,9.01,9.01,9.01,9.13,9.22,9.22,9.22,9.09,9.09
4,Austria,AUT,0.608285,-0.061829,0.161047,-0.055415,-0.035363,0.154628,8.29,8.29,8.42,8.41,8.54,8.54,8.48,8.62,8.49,8.49,8.49,8.69


### read in UN Humanitarian Affairs Financial Tracking Service data by year and rename columns for merging
### notice Bosnia does not get matched

In [8]:
file_name = 'data/OCHA_FTS_Government_Donations_20'
extension = '.xlsx'
all_filenames = [i for i in glob.glob(f'{file_name}*{extension}')]

In [9]:
for i in range(len(all_filenames)):
    new = pd.read_excel(all_filenames[i], sheet_name='Export data', skiprows=2).rename(columns={"Source org.": "country", "Funding US$": "funding", "Pledges US$" : "pledge"})
    if i == 0:
        df = new
    elif i < 7:
        df = pd.merge(df, new, how='outer', on="country", suffixes=('', f'200{i+3}'))       
    else:
        df = pd.merge(df, new, how='outer', on="country", suffixes=('', f'20{i+3}'))

donate = df.rename(columns={"pledge": "pledge2003", "funding": "funding2003"}).replace({0: np.nan})
donate['country'] = donate['country'].str.rstrip(' Government of')
donate['country'] = donate['country'].str.rstrip(',')


In [10]:
#checking 2006 & 2020 manually 
country_dict = {'United States of America': 'United States',  'Saudi Arabia (Kingdom of)' : 'Saudi Arabia', 'Russian Federation' : 'Russia', 'Korea, Republic of': 'South Korea', 'Viet Nam' : 'Vietnam'}
donate = donate.replace({"country": country_dict})

In [11]:
gps_demo_donate = pd.merge(gps_democracy, donate,  how = 'left', on = 'country')
gps_demo_donate.shape

(76, 56)

In [12]:
gps_demo_donate.head()

Unnamed: 0,country,isocode,patience,risktaking,posrecip,negrecip,altruism,trust,demo2019,demo2018,demo2017,demo2016,demo2015,demo2014,demo2013,demo2012,demo2011,demo2010,demo2008,demo2006,funding2003,pledge2003,funding2004,pledge2004,funding2005,pledge2005,funding2006,pledge2006,funding2007,pledge2007,funding2008,pledge2008,funding2009,pledge2009,funding2010,pledge2010,funding2011,pledge2011,funding2012,pledge2012,funding2013,pledge2013,funding2014,pledge2014,funding2015,pledge2015,funding2016,pledge2016,funding2017,pledge2017,funding2018,pledge2018,funding2019,pledge2019,funding2020,pledge2020
0,Afghanistan,AFG,-0.20136,0.120764,0.289641,0.254712,0.166455,0.315964,2.85,2.97,2.55,2.55,2.77,2.77,2.48,2.48,2.48,2.48,3.02,3.06,,,,,500000.0,,,,,,24000000.0,,136493.0,,2379910.0,,3360.0,,3000.0,,2000.0,,,,1000.0,,,,,,3870542.0,,903177.0,,,
1,Algeria,DZA,0.059815,0.39153,-0.598255,0.254901,-0.009968,-0.177371,4.01,3.5,3.56,3.56,3.95,3.83,3.83,3.83,3.44,3.44,3.32,3.17,3178000.0,,,,14769749.0,,,,10000.0,,10000.0,,76432.0,,10110696.0,,16010000.0,7000000.0,,,2000000.0,,10000.0,,,,660000.0,,10000.0,,10000.0,,,,,
2,Argentina,ARG,-0.229308,0.041503,0.159679,-0.140457,0.027347,-0.206733,7.02,7.02,6.96,6.96,7.02,6.84,6.84,6.84,6.84,6.84,6.63,6.63,,,,,,,,,,,139000.0,,31100.0,,50000.0,,107332.0,,130826.0,,165718.0,,230500.0,,,,176272.0,,1344591.0,,265000.0,,155000.0,,,
3,Australia,AUS,0.657004,0.137137,0.06966,0.02219,0.155331,0.298973,9.09,9.09,9.09,9.01,9.01,9.01,9.13,9.22,9.22,9.22,9.09,9.09,87736725.0,,69076907.0,,85073844.0,29859474.0,89757369.0,2262517.0,109371739.0,3817867.0,143674846.0,39494954.0,196311166.0,8733624.0,293856560.0,1923773.0,312711049.0,,271004092.0,2809573.0,263194483.0,,269253261.0,,166236181.0,762236.0,205207886.0,2724017.0,243900706.0,2712522.0,248119692.0,,310639840.0,,271692972.0,
4,Austria,AUT,0.608285,-0.061829,0.161047,-0.055415,-0.035363,0.154628,8.29,8.29,8.42,8.41,8.54,8.54,8.48,8.62,8.49,8.49,8.49,8.69,3245126.0,,6186042.0,,18423937.0,,5624017.0,765306.0,7885069.0,,15280397.0,1017509.0,16939495.0,348675.0,25973202.0,,18074363.0,500409.0,16333100.0,,17699567.0,,18504048.0,,18786627.0,,35663228.0,,49109105.0,,26096928.0,6670328.0,32476153.0,1540154.0,19142494.0,


In [13]:
gps_demo_donate[gps_demo_donate.filter(regex='(funding|pledge)20*', axis=1).isna().all(axis=1)]

Unnamed: 0,country,isocode,patience,risktaking,posrecip,negrecip,altruism,trust,demo2019,demo2018,demo2017,demo2016,demo2015,demo2014,demo2013,demo2012,demo2011,demo2010,demo2008,demo2006,funding2003,pledge2003,funding2004,pledge2004,funding2005,pledge2005,funding2006,pledge2006,funding2007,pledge2007,funding2008,pledge2008,funding2009,pledge2009,funding2010,pledge2010,funding2011,pledge2011,funding2012,pledge2012,funding2013,pledge2013,funding2014,pledge2014,funding2015,pledge2015,funding2016,pledge2016,funding2017,pledge2017,funding2018,pledge2018,funding2019,pledge2019,funding2020,pledge2020
7,Bosnia Herzegovina,BIH,-0.247258,-0.125639,0.096463,0.429355,-0.11531,-0.193106,4.86,4.98,4.87,4.87,4.83,4.78,5.02,5.11,5.24,5.32,5.7,5.78,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


### read in GDP in current US dollar data from WDI and merge with previous data frame

In [14]:
gdp = pd.read_excel('data/GDP_by_country_by_year.xls', sheet_name='Data', skiprows=3).rename(columns={"Country Code": "isocode"}).set_index('isocode')
gdp = gdp.iloc[:,-18 : -1]
gdp = gdp.add_prefix('gdp')
gdp.reset_index()
gdp.head()


Unnamed: 0_level_0,gdp2003,gdp2004,gdp2005,gdp2006,gdp2007,gdp2008,gdp2009,gdp2010,gdp2011,gdp2012,gdp2013,gdp2014,gdp2015,gdp2016,gdp2017,gdp2018,gdp2019
isocode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
ABW,2021229000.0,2228492000.0,2330726000.0,2424581000.0,2615084000.0,2745251000.0,2498883000.0,2390503000.0,2549721000.0,2534637000.0,2701676000.0,2765363000.0,2919553000.0,2965922000.0,3056425000.0,,
AFG,4515559000.0,5226779000.0,6209138000.0,6971286000.0,9747880000.0,10109230000.0,12439090000.0,15856570000.0,17804290000.0,20001600000.0,20561070000.0,20484890000.0,19907110000.0,19362640000.0,20191760000.0,19484380000.0,19101350000.0
AGO,17812710000.0,23552050000.0,36970920000.0,52381010000.0,65266450000.0,88538610000.0,70307160000.0,83799500000.0,111789700000.0,128052900000.0,136709900000.0,145712200000.0,116193600000.0,101123900000.0,122123800000.0,101353200000.0,94635420000.0
ALB,5611492000.0,7184681000.0,8052076000.0,8896074000.0,10677320000.0,12881350000.0,12044220000.0,11926930000.0,12890770000.0,12319830000.0,12776220000.0,13228140000.0,11386850000.0,11861200000.0,13019690000.0,15147020000.0,15278080000.0
AND,2361727000.0,2894922000.0,3159905000.0,3456442000.0,3952601000.0,4085631000.0,3674410000.0,3449967000.0,3629204000.0,3188809000.0,3193704000.0,3271808000.0,2789870000.0,2896679000.0,3000181000.0,3218316000.0,3154058000.0


In [15]:
gps_demo_donate_gdp = pd.merge(gps_demo_donate, gdp, how='left', on="isocode")
gps_demo_donate_gdp.shape

(76, 73)

In [16]:
# gps_demo_donate_gdp[gps_demo_donate_gdp.filter(regex='gdp.*', axis=1).isna().all(axis=1)]
gps_demo_donate_gdp.head()

Unnamed: 0,country,isocode,patience,risktaking,posrecip,negrecip,altruism,trust,demo2019,demo2018,demo2017,demo2016,demo2015,demo2014,demo2013,demo2012,demo2011,demo2010,demo2008,demo2006,funding2003,pledge2003,funding2004,pledge2004,funding2005,pledge2005,funding2006,pledge2006,funding2007,pledge2007,funding2008,pledge2008,funding2009,pledge2009,funding2010,pledge2010,funding2011,pledge2011,funding2012,pledge2012,funding2013,pledge2013,funding2014,pledge2014,funding2015,pledge2015,funding2016,pledge2016,funding2017,pledge2017,funding2018,pledge2018,funding2019,pledge2019,funding2020,pledge2020,gdp2003,gdp2004,gdp2005,gdp2006,gdp2007,gdp2008,gdp2009,gdp2010,gdp2011,gdp2012,gdp2013,gdp2014,gdp2015,gdp2016,gdp2017,gdp2018,gdp2019
0,Afghanistan,AFG,-0.20136,0.120764,0.289641,0.254712,0.166455,0.315964,2.85,2.97,2.55,2.55,2.77,2.77,2.48,2.48,2.48,2.48,3.02,3.06,,,,,500000.0,,,,,,24000000.0,,136493.0,,2379910.0,,3360.0,,3000.0,,2000.0,,,,1000.0,,,,,,3870542.0,,903177.0,,,,4515559000.0,5226779000.0,6209138000.0,6971286000.0,9747880000.0,10109230000.0,12439090000.0,15856570000.0,17804290000.0,20001600000.0,20561070000.0,20484890000.0,19907110000.0,19362640000.0,20191760000.0,19484380000.0,19101350000.0
1,Algeria,DZA,0.059815,0.39153,-0.598255,0.254901,-0.009968,-0.177371,4.01,3.5,3.56,3.56,3.95,3.83,3.83,3.83,3.44,3.44,3.32,3.17,3178000.0,,,,14769749.0,,,,10000.0,,10000.0,,76432.0,,10110696.0,,16010000.0,7000000.0,,,2000000.0,,10000.0,,,,660000.0,,10000.0,,10000.0,,,,,,67864850000.0,85324770000.0,103198400000.0,117025400000.0,134977300000.0,171000600000.0,137210700000.0,161207800000.0,200019500000.0,209060300000.0,209754800000.0,213808800000.0,165979400000.0,160033800000.0,167389400000.0,173758000000.0,169988200000.0
2,Argentina,ARG,-0.229308,0.041503,0.159679,-0.140457,0.027347,-0.206733,7.02,7.02,6.96,6.96,7.02,6.84,6.84,6.84,6.84,6.84,6.63,6.63,,,,,,,,,,,139000.0,,31100.0,,50000.0,,107332.0,,130826.0,,165718.0,,230500.0,,,,176272.0,,1344591.0,,265000.0,,155000.0,,,,127587000000.0,164657900000.0,198737100000.0,232557300000.0,287530500000.0,361558000000.0,332976500000.0,423627400000.0,530163300000.0,545982400000.0,552025100000.0,526319700000.0,594749300000.0,557531400000.0,642695900000.0,519871500000.0,449663400000.0
3,Australia,AUS,0.657004,0.137137,0.06966,0.02219,0.155331,0.298973,9.09,9.09,9.09,9.01,9.01,9.01,9.13,9.22,9.22,9.22,9.09,9.09,87736725.0,,69076907.0,,85073844.0,29859474.0,89757369.0,2262517.0,109371739.0,3817867.0,143674846.0,39494954.0,196311166.0,8733624.0,293856560.0,1923773.0,312711049.0,,271004092.0,2809573.0,263194483.0,,269253261.0,,166236181.0,762236.0,205207886.0,2724017.0,243900706.0,2712522.0,248119692.0,,310639840.0,,271692972.0,,466488100000.0,612490400000.0,693407800000.0,746054200000.0,853099600000.0,1053996000000.0,927805200000.0,1146138000000.0,1396650000000.0,1546152000000.0,1576184000000.0,1467484000000.0,1351694000000.0,1208847000000.0,1330136000000.0,1433904000000.0,1392681000000.0
4,Austria,AUT,0.608285,-0.061829,0.161047,-0.055415,-0.035363,0.154628,8.29,8.29,8.42,8.41,8.54,8.54,8.48,8.62,8.49,8.49,8.49,8.69,3245126.0,,6186042.0,,18423937.0,,5624017.0,765306.0,7885069.0,,15280397.0,1017509.0,16939495.0,348675.0,25973202.0,,18074363.0,500409.0,16333100.0,,17699567.0,,18504048.0,,18786627.0,,35663228.0,,49109105.0,,26096928.0,6670328.0,32476153.0,1540154.0,19142494.0,,261695800000.0,300904200000.0,315974400000.0,335998600000.0,388691400000.0,430294300000.0,400172300000.0,391892700000.0,431120300000.0,409425200000.0,430068700000.0,441996100000.0,381817600000.0,395227700000.0,418316200000.0,455508300000.0,446314700000.0


### adding columns of funding / GDP * 100 by year

In [17]:
df_funding = convert_percent('funding','gdp', gps_demo_donate_gdp, ratio=100)
df_funding.head()

Unnamed: 0,country,isocode,patience,risktaking,posrecip,negrecip,altruism,trust,demo2019,demo2018,demo2017,demo2016,demo2015,demo2014,demo2013,demo2012,demo2011,demo2010,demo2008,demo2006,funding2003,pledge2003,funding2004,pledge2004,funding2005,pledge2005,funding2006,pledge2006,funding2007,pledge2007,funding2008,pledge2008,funding2009,pledge2009,funding2010,pledge2010,funding2011,pledge2011,funding2012,pledge2012,funding2013,pledge2013,funding2014,pledge2014,funding2015,pledge2015,funding2016,pledge2016,funding2017,pledge2017,funding2018,pledge2018,funding2019,pledge2019,funding2020,pledge2020,gdp2003,gdp2004,gdp2005,gdp2006,gdp2007,gdp2008,gdp2009,gdp2010,gdp2011,gdp2012,gdp2013,gdp2014,gdp2015,gdp2016,gdp2017,gdp2018,gdp2019,funding_gdp2003,funding_gdp2004,funding_gdp2005,funding_gdp2006,funding_gdp2007,funding_gdp2008,funding_gdp2009,funding_gdp2010,funding_gdp2011,funding_gdp2012,funding_gdp2013,funding_gdp2014,funding_gdp2015,funding_gdp2016,funding_gdp2017,funding_gdp2018,funding_gdp2019
0,Afghanistan,AFG,-0.20136,0.120764,0.289641,0.254712,0.166455,0.315964,2.85,2.97,2.55,2.55,2.77,2.77,2.48,2.48,2.48,2.48,3.02,3.06,,,,,500000.0,,,,,,24000000.0,,136493.0,,2379910.0,,3360.0,,3000.0,,2000.0,,,,1000.0,,,,,,3870542.0,,903177.0,,,,4515559000.0,5226779000.0,6209138000.0,6971286000.0,9747880000.0,10109230000.0,12439090000.0,15856570000.0,17804290000.0,20001600000.0,20561070000.0,20484890000.0,19907110000.0,19362640000.0,20191760000.0,19484380000.0,19101350000.0,,,0.0081,,,0.2374,0.0011,0.015,0.0,0.0,0.0,,0.0,,,0.0199,0.0047
1,Algeria,DZA,0.059815,0.39153,-0.598255,0.254901,-0.009968,-0.177371,4.01,3.5,3.56,3.56,3.95,3.83,3.83,3.83,3.44,3.44,3.32,3.17,3178000.0,,,,14769749.0,,,,10000.0,,10000.0,,76432.0,,10110696.0,,16010000.0,7000000.0,,,2000000.0,,10000.0,,,,660000.0,,10000.0,,10000.0,,,,,,67864850000.0,85324770000.0,103198400000.0,117025400000.0,134977300000.0,171000600000.0,137210700000.0,161207800000.0,200019500000.0,209060300000.0,209754800000.0,213808800000.0,165979400000.0,160033800000.0,167389400000.0,173758000000.0,169988200000.0,0.0047,,0.0143,,0.0,0.0,0.0001,0.0063,0.008,,0.001,0.0,,0.0004,0.0,0.0,
2,Argentina,ARG,-0.229308,0.041503,0.159679,-0.140457,0.027347,-0.206733,7.02,7.02,6.96,6.96,7.02,6.84,6.84,6.84,6.84,6.84,6.63,6.63,,,,,,,,,,,139000.0,,31100.0,,50000.0,,107332.0,,130826.0,,165718.0,,230500.0,,,,176272.0,,1344591.0,,265000.0,,155000.0,,,,127587000000.0,164657900000.0,198737100000.0,232557300000.0,287530500000.0,361558000000.0,332976500000.0,423627400000.0,530163300000.0,545982400000.0,552025100000.0,526319700000.0,594749300000.0,557531400000.0,642695900000.0,519871500000.0,449663400000.0,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0002,0.0001,0.0
3,Australia,AUS,0.657004,0.137137,0.06966,0.02219,0.155331,0.298973,9.09,9.09,9.09,9.01,9.01,9.01,9.13,9.22,9.22,9.22,9.09,9.09,87736725.0,,69076907.0,,85073844.0,29859474.0,89757369.0,2262517.0,109371739.0,3817867.0,143674846.0,39494954.0,196311166.0,8733624.0,293856560.0,1923773.0,312711049.0,,271004092.0,2809573.0,263194483.0,,269253261.0,,166236181.0,762236.0,205207886.0,2724017.0,243900706.0,2712522.0,248119692.0,,310639840.0,,271692972.0,,466488100000.0,612490400000.0,693407800000.0,746054200000.0,853099600000.0,1053996000000.0,927805200000.0,1146138000000.0,1396650000000.0,1546152000000.0,1576184000000.0,1467484000000.0,1351694000000.0,1208847000000.0,1330136000000.0,1433904000000.0,1392681000000.0,0.0188,0.0113,0.0123,0.012,0.0128,0.0136,0.0212,0.0256,0.0224,0.0175,0.0167,0.0183,0.0123,0.017,0.0183,0.0173,0.0223
4,Austria,AUT,0.608285,-0.061829,0.161047,-0.055415,-0.035363,0.154628,8.29,8.29,8.42,8.41,8.54,8.54,8.48,8.62,8.49,8.49,8.49,8.69,3245126.0,,6186042.0,,18423937.0,,5624017.0,765306.0,7885069.0,,15280397.0,1017509.0,16939495.0,348675.0,25973202.0,,18074363.0,500409.0,16333100.0,,17699567.0,,18504048.0,,18786627.0,,35663228.0,,49109105.0,,26096928.0,6670328.0,32476153.0,1540154.0,19142494.0,,261695800000.0,300904200000.0,315974400000.0,335998600000.0,388691400000.0,430294300000.0,400172300000.0,391892700000.0,431120300000.0,409425200000.0,430068700000.0,441996100000.0,381817600000.0,395227700000.0,418316200000.0,455508300000.0,446314700000.0,0.0012,0.0021,0.0058,0.0017,0.002,0.0036,0.0042,0.0066,0.0042,0.004,0.0041,0.0042,0.0049,0.009,0.0117,0.0057,0.0073


### adding GDP per capita in current US dollar from WDI

<!---adding General government total expenditure as % of GDP from IMF --->

In [18]:
capita = pd.read_excel('data/gdppercapita.xls', sheet_name='Data', skiprows=3).rename(columns={"Country Code": "isocode"}).set_index('isocode')
capita_filter = capita.filter(regex='^20(10|11|12|.*[3456789]$)', axis=1).add_prefix('gdpcapita')

capita_filter.head()



Unnamed: 0_level_0,gdpcapita2003,gdpcapita2004,gdpcapita2005,gdpcapita2006,gdpcapita2007,gdpcapita2008,gdpcapita2009,gdpcapita2010,gdpcapita2011,gdpcapita2012,gdpcapita2013,gdpcapita2014,gdpcapita2015,gdpcapita2016,gdpcapita2017,gdpcapita2018,gdpcapita2019
isocode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
ABW,20833.761612,22569.974985,23300.039558,24045.272483,25835.132668,27084.70369,24630.453714,23512.602596,24985.993281,24713.698045,26189.435509,26647.938101,27980.880695,28281.350482,29007.693003,,
AFG,190.683814,211.382117,242.031285,263.733692,359.693238,364.660745,438.076034,543.303042,591.162759,641.871479,637.165523,613.856689,578.466353,547.22811,556.302002,524.162881,502.115487
AGO,982.960899,1255.564045,1902.422346,2599.566464,3121.995637,4080.94141,3122.780766,3587.883798,4615.468028,5100.095808,5254.882338,5408.410496,4166.979684,3506.072885,4095.812942,3289.646664,2973.59116
ALB,1846.118813,2373.579844,2673.787283,2972.743265,3595.037163,4370.540127,4114.140114,4094.350334,4437.142885,4247.629984,4413.060861,4578.631994,3952.801215,4124.055726,4531.020806,5284.380184,5352.857411
AND,32271.963902,37969.174991,40066.256919,42675.812757,47803.693608,48718.496869,43503.185516,40852.666777,43335.328862,38686.461264,39538.766722,41303.929371,35762.523074,37474.665406,38962.880354,41793.055258,40886.391165


In [19]:
df_add_capita = pd.merge(df_funding, capita_filter, how='left', on="isocode")
# df_add_capita.shape
# df_add_capita[df_add_capita.filter(regex='gdpcapita.*', axis=1).isna().all(axis=1)]





### adding government expense as % of GDP from imf


In [20]:
govexpense = pd.read_csv('data/govexpense.csv').set_index('isocode')
df_add_govexpense = pd.merge(df_add_capita, govexpense, how='left', on="isocode")
df_add_govexpense.shape
# df_add_govexpense[df_add_govexpense.filter(regex='govexpense.*', axis=1).isna().all(axis=1)]

(76, 124)

### adding region and income group data from WDI

In [21]:
region = pd.read_csv('data/WDICountry.csv').rename(columns={"Country Code": "isocode"}).set_index('isocode')
region_rename = region.rename({'Region': 'region', 'Income Group': 'income_type'}, axis=1)
region_filter = region_rename.filter(items=['region', 'income_type'], axis=1)
region_filter.head()


Unnamed: 0_level_0,region,income_type
isocode,Unnamed: 1_level_1,Unnamed: 2_level_1
ABW,Latin America & Caribbean,High income
AFG,South Asia,Low income
AGO,Sub-Saharan Africa,Lower middle income
ALB,Europe & Central Asia,Upper middle income
AND,Europe & Central Asia,High income


In [22]:
df_add_region = pd.merge(df_add_govexpense, region_filter, how='left', on="isocode")
# df_add_region.shape
# df_add_region[df_add_region.filter(items=['region', 'income_type'], axis=1).isna().any(axis=1)]

### adding population data by year from world bank

In [23]:
pop = pd.read_excel('data/Worldbank_Population_Data.xls', sheet_name='Data', skiprows=3).rename(columns={"Country Code": "isocode"}).set_index('isocode')
pop_filter = pop.filter(regex='^20(10|11|12|.*[3456789]$)', axis=1).add_prefix('pop')
pop_filter.head()

Unnamed: 0_level_0,pop2003,pop2004,pop2005,pop2006,pop2007,pop2008,pop2009,pop2010,pop2011,pop2012,pop2013,pop2014,pop2015,pop2016,pop2017,pop2018,pop2019
isocode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
ABW,97017.0,98737.0,100031.0,100834.0,101222.0,101358.0,101455.0,101669.0,102046.0,102560.0,103159.0,103774.0,104341.0,104872.0,105366.0,105845.0,106314.0
AFG,23680871.0,24726684.0,25654277.0,26433049.0,27100536.0,27722276.0,28394813.0,29185507.0,30117413.0,31161376.0,32269589.0,33370794.0,34413603.0,35383128.0,36296400.0,37172386.0,38041754.0
AGO,18121479.0,18758145.0,19433602.0,20149901.0,20905363.0,21695634.0,22514281.0,23356246.0,24220661.0,25107931.0,26015780.0,26941779.0,27884381.0,28842484.0,29816748.0,30809762.0,31825295.0
ALB,3039616.0,3026939.0,3011487.0,2992547.0,2970017.0,2947314.0,2927519.0,2913021.0,2905195.0,2900401.0,2895092.0,2889104.0,2880703.0,2876101.0,2873457.0,2866376.0,2854191.0
AND,73182.0,76244.0,78867.0,80993.0,82684.0,83862.0,84463.0,84449.0,83747.0,82427.0,80774.0,79213.0,78011.0,77297.0,77001.0,77006.0,77142.0


In [24]:
df_add_pop = pd.merge(df_add_region, pop_filter, how='left', on="isocode")
# df_add_pop.shape
# df_add_pop[df_add_pop.filter(regex='pop.*', axis=1).isna().all(axis=1)]

### adding Net Official development assistance (ODA) data in current US dollar from WDI
### notice 54/76 countries have no ODA record from 2003 ~ 2019

In [25]:
oda = pd.read_excel('data/oda.xls', sheet_name='Data', skiprows=3).rename(columns={"Country Code": "isocode"}).set_index('isocode')
oda_filter = oda.filter(regex='^20(10|11|12|.*[3456789]$)', axis=1).add_prefix('oda')
# oda_replace = oda.replace({0: np.nan})
oda_filter.head()

Unnamed: 0_level_0,oda2003,oda2004,oda2005,oda2006,oda2007,oda2008,oda2009,oda2010,oda2011,oda2012,oda2013,oda2014,oda2015,oda2016,oda2017,oda2018,oda2019
isocode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
ABW,,,,,,,,,,,,,,,,,
AFG,,,,,,,,,,,,,,,,,
AGO,,,,,,,,,,,,,,,,,
ALB,,,,,,,,,,,,,,,,,
AND,,,,,,,,,,,,,,,,,


In [26]:
oda_filter['oda_int'] = oda_filter.mean(axis=1).notna().astype(int)

In [27]:
df_add_oda = pd.merge(df_add_pop, oda_filter, how='left', on="isocode")
# df_add_oda.shape
df_add_oda[df_add_oda.filter(regex='oda.*', axis=1).isna().all(axis=1)].shape

(0, 161)

In [28]:
#oda as % of GNI 
# odagni = pd.read_excel('data/oda gni.xls', sheet_name='Data', skiprows=3).rename(columns={"Country Code": "isocode"}).set_index('isocode')
# odagni_filter = odagni.filter(regex='^20(10|11|12|.*[3456789]$)', axis=1).add_prefix('odagni')
# odagni_filter.head()
# df_add_odagni = pd.merge(df_add_oda, odagni_filter, how='left', on="isocode")
# df_add_odagni.shape
# df_add_odagni[df_add_odagni.filter(regex='odagni.*', axis=1).isna().all(axis=1)].shape

### adding net official aid recieve or not in current US dollar 
### our countries of interest only appear in 2003, 2004, 2008, 2009, 2011
### notice 63/76 countries does not have any record

In [29]:

aid = pd.read_excel('data/offial aid received.xls', sheet_name='Data', skiprows=3).rename(columns={"Country Code": "isocode"}).set_index('isocode')
aid_filter = aid.filter(regex='^20(10|11|12|.*[3456789]$)', axis=1).add_prefix('aid')
aid_filter.head()

Unnamed: 0_level_0,aid2003,aid2004,aid2005,aid2006,aid2007,aid2008,aid2009,aid2010,aid2011,aid2012,aid2013,aid2014,aid2015,aid2016,aid2017,aid2018,aid2019
isocode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
ABW,76220000.0,-11320000.0,,,,,,,,,,,,,,,
AFG,,,,,,,,,,,,,,,,,
AGO,,,,,,,,,,,,,,,,,
ALB,,,,,,,,,,,,,,,,,
AND,,,,,,,,,,,,,,,,,


In [30]:
aid_int = aid_filter.mean(axis=1).notna().astype(int)
df_add_aid = pd.merge(df_add_oda, aid_int.rename('aid'), how='left', on="isocode")
df_add_aid.shape


(76, 162)

In [31]:
### adding columns of GDP / population * 100 by year and compare with GDP per capita to ensure corectness
# df_capita = convert_percent('gdp','pop', df_add_aid)
# df_capita[['gdp2003_pop', 'gdpcapita2003', 'gdp2004_pop', 'gdpcapita2004']]

In [32]:
df_add_aid.head()

Unnamed: 0,country,isocode,patience,risktaking,posrecip,negrecip,altruism,trust,demo2019,demo2018,demo2017,demo2016,demo2015,demo2014,demo2013,demo2012,demo2011,demo2010,demo2008,demo2006,funding2003,pledge2003,funding2004,pledge2004,funding2005,pledge2005,funding2006,pledge2006,funding2007,pledge2007,funding2008,pledge2008,funding2009,pledge2009,funding2010,pledge2010,funding2011,pledge2011,funding2012,pledge2012,funding2013,pledge2013,funding2014,pledge2014,funding2015,pledge2015,funding2016,pledge2016,funding2017,pledge2017,funding2018,pledge2018,funding2019,pledge2019,funding2020,pledge2020,gdp2003,gdp2004,gdp2005,gdp2006,gdp2007,gdp2008,gdp2009,gdp2010,gdp2011,gdp2012,gdp2013,gdp2014,gdp2015,gdp2016,gdp2017,gdp2018,gdp2019,funding_gdp2003,funding_gdp2004,funding_gdp2005,funding_gdp2006,funding_gdp2007,funding_gdp2008,funding_gdp2009,funding_gdp2010,funding_gdp2011,funding_gdp2012,funding_gdp2013,funding_gdp2014,funding_gdp2015,funding_gdp2016,funding_gdp2017,funding_gdp2018,funding_gdp2019,gdpcapita2003,gdpcapita2004,gdpcapita2005,gdpcapita2006,gdpcapita2007,gdpcapita2008,gdpcapita2009,gdpcapita2010,gdpcapita2011,gdpcapita2012,gdpcapita2013,gdpcapita2014,gdpcapita2015,gdpcapita2016,gdpcapita2017,gdpcapita2018,gdpcapita2019,govexpense2003,govexpense2004,govexpense2005,govexpense2006,govexpense2007,govexpense2008,govexpense2009,govexpense2010,govexpense2011,govexpense2012,govexpense2013,govexpense2014,govexpense2015,govexpense2016,govexpense2017,govexpense2018,govexpense2019,region,income_type,pop2003,pop2004,pop2005,pop2006,pop2007,pop2008,pop2009,pop2010,pop2011,pop2012,pop2013,pop2014,pop2015,pop2016,pop2017,pop2018,pop2019,oda2003,oda2004,oda2005,oda2006,oda2007,oda2008,oda2009,oda2010,oda2011,oda2012,oda2013,oda2014,oda2015,oda2016,oda2017,oda2018,oda2019,oda_int,aid
0,Afghanistan,AFG,-0.20136,0.120764,0.289641,0.254712,0.166455,0.315964,2.85,2.97,2.55,2.55,2.77,2.77,2.48,2.48,2.48,2.48,3.02,3.06,,,,,500000.0,,,,,,24000000.0,,136493.0,,2379910.0,,3360.0,,3000.0,,2000.0,,,,1000.0,,,,,,3870542.0,,903177.0,,,,4515559000.0,5226779000.0,6209138000.0,6971286000.0,9747880000.0,10109230000.0,12439090000.0,15856570000.0,17804290000.0,20001600000.0,20561070000.0,20484890000.0,19907110000.0,19362640000.0,20191760000.0,19484380000.0,19101350000.0,,,0.0081,,,0.2374,0.0011,0.015,0.0,0.0,0.0,,0.0,,,0.0199,0.0047,190.683814,211.382117,242.031285,263.733692,359.693238,364.660745,438.076034,543.303042,591.162759,641.871479,637.165523,613.856689,578.466353,547.22811,556.302002,524.162881,502.115487,11.927,15.069,15.698,18.303,21.478,20.918,21.17,20.801,21.937,25.028,24.977,25.402,25.947,28.029,27.732,28.936,27.999,South Asia,Low income,23680871.0,24726684.0,25654277.0,26433049.0,27100536.0,27722276.0,28394813.0,29185507.0,30117413.0,31161376.0,32269589.0,33370794.0,34413603.0,35383128.0,36296400.0,37172386.0,38041754.0,,,,,,,,,,,,,,,,,,0,0
1,Algeria,DZA,0.059815,0.39153,-0.598255,0.254901,-0.009968,-0.177371,4.01,3.5,3.56,3.56,3.95,3.83,3.83,3.83,3.44,3.44,3.32,3.17,3178000.0,,,,14769749.0,,,,10000.0,,10000.0,,76432.0,,10110696.0,,16010000.0,7000000.0,,,2000000.0,,10000.0,,,,660000.0,,10000.0,,10000.0,,,,,,67864850000.0,85324770000.0,103198400000.0,117025400000.0,134977300000.0,171000600000.0,137210700000.0,161207800000.0,200019500000.0,209060300000.0,209754800000.0,213808800000.0,165979400000.0,160033800000.0,167389400000.0,173758000000.0,169988200000.0,0.0047,,0.0143,,0.0,0.0,0.0001,0.0063,0.008,,0.001,0.0,,0.0004,0.0,0.0,,2103.413058,2609.945608,3113.101094,3478.654871,3950.519369,4923.629265,3883.261271,4480.799271,5455.853529,5592.257099,5499.581487,5493.02559,4177.892515,3946.443977,4044.2766,4114.715061,3948.343279,32.202,30.765,27.108,28.853,33.236,37.95,42.6,37.251,40.123,43.543,36.186,40.606,45.811,41.665,39.205,38.166,38.241,Middle East & North Africa,Lower middle income,32264157.0,32692163.0,33149724.0,33641002.0,34166972.0,34730608.0,35333881.0,35977455.0,36661444.0,37383887.0,38140132.0,38923687.0,39728025.0,40551404.0,41389198.0,42228429.0,43053054.0,,,,,,,,,,,,,,,,,,0,0
2,Argentina,ARG,-0.229308,0.041503,0.159679,-0.140457,0.027347,-0.206733,7.02,7.02,6.96,6.96,7.02,6.84,6.84,6.84,6.84,6.84,6.63,6.63,,,,,,,,,,,139000.0,,31100.0,,50000.0,,107332.0,,130826.0,,165718.0,,230500.0,,,,176272.0,,1344591.0,,265000.0,,155000.0,,,,127587000000.0,164657900000.0,198737100000.0,232557300000.0,287530500000.0,361558000000.0,332976500000.0,423627400000.0,530163300000.0,545982400000.0,552025100000.0,526319700000.0,594749300000.0,557531400000.0,642695900000.0,519871500000.0,449663400000.0,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0002,0.0001,0.0,3349.806124,4277.721351,5109.851325,5919.012037,7245.448317,9020.873098,8225.137176,10385.964432,12848.864197,13082.664326,13080.254732,12334.798245,13789.060425,12790.242473,14591.863381,11683.949622,10006.148974,22.026,23.03,24.426,26.617,29.551,30.762,34.524,33.358,34.902,36.821,37.603,38.853,41.366,41.522,41.121,39.49,38.327,Latin America & Caribbean,Upper middle income,38087868.0,38491972.0,38892931.0,39289878.0,39684295.0,40080160.0,40482788.0,40788453.0,41261490.0,41733271.0,42202935.0,42669500.0,43131966.0,43590368.0,44044811.0,44494502.0,44938712.0,,,,,,,,,,,,,,,,,,0,0
3,Australia,AUS,0.657004,0.137137,0.06966,0.02219,0.155331,0.298973,9.09,9.09,9.09,9.01,9.01,9.01,9.13,9.22,9.22,9.22,9.09,9.09,87736725.0,,69076907.0,,85073844.0,29859474.0,89757369.0,2262517.0,109371739.0,3817867.0,143674846.0,39494954.0,196311166.0,8733624.0,293856560.0,1923773.0,312711049.0,,271004092.0,2809573.0,263194483.0,,269253261.0,,166236181.0,762236.0,205207886.0,2724017.0,243900706.0,2712522.0,248119692.0,,310639840.0,,271692972.0,,466488100000.0,612490400000.0,693407800000.0,746054200000.0,853099600000.0,1053996000000.0,927805200000.0,1146138000000.0,1396650000000.0,1546152000000.0,1576184000000.0,1467484000000.0,1351694000000.0,1208847000000.0,1330136000000.0,1433904000000.0,1392681000000.0,0.0188,0.0113,0.0123,0.012,0.0128,0.0136,0.0212,0.0256,0.0224,0.0175,0.0167,0.0183,0.0123,0.017,0.0183,0.0173,0.0223,23447.031001,30430.676437,33999.242858,36044.922811,40960.054495,49601.656708,42772.359166,52022.125596,62517.833747,68012.147901,68150.107041,62510.791171,56755.721712,49971.131456,54066.471269,57395.919466,54907.10191,35.012,34.943,34.691,34.633,34.384,35.113,37.825,37.033,36.352,36.577,36.494,36.832,37.336,37.282,36.741,36.876,38.451,East Asia & Pacific,High income,19895400.0,20127400.0,20394800.0,20697900.0,20827600.0,21249200.0,21691700.0,22031750.0,22340024.0,22733465.0,23128129.0,23475686.0,23815995.0,24190907.0,24601860.0,24982688.0,25364307.0,1218600000.0,1460130000.0,1680160000.0,2123220000.0,2668520000.0,2954070000.0,2761610000.0,3826100000.0,4982910000.0,5402700000.0,4845550000.0,4382420000.0,3493610000.0,3277520000.0,,,,1,0
4,Austria,AUT,0.608285,-0.061829,0.161047,-0.055415,-0.035363,0.154628,8.29,8.29,8.42,8.41,8.54,8.54,8.48,8.62,8.49,8.49,8.49,8.69,3245126.0,,6186042.0,,18423937.0,,5624017.0,765306.0,7885069.0,,15280397.0,1017509.0,16939495.0,348675.0,25973202.0,,18074363.0,500409.0,16333100.0,,17699567.0,,18504048.0,,18786627.0,,35663228.0,,49109105.0,,26096928.0,6670328.0,32476153.0,1540154.0,19142494.0,,261695800000.0,300904200000.0,315974400000.0,335998600000.0,388691400000.0,430294300000.0,400172300000.0,391892700000.0,431120300000.0,409425200000.0,430068700000.0,441996100000.0,381817600000.0,395227700000.0,418316200000.0,455508300000.0,446314700000.0,0.0012,0.0021,0.0058,0.0017,0.002,0.0036,0.0042,0.0066,0.0042,0.004,0.0041,0.0042,0.0049,0.009,0.0117,0.0057,0.0073,32222.897241,36821.521468,38403.133877,40635.281816,46855.771745,51708.765754,47963.179402,46858.043273,51374.958407,48567.695286,50716.708706,51717.495941,44178.047378,45237.805092,47549.07904,51525.046434,50277.275087,49.926,53.704,51.154,50.378,49.242,49.871,54.141,52.835,50.892,51.213,51.648,52.325,51.027,50.068,48.931,48.65,48.21,Europe & Central Asia,High income,8121423.0,8171966.0,8227829.0,8268641.0,8295487.0,8321496.0,8343323.0,8363404.0,8391643.0,8429991.0,8479823.0,8546356.0,8642699.0,8736668.0,8797566.0,8840521.0,8877067.0,504780000.0,677630000.0,1573320000.0,1498430000.0,1808460000.0,1713510000.0,1141780000.0,1208420000.0,1111370000.0,1105750000.0,1171490000.0,1234520000.0,1323510000.0,1635480000.0,,,,1,0


### add in dummy variables indicates whether in OECD or G20 in 2019

In [33]:
OECD = ['Austria', 'Belgium', 'Czech Republic', 'Denmark', 'Estonia', 'Finland', 'France', 'Germany', 'Greece', 'Hungary', 'Iceland', 'Ireland', 'Italy', 'Latvia', 'Lithuania', 'Luxembourg', 'Netherlands', 'Norway', 'Poland', 'Portugal', 'Slovak Republic', 'Slovenia', 'Spain', 'Sweden', 'Switzerland', 'United Kingdom']
EU = [ 'Austria', 'Belgium', 'Bulgaria', 'Croatia', 'Czech Republic', 'Denmark', 'Estonia', 'Finland', 'France', 'Germany', 'Greece', 'Hungary', 'Ireland', 'Italy', 'Latvia', 'Lithuania', 'Luxembourg', 'Malta', 'Netherlands', 'Poland', 'Portugal', 'Romania', 'Slovakia', 'Slovenia', 'Spain', 'Sweden']
G20_else = ['Argentina', 'Australia', 'Brazil', 'Canada', 'China', 'France', 'Germany', 'India', 'Indonesia', 'Italy', 'Japan', 'Mexico', 'Russia', 'Saudi Arabia', 'South Africa', 'South Korea', 'Turkey', 'United Kingdom', 'United States']
G20 = G20_else + EU


In [34]:
df_add_oecd = df_add_aid.assign(oecd = df_add_aid[['country']].isin(OECD).astype('int'))
df_add_g20 = df_add_oecd.assign(g20 = df_add_aid[['country']].isin(G20).astype('int'))
df_add_g20.head()

Unnamed: 0,country,isocode,patience,risktaking,posrecip,negrecip,altruism,trust,demo2019,demo2018,demo2017,demo2016,demo2015,demo2014,demo2013,demo2012,demo2011,demo2010,demo2008,demo2006,funding2003,pledge2003,funding2004,pledge2004,funding2005,pledge2005,funding2006,pledge2006,funding2007,pledge2007,funding2008,pledge2008,funding2009,pledge2009,funding2010,pledge2010,funding2011,pledge2011,funding2012,pledge2012,funding2013,pledge2013,funding2014,pledge2014,funding2015,pledge2015,funding2016,pledge2016,funding2017,pledge2017,funding2018,pledge2018,funding2019,pledge2019,funding2020,pledge2020,gdp2003,gdp2004,gdp2005,gdp2006,gdp2007,gdp2008,gdp2009,gdp2010,gdp2011,gdp2012,gdp2013,gdp2014,gdp2015,gdp2016,gdp2017,gdp2018,gdp2019,funding_gdp2003,funding_gdp2004,funding_gdp2005,funding_gdp2006,funding_gdp2007,funding_gdp2008,funding_gdp2009,funding_gdp2010,funding_gdp2011,funding_gdp2012,funding_gdp2013,funding_gdp2014,funding_gdp2015,funding_gdp2016,funding_gdp2017,funding_gdp2018,funding_gdp2019,gdpcapita2003,gdpcapita2004,gdpcapita2005,gdpcapita2006,gdpcapita2007,gdpcapita2008,gdpcapita2009,gdpcapita2010,gdpcapita2011,gdpcapita2012,gdpcapita2013,gdpcapita2014,gdpcapita2015,gdpcapita2016,gdpcapita2017,gdpcapita2018,gdpcapita2019,govexpense2003,govexpense2004,govexpense2005,govexpense2006,govexpense2007,govexpense2008,govexpense2009,govexpense2010,govexpense2011,govexpense2012,govexpense2013,govexpense2014,govexpense2015,govexpense2016,govexpense2017,govexpense2018,govexpense2019,region,income_type,pop2003,pop2004,pop2005,pop2006,pop2007,pop2008,pop2009,pop2010,pop2011,pop2012,pop2013,pop2014,pop2015,pop2016,pop2017,pop2018,pop2019,oda2003,oda2004,oda2005,oda2006,oda2007,oda2008,oda2009,oda2010,oda2011,oda2012,oda2013,oda2014,oda2015,oda2016,oda2017,oda2018,oda2019,oda_int,aid,oecd,g20
0,Afghanistan,AFG,-0.20136,0.120764,0.289641,0.254712,0.166455,0.315964,2.85,2.97,2.55,2.55,2.77,2.77,2.48,2.48,2.48,2.48,3.02,3.06,,,,,500000.0,,,,,,24000000.0,,136493.0,,2379910.0,,3360.0,,3000.0,,2000.0,,,,1000.0,,,,,,3870542.0,,903177.0,,,,4515559000.0,5226779000.0,6209138000.0,6971286000.0,9747880000.0,10109230000.0,12439090000.0,15856570000.0,17804290000.0,20001600000.0,20561070000.0,20484890000.0,19907110000.0,19362640000.0,20191760000.0,19484380000.0,19101350000.0,,,0.0081,,,0.2374,0.0011,0.015,0.0,0.0,0.0,,0.0,,,0.0199,0.0047,190.683814,211.382117,242.031285,263.733692,359.693238,364.660745,438.076034,543.303042,591.162759,641.871479,637.165523,613.856689,578.466353,547.22811,556.302002,524.162881,502.115487,11.927,15.069,15.698,18.303,21.478,20.918,21.17,20.801,21.937,25.028,24.977,25.402,25.947,28.029,27.732,28.936,27.999,South Asia,Low income,23680871.0,24726684.0,25654277.0,26433049.0,27100536.0,27722276.0,28394813.0,29185507.0,30117413.0,31161376.0,32269589.0,33370794.0,34413603.0,35383128.0,36296400.0,37172386.0,38041754.0,,,,,,,,,,,,,,,,,,0,0,0,0
1,Algeria,DZA,0.059815,0.39153,-0.598255,0.254901,-0.009968,-0.177371,4.01,3.5,3.56,3.56,3.95,3.83,3.83,3.83,3.44,3.44,3.32,3.17,3178000.0,,,,14769749.0,,,,10000.0,,10000.0,,76432.0,,10110696.0,,16010000.0,7000000.0,,,2000000.0,,10000.0,,,,660000.0,,10000.0,,10000.0,,,,,,67864850000.0,85324770000.0,103198400000.0,117025400000.0,134977300000.0,171000600000.0,137210700000.0,161207800000.0,200019500000.0,209060300000.0,209754800000.0,213808800000.0,165979400000.0,160033800000.0,167389400000.0,173758000000.0,169988200000.0,0.0047,,0.0143,,0.0,0.0,0.0001,0.0063,0.008,,0.001,0.0,,0.0004,0.0,0.0,,2103.413058,2609.945608,3113.101094,3478.654871,3950.519369,4923.629265,3883.261271,4480.799271,5455.853529,5592.257099,5499.581487,5493.02559,4177.892515,3946.443977,4044.2766,4114.715061,3948.343279,32.202,30.765,27.108,28.853,33.236,37.95,42.6,37.251,40.123,43.543,36.186,40.606,45.811,41.665,39.205,38.166,38.241,Middle East & North Africa,Lower middle income,32264157.0,32692163.0,33149724.0,33641002.0,34166972.0,34730608.0,35333881.0,35977455.0,36661444.0,37383887.0,38140132.0,38923687.0,39728025.0,40551404.0,41389198.0,42228429.0,43053054.0,,,,,,,,,,,,,,,,,,0,0,0,0
2,Argentina,ARG,-0.229308,0.041503,0.159679,-0.140457,0.027347,-0.206733,7.02,7.02,6.96,6.96,7.02,6.84,6.84,6.84,6.84,6.84,6.63,6.63,,,,,,,,,,,139000.0,,31100.0,,50000.0,,107332.0,,130826.0,,165718.0,,230500.0,,,,176272.0,,1344591.0,,265000.0,,155000.0,,,,127587000000.0,164657900000.0,198737100000.0,232557300000.0,287530500000.0,361558000000.0,332976500000.0,423627400000.0,530163300000.0,545982400000.0,552025100000.0,526319700000.0,594749300000.0,557531400000.0,642695900000.0,519871500000.0,449663400000.0,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0002,0.0001,0.0,3349.806124,4277.721351,5109.851325,5919.012037,7245.448317,9020.873098,8225.137176,10385.964432,12848.864197,13082.664326,13080.254732,12334.798245,13789.060425,12790.242473,14591.863381,11683.949622,10006.148974,22.026,23.03,24.426,26.617,29.551,30.762,34.524,33.358,34.902,36.821,37.603,38.853,41.366,41.522,41.121,39.49,38.327,Latin America & Caribbean,Upper middle income,38087868.0,38491972.0,38892931.0,39289878.0,39684295.0,40080160.0,40482788.0,40788453.0,41261490.0,41733271.0,42202935.0,42669500.0,43131966.0,43590368.0,44044811.0,44494502.0,44938712.0,,,,,,,,,,,,,,,,,,0,0,0,1
3,Australia,AUS,0.657004,0.137137,0.06966,0.02219,0.155331,0.298973,9.09,9.09,9.09,9.01,9.01,9.01,9.13,9.22,9.22,9.22,9.09,9.09,87736725.0,,69076907.0,,85073844.0,29859474.0,89757369.0,2262517.0,109371739.0,3817867.0,143674846.0,39494954.0,196311166.0,8733624.0,293856560.0,1923773.0,312711049.0,,271004092.0,2809573.0,263194483.0,,269253261.0,,166236181.0,762236.0,205207886.0,2724017.0,243900706.0,2712522.0,248119692.0,,310639840.0,,271692972.0,,466488100000.0,612490400000.0,693407800000.0,746054200000.0,853099600000.0,1053996000000.0,927805200000.0,1146138000000.0,1396650000000.0,1546152000000.0,1576184000000.0,1467484000000.0,1351694000000.0,1208847000000.0,1330136000000.0,1433904000000.0,1392681000000.0,0.0188,0.0113,0.0123,0.012,0.0128,0.0136,0.0212,0.0256,0.0224,0.0175,0.0167,0.0183,0.0123,0.017,0.0183,0.0173,0.0223,23447.031001,30430.676437,33999.242858,36044.922811,40960.054495,49601.656708,42772.359166,52022.125596,62517.833747,68012.147901,68150.107041,62510.791171,56755.721712,49971.131456,54066.471269,57395.919466,54907.10191,35.012,34.943,34.691,34.633,34.384,35.113,37.825,37.033,36.352,36.577,36.494,36.832,37.336,37.282,36.741,36.876,38.451,East Asia & Pacific,High income,19895400.0,20127400.0,20394800.0,20697900.0,20827600.0,21249200.0,21691700.0,22031750.0,22340024.0,22733465.0,23128129.0,23475686.0,23815995.0,24190907.0,24601860.0,24982688.0,25364307.0,1218600000.0,1460130000.0,1680160000.0,2123220000.0,2668520000.0,2954070000.0,2761610000.0,3826100000.0,4982910000.0,5402700000.0,4845550000.0,4382420000.0,3493610000.0,3277520000.0,,,,1,0,0,1
4,Austria,AUT,0.608285,-0.061829,0.161047,-0.055415,-0.035363,0.154628,8.29,8.29,8.42,8.41,8.54,8.54,8.48,8.62,8.49,8.49,8.49,8.69,3245126.0,,6186042.0,,18423937.0,,5624017.0,765306.0,7885069.0,,15280397.0,1017509.0,16939495.0,348675.0,25973202.0,,18074363.0,500409.0,16333100.0,,17699567.0,,18504048.0,,18786627.0,,35663228.0,,49109105.0,,26096928.0,6670328.0,32476153.0,1540154.0,19142494.0,,261695800000.0,300904200000.0,315974400000.0,335998600000.0,388691400000.0,430294300000.0,400172300000.0,391892700000.0,431120300000.0,409425200000.0,430068700000.0,441996100000.0,381817600000.0,395227700000.0,418316200000.0,455508300000.0,446314700000.0,0.0012,0.0021,0.0058,0.0017,0.002,0.0036,0.0042,0.0066,0.0042,0.004,0.0041,0.0042,0.0049,0.009,0.0117,0.0057,0.0073,32222.897241,36821.521468,38403.133877,40635.281816,46855.771745,51708.765754,47963.179402,46858.043273,51374.958407,48567.695286,50716.708706,51717.495941,44178.047378,45237.805092,47549.07904,51525.046434,50277.275087,49.926,53.704,51.154,50.378,49.242,49.871,54.141,52.835,50.892,51.213,51.648,52.325,51.027,50.068,48.931,48.65,48.21,Europe & Central Asia,High income,8121423.0,8171966.0,8227829.0,8268641.0,8295487.0,8321496.0,8343323.0,8363404.0,8391643.0,8429991.0,8479823.0,8546356.0,8642699.0,8736668.0,8797566.0,8840521.0,8877067.0,504780000.0,677630000.0,1573320000.0,1498430000.0,1808460000.0,1713510000.0,1141780000.0,1208420000.0,1111370000.0,1105750000.0,1171490000.0,1234520000.0,1323510000.0,1635480000.0,,,,1,0,1,1


### add Gini Index by year from World Bank
### 2019's data wasn't avalaible 
### notice 4/76 country have no observations at all

In [35]:

gni = pd.read_excel('data/GNI.xls', sheet_name='Data', skiprows=3).rename(columns={"Country Code": "isocode"}).set_index('isocode')
gni_filter = gni.filter(regex='^20(10|11|12|.*[3456789]$)', axis=1).add_prefix('gni')
gni_filter.head()

Unnamed: 0_level_0,gni2003,gni2004,gni2005,gni2006,gni2007,gni2008,gni2009,gni2010,gni2011,gni2012,gni2013,gni2014,gni2015,gni2016,gni2017,gni2018,gni2019
isocode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
ABW,,,,,,,,,,,,,,,,,
AFG,,,,,,,,,,,,,,,,,
AGO,,,,,,42.7,,,,,,,,,,51.3,
ALB,,,30.6,,,30.0,,,,29.0,,34.6,32.9,33.7,33.2,,
AND,,,,,,,,,,,,,,,,,


In [37]:
df_add_gini = pd.merge(df_add_g20, gni_filter, how='left', on="isocode")
# df_add_gini.shape
df_add_gini[df_add_gini.filter(regex='gni.*', axis=1).isna().all(axis=1)]

Unnamed: 0,country,isocode,patience,risktaking,posrecip,negrecip,altruism,trust,demo2019,demo2018,demo2017,demo2016,demo2015,demo2014,demo2013,demo2012,demo2011,demo2010,demo2008,demo2006,funding2003,pledge2003,funding2004,pledge2004,funding2005,pledge2005,funding2006,pledge2006,funding2007,pledge2007,funding2008,pledge2008,funding2009,pledge2009,funding2010,pledge2010,funding2011,pledge2011,funding2012,pledge2012,funding2013,pledge2013,funding2014,pledge2014,funding2015,pledge2015,funding2016,pledge2016,funding2017,pledge2017,funding2018,pledge2018,funding2019,pledge2019,funding2020,pledge2020,gdp2003,gdp2004,gdp2005,gdp2006,gdp2007,gdp2008,gdp2009,gdp2010,gdp2011,gdp2012,gdp2013,gdp2014,gdp2015,gdp2016,gdp2017,gdp2018,gdp2019,funding_gdp2003,funding_gdp2004,funding_gdp2005,funding_gdp2006,funding_gdp2007,funding_gdp2008,funding_gdp2009,funding_gdp2010,funding_gdp2011,funding_gdp2012,funding_gdp2013,funding_gdp2014,funding_gdp2015,funding_gdp2016,funding_gdp2017,funding_gdp2018,funding_gdp2019,gdpcapita2003,gdpcapita2004,gdpcapita2005,gdpcapita2006,gdpcapita2007,gdpcapita2008,gdpcapita2009,gdpcapita2010,gdpcapita2011,gdpcapita2012,gdpcapita2013,gdpcapita2014,gdpcapita2015,gdpcapita2016,gdpcapita2017,gdpcapita2018,gdpcapita2019,govexpense2003,govexpense2004,govexpense2005,govexpense2006,govexpense2007,govexpense2008,govexpense2009,govexpense2010,govexpense2011,govexpense2012,govexpense2013,govexpense2014,govexpense2015,govexpense2016,govexpense2017,govexpense2018,govexpense2019,region,income_type,pop2003,pop2004,pop2005,pop2006,pop2007,pop2008,pop2009,pop2010,pop2011,pop2012,pop2013,pop2014,pop2015,pop2016,pop2017,pop2018,pop2019,oda2003,oda2004,oda2005,oda2006,oda2007,oda2008,oda2009,oda2010,oda2011,oda2012,oda2013,oda2014,oda2015,oda2016,oda2017,oda2018,oda2019,oda_int,aid,oecd,g20,gni2003,gni2004,gni2005,gni2006,gni2007,gni2008,gni2009,gni2010,gni2011,gni2012,gni2013,gni2014,gni2015,gni2016,gni2017,gni2018,gni2019
0,Afghanistan,AFG,-0.20136,0.120764,0.289641,0.254712,0.166455,0.315964,2.85,2.97,2.55,2.55,2.77,2.77,2.48,2.48,2.48,2.48,3.02,3.06,,,,,500000.0,,,,,,24000000.0,,136493.0,,2379910.0,,3360.0,,3000.0,,2000.0,,,,1000.0,,,,,,3870542.0,,903177.0,,,,4515559000.0,5226779000.0,6209138000.0,6971286000.0,9747880000.0,10109230000.0,12439090000.0,15856570000.0,17804290000.0,20001600000.0,20561070000.0,20484890000.0,19907110000.0,19362640000.0,20191760000.0,19484380000.0,19101350000.0,,,0.0081,,,0.2374,0.0011,0.015,0.0,0.0,0.0,,0.0,,,0.0199,0.0047,190.683814,211.382117,242.031285,263.733692,359.693238,364.660745,438.076034,543.303042,591.162759,641.871479,637.165523,613.856689,578.466353,547.22811,556.302002,524.162881,502.115487,11.927,15.069,15.698,18.303,21.478,20.918,21.17,20.801,21.937,25.028,24.977,25.402,25.947,28.029,27.732,28.936,27.999,South Asia,Low income,23680871.0,24726684.0,25654277.0,26433049.0,27100536.0,27722276.0,28394813.0,29185507.0,30117413.0,31161376.0,32269589.0,33370794.0,34413603.0,35383128.0,36296400.0,37172386.0,38041754.0,,,,,,,,,,,,,,,,,,0,0,0,0,,,,,,,,,,,,,,,,,
10,Cambodia,KHM,-0.119899,-0.40464,0.171683,-0.062052,-0.216157,-0.079135,3.53,3.59,3.63,4.27,4.27,4.78,4.6,4.96,4.87,4.87,4.87,4.77,,,,,10000.0,,,,,,310000.0,,1554.0,,60000.0,,,,,,,,,,,,,,,,,,,,20000.0,,4658247000.0,5337833000.0,6293046000.0,7274596000.0,8639236000.0,10351910000.0,10401850000.0,11242280000.0,12829540000.0,14054440000.0,15227990000.0,16702610000.0,18049950000.0,20016750000.0,22177200000.0,24571750000.0,27089390000.0,,,0.0002,,,0.003,0.0,0.0005,,,,,,,,,,362.335708,408.513826,474.111228,539.750169,631.524842,745.609181,738.054784,785.502283,882.275493,950.880346,1013.420401,1093.496191,1162.90492,1269.591257,1385.25998,1512.12671,1643.121389,15.749,13.865,12.332,12.952,14.404,15.382,20.4,20.903,20.595,21.68,21.366,21.718,20.272,21.118,22.375,23.175,23.02,East Asia & Pacific,Lower middle income,12856163.0,13066469.0,13273354.0,13477709.0,13679962.0,13883834.0,14093604.0,14312212.0,14541423.0,14780454.0,15026332.0,15274503.0,15521436.0,15766293.0,16009414.0,16249798.0,16486542.0,,,,,,,,,,,,,,,,,,0,0,0,0,,,,,,,,,,,,,,,,,
56,Saudi Arabia,SAU,0.200122,0.695746,-0.487077,0.599556,-0.371266,0.300153,1.93,1.93,1.93,1.93,1.93,1.82,1.82,1.71,1.77,1.84,1.9,1.92,103645788.0,,45511458.0,,189744486.0,256398333.0,187656161.0,9950000.0,252564761.0,200000.0,609589418.0,,143442118.0,,281757318.0,146306111.0,213828471.0,,412225334.0,2000000.0,287252781.0,,860357984.0,35000000.0,521954949.0,,441809387.0,3400000.0,437150488.0,11203500.0,1512600000.0,,1406178000.0,,506114935.0,150000.0,215807700000.0,258742100000.0,328459600000.0,376900100000.0,415964500000.0,519796800000.0,429097900000.0,528207200000.0,671238800000.0,735974800000.0,746647100000.0,756350300000.0,654269900000.0,644935500000.0,688586100000.0,786521800000.0,792966800000.0,0.048,0.0176,0.0578,0.0498,0.0607,0.1173,0.0334,0.0533,0.0319,0.056,0.0385,0.1138,0.0798,0.0685,0.0635,0.1923,0.1773,9609.966975,11185.133368,13791.446294,15384.740152,16516.62362,20078.25779,16113.142485,19262.547681,23745.801614,25243.358698,24844.744371,24463.903164,20627.932782,19879.29753,20803.742566,23338.963458,23139.798656,33.116,31.094,28.151,27.269,29.453,26.68,37.066,33.012,32.843,33.233,35.527,40.214,40.811,38.682,33.305,36.575,35.613,Middle East & North Africa,High income,22456649.0,23132682.0,23816183.0,24498310.0,25184597.0,25888541.0,26630303.0,27421461.0,28267685.0,29155187.0,30052518.0,30916994.0,31717667.0,32442572.0,33099147.0,33699947.0,34268528.0,,,,,,,,,,,,,,,,,,0,1,0,1,,,,,,,,,,,,,,,,,
62,Suriname,SUR,0.006719,0.178213,0.164919,0.015116,0.090163,0.116365,6.98,6.98,6.76,6.77,6.77,6.77,6.77,6.65,6.65,6.65,6.58,6.52,,,,,,,,,,,,,,,1000000.0,,,,,,,,,,,,,,,,,,,,,,1274190000.0,1484093000.0,1793389000.0,2626380000.0,2936612000.0,3532969000.0,3875410000.0,4368398000.0,4422277000.0,4980000000.0,5145758000.0,5240606000.0,4787368000.0,3128954000.0,3210065000.0,3458090000.0,3985251000.0,,,,,,,,0.0229,,,,,,,,,,2611.356086,3006.189322,3590.626616,5197.717047,5744.759727,6831.970411,7408.389111,8255.796859,8263.172923,9201.008785,9402.233137,9472.0076,8561.974179,5539.068872,5626.795816,6003.723066,6854.906561,20.534,22.286,23.777,23.78,24.18,21.95,25.721,22.515,22.538,28.538,28.772,29.123,31.226,28.005,30.797,30.907,35.949,Latin America & Caribbean,Upper middle income,487942.0,493679.0,499464.0,505295.0,511181.0,517123.0,523111.0,529131.0,535179.0,541245.0,547291.0,553273.0,559143.0,564888.0,570496.0,575991.0,581372.0,,,,,,,,,,,,,,,,,,0,0,0,0,,,,,,,,,,,,,,,,,


### output

In [38]:
df_add_gini.to_csv('data/result.csv', index=False) 