In this notebook, I combine the data needed for our research project, using the 76 countries in the Global Preference Survey (GPS) as the key, because they are the main interest of the project. The GDP and humanitarian aid data have been filtered to 2003 onward, because the democracy index only starts in 2006.
Note that Bosnia Herzegovina has no record in the humanitarian aid data.

### data output 
| Variable     | Type  | Content                                                   |
|:-------------|:-----:|:---------------------------------------------------------:|
| country | string | name of the country from the Global Preference Survey (GPS) data |
| isocode | string | ISO code of the country |
| 2019 ~ 2006 | float | democracy index by year; note that 2007 and 2009 do not exist |
| altruism | float | altruism from GPS |
| 2003_gdp ~ 2019_gdp | float | GDP in current US$ by year |
| funding_3 ~ funding_19 | float | humanitarian aid funding (US$) by year |
| pledge_3 ~ pledge_19 | float | pledged humanitarian aid (US$) by year |
| funding_3_pct ~ funding_19_pct | float | funding / GDP of that year * 100 |
| pledge_3_pct ~ pledge_19_pct | float | pledged funding / GDP of that year * 100 |


In [1]:
import pandas as pd
import glob
from auxiliary import *


### Read in the democracy index and rename some countries for merging later

In [4]:
# Load the EIU democracy index; the spreadsheet's unnamed leading column
# holds the country names, so give it a usable label.
democracy = pd.read_excel('data/EIU_Democracy_Index_2006_to_2019.xlsx')
democracy = democracy.rename(columns={"Unnamed: 0": "country"})

In [5]:
# Map the EIU spellings onto the country names used by the GPS data so the
# later merge on "country" finds a match.
country_dict = {
    'US': 'United States',
    'UK': 'United Kingdom',
    'UAE': 'United Arab Emirates',
    'Bosnia and Hercegovina': 'Bosnia Herzegovina',
}
democracy = democracy.replace(to_replace={"country": country_dict})
democracy.head(n=5)

Unnamed: 0,country,2019,2018,2017,2016,2015,2014,2013,2012,2011,2010,2008,2006
0,Canada,9.22,9.15,9.15,9.15,9.08,9.08,9.08,9.08,9.08,9.08,9.07,9.07
1,United States,7.96,7.96,7.98,7.98,8.05,8.11,8.11,8.11,8.11,8.18,8.22,8.22
2,Austria,8.29,8.29,8.42,8.41,8.54,8.54,8.48,8.62,8.49,8.49,8.49,8.69
3,Belgium,7.64,7.78,7.78,7.77,7.93,7.93,8.05,8.05,8.05,8.05,8.16,8.15
4,Cyprus,7.59,7.59,7.59,7.65,7.53,7.4,7.29,7.29,7.29,7.29,7.7,7.6


### Read in the Global Preference Survey (GPS) data and merge it with the democracy index

In [6]:
# Load the country-level GPS table from its Stata file and show its
# (rows, columns) shape.
gps = pd.read_stata(filepath_or_buffer='data/country.dta')
gps.shape

(76, 8)

In [7]:
# Preview the first five GPS rows.
gps.head(n=5)

Unnamed: 0,country,isocode,patience,risktaking,posrecip,negrecip,altruism,trust
0,Afghanistan,AFG,-0.20136,0.120764,0.289641,0.254712,0.166455,0.315964
1,Algeria,DZA,0.059815,0.39153,-0.598255,0.254901,-0.009968,-0.177371
2,Argentina,ARG,-0.229308,0.041503,0.159679,-0.140457,0.027347,-0.206733
3,Australia,AUS,0.657004,0.137137,0.06966,0.02219,0.155331,0.298973
4,Austria,AUT,0.608285,-0.061829,0.161047,-0.055415,-0.035363,0.154628


In [8]:
# Left join: keep every GPS country, whether or not the democracy index
# has a row for it.
gps_democracy = gps.merge(democracy, on='country', how='left')
gps_democracy.shape

(76, 20)

In [9]:
# Optional check -- uncomment to list rows with a missing value in any column
# after the merge:
# gps_democracy[gps_democracy.isna().any(axis=1)]
gps_democracy.head(n=5)

Unnamed: 0,country,isocode,patience,risktaking,posrecip,negrecip,altruism,trust,2019,2018,2017,2016,2015,2014,2013,2012,2011,2010,2008,2006
0,Afghanistan,AFG,-0.20136,0.120764,0.289641,0.254712,0.166455,0.315964,2.85,2.97,2.55,2.55,2.77,2.77,2.48,2.48,2.48,2.48,3.02,3.06
1,Algeria,DZA,0.059815,0.39153,-0.598255,0.254901,-0.009968,-0.177371,4.01,3.5,3.56,3.56,3.95,3.83,3.83,3.83,3.44,3.44,3.32,3.17
2,Argentina,ARG,-0.229308,0.041503,0.159679,-0.140457,0.027347,-0.206733,7.02,7.02,6.96,6.96,7.02,6.84,6.84,6.84,6.84,6.84,6.63,6.63
3,Australia,AUS,0.657004,0.137137,0.06966,0.02219,0.155331,0.298973,9.09,9.09,9.09,9.01,9.01,9.01,9.13,9.22,9.22,9.22,9.09,9.09
4,Austria,AUT,0.608285,-0.061829,0.161047,-0.055415,-0.035363,0.154628,8.29,8.29,8.42,8.41,8.54,8.54,8.48,8.62,8.49,8.49,8.49,8.69


### read in UN Humanitarian Affairs Financial Tracking Service data by year and rename columns for merging

In [10]:
# Path prefix and extension shared by the yearly OCHA FTS exports; the glob
# wildcard stands for whatever follows the "20" of the year in each file name.
file_name = 'data/OCHA_FTS_Government_Donations_20'
extension = '.xlsx'
# glob.glob() returns matches in arbitrary, file-system-dependent order, while
# the year suffixes assigned in the next cell depend on each file's position,
# so sort the paths to make that order deterministic (and chronological).
all_filenames = sorted(glob.glob(f'{file_name}*{extension}'))

In [11]:
# Combine the yearly donation workbooks into one wide table, one row per donor.
# Files are processed in sorted order so that position i corresponds to year
# 2003 + i; this assumes one workbook per consecutive year starting in 2003
# -- TODO confirm against the contents of data/.
yearly_files = sorted(all_filenames)
if not yearly_files:
    # Fail with a clear message instead of a NameError on `df` further down.
    raise FileNotFoundError('all_filenames is empty: no OCHA FTS donation workbooks were found')

for i, path in enumerate(yearly_files):
    new = pd.read_excel(path, sheet_name='Export data', skiprows=2).rename(
        columns={"Source org.": "country", "Funding US$": "funding", "Pledges US$": "pledge"}
    )
    if i == 0:
        df = new
    else:
        # Columns present on both sides keep their name on the left and get
        # the year suffix on the incoming frame (funding -> funding_4, ...).
        df = pd.merge(df, new, how='outer', on="country", suffixes=('', f'_{i+3}'))

# The first file's columns never collide during the merges, so they are still
# unsuffixed; label them explicitly.
donate = df.rename(columns={"pledge": "pledge_3", "funding": "funding_3"})

# Remove a trailing ", Government of" from the donor names. str.rstrip() takes
# a *set of characters*, not a suffix, so rstrip(' Government of') would also
# eat the end of names such as "Korea, Republic of"; an anchored regex removes
# only the intended suffix.
donate['country'] = donate['country'].str.replace(r',?\s*Government of\s*$', '', regex=True)
donate['country'] = donate['country'].str.rstrip(',')


In [12]:
#checking 2006 & 2020 manually 
country_dict = {'United States of America': 'United States',  'Saudi Arabia (Kingdom of)' : 'Saudi Arabia', 'Russian Federation' : 'Russia', 'Korea, Republic of': 'South Korea', 'Viet Nam' : 'Vietnam'}
donate = donate.replace({"country": country_dict})

### Merge the humanitarian aid data with the previous dataframe; note that Bosnia Herzegovina does not get matched

In [13]:
# Left join again, so countries without any donation record stay in the
# table with missing funding/pledge values.
gps_demo_donate = gps_democracy.merge(donate, on='country', how='left')
gps_demo_donate.shape

(76, 56)

In [14]:
# Preview the first five rows of the merged table.
gps_demo_donate.head(n=5)

Unnamed: 0,country,isocode,patience,risktaking,posrecip,negrecip,altruism,trust,2019,2018,...,funding_16,pledge_16,funding_17,pledge_17,funding_18,pledge_18,funding_19,pledge_19,funding_20,pledge_20
0,Afghanistan,AFG,-0.20136,0.120764,0.289641,0.254712,0.166455,0.315964,2.85,2.97,...,,,,,3870542.0,0.0,903177.0,0.0,,
1,Algeria,DZA,0.059815,0.39153,-0.598255,0.254901,-0.009968,-0.177371,4.01,3.5,...,660000.0,0.0,10000.0,0.0,10000.0,0.0,,,0.0,0.0
2,Argentina,ARG,-0.229308,0.041503,0.159679,-0.140457,0.027347,-0.206733,7.02,7.02,...,176272.0,0.0,1344591.0,0.0,265000.0,0.0,155000.0,0.0,,
3,Australia,AUS,0.657004,0.137137,0.06966,0.02219,0.155331,0.298973,9.09,9.09,...,205207886.0,2724017.0,243900706.0,2712522.0,248119692.0,0.0,310639840.0,0.0,271692972.0,0.0
4,Austria,AUT,0.608285,-0.061829,0.161047,-0.055415,-0.035363,0.154628,8.29,8.29,...,35663228.0,0.0,49109105.0,0.0,26096928.0,6670328.0,32476153.0,1540154.0,19142494.0,0.0


In [16]:
# Show the countries for which every yearly funding/pledge column is missing.
# The pattern is anchored to "<kind>_<digits>" so that only the yearly amount
# columns are selected; the previous '(funding|pledge)_*' matched any column
# whose name merely contained "funding" or "pledge".
donation_cols = gps_demo_donate.filter(regex=r'^(funding|pledge)_\d+$', axis=1)
gps_demo_donate[donation_cols.isna().all(axis=1)]

Unnamed: 0,country,isocode,patience,risktaking,posrecip,negrecip,altruism,trust,2019,2018,...,funding_16,pledge_16,funding_17,pledge_17,funding_18,pledge_18,funding_19,pledge_19,funding_20,pledge_20
7,Bosnia Herzegovina,BIH,-0.247258,-0.125639,0.096463,0.429355,-0.11531,-0.193106,4.86,4.98,...,,,,,,,,,,


### read in GDP data and merge with previous data frame

In [17]:
# Load the GDP sheet and index it by ISO country code.
gdp = pd.read_excel('data/GDP_by_country_by_year.xls', sheet_name='Data', skiprows=3).rename(columns={"Country Code": "isocode"}).set_index('isocode')
# Positional slice: the 17 columns that end just before the last one
# (2003 ... 2019 in the current file -- re-check if the workbook is updated).
gdp = gdp.iloc[:, -18:-1]
gdp = gdp.add_suffix('_gdp')
# isocode stays as the index: a bare gdp.reset_index() returns a new frame and
# changes nothing unless assigned, and the merge on "isocode" below resolves
# the key from the index level name.
gdp.head()


Unnamed: 0_level_0,2003_gdp,2004_gdp,2005_gdp,2006_gdp,2007_gdp,2008_gdp,2009_gdp,2010_gdp,2011_gdp,2012_gdp,2013_gdp,2014_gdp,2015_gdp,2016_gdp,2017_gdp,2018_gdp,2019_gdp
isocode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
ABW,2021229000.0,2228492000.0,2330726000.0,2424581000.0,2615084000.0,2745251000.0,2498883000.0,2390503000.0,2549721000.0,2534637000.0,2701676000.0,2765363000.0,2919553000.0,2965922000.0,3056425000.0,,
AFG,4515559000.0,5226779000.0,6209138000.0,6971286000.0,9747880000.0,10109230000.0,12439090000.0,15856570000.0,17804290000.0,20001600000.0,20561070000.0,20484890000.0,19907110000.0,19362640000.0,20191760000.0,19484380000.0,19101350000.0
AGO,17812710000.0,23552050000.0,36970920000.0,52381010000.0,65266450000.0,88538610000.0,70307160000.0,83799500000.0,111789700000.0,128052900000.0,136709900000.0,145712200000.0,116193600000.0,101123900000.0,122123800000.0,101353200000.0,94635420000.0
ALB,5611492000.0,7184681000.0,8052076000.0,8896074000.0,10677320000.0,12881350000.0,12044220000.0,11926930000.0,12890770000.0,12319830000.0,12776220000.0,13228140000.0,11386850000.0,11861200000.0,13019690000.0,15147020000.0,15278080000.0
AND,2361727000.0,2894922000.0,3159905000.0,3456442000.0,3952601000.0,4085631000.0,3674410000.0,3449967000.0,3629204000.0,3188809000.0,3193704000.0,3271808000.0,2789870000.0,2896679000.0,3000181000.0,3218316000.0,3154058000.0


In [18]:
# Attach the yearly GDP columns by ISO code, keeping all rows of the left table.
gps_demo_donate_gdp = gps_demo_donate.merge(gdp, on="isocode", how='left')
gps_demo_donate_gdp.shape

(76, 73)

In [19]:
# Optional check -- uncomment to list rows whose *_gdp columns are all missing:
# gps_demo_donate_gdp[gps_demo_donate_gdp.filter(regex='.*_gdp', axis=1).isna().all(axis=1)]
gps_demo_donate_gdp.head(n=5)

Unnamed: 0,country,isocode,patience,risktaking,posrecip,negrecip,altruism,trust,2019,2018,...,2010_gdp,2011_gdp,2012_gdp,2013_gdp,2014_gdp,2015_gdp,2016_gdp,2017_gdp,2018_gdp,2019_gdp
0,Afghanistan,AFG,-0.20136,0.120764,0.289641,0.254712,0.166455,0.315964,2.85,2.97,...,15856570000.0,17804290000.0,20001600000.0,20561070000.0,20484890000.0,19907110000.0,19362640000.0,20191760000.0,19484380000.0,19101350000.0
1,Algeria,DZA,0.059815,0.39153,-0.598255,0.254901,-0.009968,-0.177371,4.01,3.5,...,161207800000.0,200019500000.0,209060300000.0,209754800000.0,213808800000.0,165979400000.0,160033800000.0,167389400000.0,173758000000.0,169988200000.0
2,Argentina,ARG,-0.229308,0.041503,0.159679,-0.140457,0.027347,-0.206733,7.02,7.02,...,423627400000.0,530163300000.0,545982400000.0,552025100000.0,526319700000.0,594749300000.0,557531400000.0,642695900000.0,519871500000.0,449663400000.0
3,Australia,AUS,0.657004,0.137137,0.06966,0.02219,0.155331,0.298973,9.09,9.09,...,1146138000000.0,1396650000000.0,1546152000000.0,1576184000000.0,1467484000000.0,1351694000000.0,1208847000000.0,1330136000000.0,1433904000000.0,1392681000000.0
4,Austria,AUT,0.608285,-0.061829,0.161047,-0.055415,-0.035363,0.154628,8.29,8.29,...,391892700000.0,431120300000.0,409425200000.0,430068700000.0,441996100000.0,381817600000.0,395227700000.0,418316200000.0,455508300000.0,446314700000.0


### adding columns of funding / GDP * 100 by year

In [24]:
# convert_gdp_percent is not defined in this notebook; presumably it comes from
# the star-import of `auxiliary` and adds the "<kind>_<yy>_pct" columns
# (amount / GDP of that year * 100) -- TODO confirm against auxiliary.py.
# The second call takes the result of the first, so the order of the two
# calls must be kept.
df_funding = convert_gdp_percent('funding', gps_demo_donate_gdp)
df_pledge = convert_gdp_percent('pledge', df_funding)

In [26]:
# Write the combined table to disk; the row index is written as the first,
# unnamed column (pandas' default, spelled out here).
df_pledge.to_csv('data/gps_demo_donate_gdp.csv', index=True)