In [1]:
%run -i 'setup.py'

import re

import pandas as pd
import numpy as np

from src.settings import BASE_DIR

neonatal_mortality = os.path.join(BASE_DIR, 'data/health_well_being/child_mortality/NMR_mortality_rate_2019.xlsx')
u5_mortality = os.path.join(BASE_DIR, 'data/health_well_being/child_mortality/U5MR_mortality_rate_2019-1.xlsx')
contraceptive = os.path.join(BASE_DIR, 'data/health_well_being/family_planning/UNPD_WCU2019_Country_Data_Survey-Based.xlsx')
fertility = os.path.join(BASE_DIR, 'data/health_well_being/family_planning/UNPD_WFD_2017_FERTILITY.xlsx')
maternal_mortality = os.path.join(BASE_DIR, 'data/health_well_being/maternal_mortality/maternal_mortality/countryresults_all.csv')
water_resource_mgmt = os.path.join(BASE_DIR, 'data/water_sanitation/SDG651_National_Database_2017_baseline.xlsx')
water_treatment = os.path.join(BASE_DIR, 'data/water_sanitation/JMP_2019_WLD.xlsx')

gini = os.path.join(BASE_DIR, 'data/country_stats/world_bank/gini_index/API_SI.POV.GINI_DS2_en_csv_v2_247786.csv')
pop = os.path.join(BASE_DIR, 'data/country_stats/world_bank/population/API_SP.POP.TOTL_DS2_en_csv_v2_247892.csv')
oecd = os.path.join(BASE_DIR, 'data/country_stats/oecd/NAAG_13102019054548637.csv')

In [2]:
# Neonatal Mortality

unicef_id_vars = ['ISO Code', 'Country Name', 'Uncertainty bounds*']
unicef_sheet = 'Country estimates'
unicef_header_idx = 11

nn_mort_df = (
    pd
    .read_excel(neonatal_mortality, sheet_name=unicef_sheet, header=unicef_header_idx)
    .melt(id_vars=unicef_id_vars, value_vars=np.arange(1950.5, 2019))
    .dropna(subset=unicef_id_vars)
)
nn_mort_df

Unnamed: 0,ISO Code,Country Name,Uncertainty bounds*,variable,value
0,AFG,Afghanistan,Lower,1950.5,
1,AFG,Afghanistan,Median,1950.5,
2,AFG,Afghanistan,Upper,1950.5,
3,ALB,Albania,Lower,1950.5,
4,ALB,Albania,Median,1950.5,
5,ALB,Albania,Upper,1950.5,
6,DZA,Algeria,Lower,1950.5,
7,DZA,Algeria,Median,1950.5,
8,DZA,Algeria,Upper,1950.5,
9,AND,Andorra,Lower,1950.5,


In [3]:
# U5 Child Mortality

u5_mort_df = (
    pd
    .read_excel(u5_mortality, sheet_name=unicef_sheet, header=unicef_header_idx)
    .melt(id_vars=unicef_id_vars, value_vars=np.arange(1950.5, 2019))
    .dropna(subset=unicef_id_vars)
)
u5_mort_df

Unnamed: 0,ISO Code,Country Name,Uncertainty bounds*,variable,value
0,AFG,Afghanistan,Lower,1950.5,
1,AFG,Afghanistan,Median,1950.5,
2,AFG,Afghanistan,Upper,1950.5,
3,ALB,Albania,Lower,1950.5,
4,ALB,Albania,Median,1950.5,
5,ALB,Albania,Upper,1950.5,
6,DZA,Algeria,Lower,1950.5,
7,DZA,Algeria,Median,1950.5,
8,DZA,Algeria,Upper,1950.5,
9,AND,Andorra,Lower,1950.5,


In [4]:
# Maternal Mortality

mat_mort_df = (
    pd
    .read_csv(maternal_mortality)
)

mat_mort_df

Unnamed: 0,name,iso,year,estimate,rounded,indicator,value
0,Afghanistan,AFG,1985,lower bound (80% UI),False,mmr,1016.765570
1,Albania,ALB,1985,lower bound (80% UI),False,mmr,63.871845
2,Algeria,DZA,1985,lower bound (80% UI),False,mmr,189.377981
3,Angola,AGO,1985,lower bound (80% UI),False,mmr,641.210991
4,Argentina,ARG,1985,lower bound (80% UI),False,mmr,67.029417
5,Armenia,ARM,1985,lower bound (80% UI),False,mmr,47.866019
6,Australia,AUS,1985,lower bound (80% UI),False,mmr,7.665231
7,Austria,AUT,1985,lower bound (80% UI),False,mmr,7.476203
8,Azerbaijan,AZE,1985,lower bound (80% UI),False,mmr,61.722872
9,Bahamas,BHS,1985,lower bound (80% UI),False,mmr,33.306014


In [5]:
# Contraceptive Use

def join_cols(top_col, bottom_col):
    if not any(bottom_col) or 'Unnamed' in bottom_col:
        return top_col.replace('\n', ' ')
    
    return ': '.join((top_col, bottom_col)).replace('\n', ' ')


cont_df = (
    pd
    .read_excel(contraceptive, sheet_name='By methods', header=[3, 4])
    .reset_index(drop=False)
    .rename(columns={'index': 'Country or area'})
)

cont_df.columns = [join_cols(*col_pair) for col_pair in cont_df.columns.values]

# Note: there are some duplicate country/year combos, sometimes representing same start year but different end years,
# sometimes full duplicates.
cont_df

Unnamed: 0,Country or area,Country or area.1,ISO code,Survey start year,Survey end year,Age group,Contraceptive prevalence (per cent): Any method,Contraceptive prevalence (per cent): Any modern method,Contraceptive prevalence (per cent): Female sterilization,Contraceptive prevalence (per cent): Male sterilization,...,Notes: Note on country,Notes: Note on data,Notes: Note on population,Notes: Contraceptive use: methods,Notes: Contraceptive use: residuals (modern methods),Notes: Contraceptive use: residuals (traditional methods),Notes: Unmet need: population included,Notes: Unmet need: population excluded,Notes: Unmet need: indicator,Notes: Unmet need: indicator.1
0,0,Afghanistan,4,1972,1974,15-44,1.6,1.6,..,..,...,,,Data pertain to ever-married women of reproduc...,,,,,,,
1,1,Afghanistan,4,2000,2000,12-49,5.3,3.6,0.8,0.1,...,"Data pertain to Nangarhar, Konar and Laghman, ...",,,Figures by method do not add up to the total. ...,,,,,,
2,2,Afghanistan,4,2003,2003,<50,10.3,8.7,..,..,...,,Adjusted.,,,Including male and female sterilization.,,,,,
3,3,Afghanistan,4,2005,2005,<50,13.6,12.5,..,..,...,,Adjusted.,,,Including male and female sterilization.,,,,,
4,4,Afghanistan,4,2006,2006,10-49,18.6,17.5,0.7,..,...,"Excluding the six largest cities (Kabul, Hirat...",,,Figures by method do not add up to the total b...,,,,,,
5,5,Afghanistan,4,2007,2008,15-49,22.8,15.2,..,..,...,,,,,,,,,,
6,6,Afghanistan,4,2010,2010,15-49,21.8,19.9,1.4,0,...,Excluding areas in the South zone.,,,,,,,,,
7,7,Afghanistan,4,2010,2011,15-49,21.2,20.3,0.6,0.2,...,,,,,,,,,,
8,8,Afghanistan,4,2012,2012,12-49,..,13.8,..,..,...,,,,,,,,,,
9,9,Afghanistan,4,2015,2016,15-49,22.5,19.8,1.8,0,...,,,,Figures by method do not add up to the total.,,,,,,


In [6]:
# Fertility

fert_df = (
    pd
    .read_excel(fertility, sheet_name='FERTILITY_INDICATORS', header=2)
)

fert_df

Unnamed: 0,Country or area,ISO code,Indicator,AgeGroup,TimeMid,DataValue,Series,DataProcess,DataCatalog ShortName,DataCatalog ID,DataCatalog LongName,Source,YearStart,YearEnd,DataType,Note on population data used in denominator
0,Afghanistan,4,ASFR1519,[15-19],1973.833333,122.199997,"1972-1974 NDFGS,Recent births,Article",Survey,1972-1974 NDFGS,160,Afghanistan 1972-1974 National Demographic and...,Other sources,1972.0,1974.0,Recent births,
1,Afghanistan,4,ASFR1519,[15-19],1973.833333,172.665298,"1972-1974 NDFGS,Relational Gompertz model,Comp...",Survey,1972-1974 NDFGS,160,Afghanistan 1972-1974 National Demographic and...,UNPD,1972.0,1974.0,Relational Gompertz model,
2,Afghanistan,4,ASFR1519,[15-19],1979.500000,155.791229,"1979 Census,Computed rate from DYB,DYB,280-135-36",Census,1979 Census,280,Afghanistan 1979 Census,UNSD,1979.0,1979.0,Computed rate from DYB,
3,Afghanistan,4,ASFR1519,[15-19],1978.976685,125.192848,"1979 Census,Recent births,Database",Census,1979 Census,280,Afghanistan 1979 Census,Other sources,1979.0,1979.0,Recent births,
4,Afghanistan,4,ASFR1519,[15-19],1978.976685,172.670837,"1979 Census,Relational Gompertz model,Computed",Census,1979 Census,280,Afghanistan 1979 Census,UNPD,1979.0,1979.0,Relational Gompertz model,
5,Afghanistan,4,ASFR1519,[15-19],2000.541138,193.775665,"2000 MICS,Arriaga modified P/F Ratio method,Co...",MICS,2000 MICS,2893,Afghanistan 2000 Multiple Indicator Cluster Su...,UNPD,2000.0,2000.0,Arriaga modified P/F Ratio method,
6,Afghanistan,4,ASFR1519,[15-19],2000.541138,323.916962,"2000 MICS,Arriaga-Mortara CEB method,Computed",MICS,2000 MICS,2893,Afghanistan 2000 Multiple Indicator Cluster Su...,UNPD,2000.0,2000.0,Arriaga-Mortara CEB method,
7,Afghanistan,4,ASFR1519,[15-19],2003.497192,151.834244,"2003 MICS,Arriaga modified P/F Ratio method,Co...",MICS,2003 MICS,1925,Afghanistan 2003 Multiple Indicator Cluster Su...,UNPD,2003.0,2003.0,Arriaga modified P/F Ratio method,
8,Afghanistan,4,ASFR1519,[15-19],2003.497192,108.671272,"2003 MICS,Arriaga-Mortara CEB method,Computed",MICS,2003 MICS,1925,Afghanistan 2003 Multiple Indicator Cluster Su...,UNPD,2003.0,2003.0,Arriaga-Mortara CEB method,
9,Afghanistan,4,ASFR1519,[15-19],2005.810425,117.444847,"2006 HS,Arriaga modified P/F Ratio method,Comp...",Survey,2006 HS,4369,Afghanistan 2006 Health Survey,UNPD,2006.0,2006.0,Arriaga modified P/F Ratio method,


In [7]:
# Drinking Water

def join_cols(col_triplets):
    col_names = [col for col in col_triplets if any(col) and 'Unnamed' not in col and 'DRINKING WATER' not in col]

    if len(set(col_names)) == 1:
        return col_names[0].replace('\n', ' ')
    
    return ': '.join(col_names).replace('\n', ' ')


drink_water_df = (
    pd
    .read_excel(water_treatment, sheet_name='Water', header=[0, 1, 2])
)

drink_water_df.columns = [join_cols(cols) for cols in drink_water_df.columns.values]
drink_water_df = drink_water_df.loc[:, ~drink_water_df.columns.duplicated()]

drink_water_df

Unnamed: 0,"COUNTRY, AREA OR TERRITORY",ISO3,Year,Population (thousands),% urban,NATIONAL: At least basic,NATIONAL: Limited (more than 30 mins),NATIONAL: Unimproved,NATIONAL: Surface water,NATIONAL: Annual rate of change in basic,...,RURAL: Proportion of population using improved water supplies: Free from contamination,RURAL: Proportion of population using improved water supplies: Piped,RURAL: Proportion of population using improved water supplies: Non-piped,URBAN: Proportion of population using improved water supplies: Safely managed,URBAN: Proportion of population using improved water supplies: Accessible on premises,URBAN: Proportion of population using improved water supplies: Available when needed,URBAN: Proportion of population using improved water supplies: Free from contamination,URBAN: Proportion of population using improved water supplies: Piped,URBAN: Proportion of population using improved water supplies: Non-piped,URBAN: Proportion of population using improved water supplies: Sl
0,Afghanistan,AFG,2000.0,20093.755859,22.077999,27.7719,3.57996,43.4006,25.2475,2.31134,...,-,<1,24.8826,-,40.0053,-,-,16.7937,37.3908,27.0
1,Afghanistan,AFG,2001.0,20966.462891,22.169001,27.7973,3.58126,43.3917,25.2298,2.31134,...,-,<1,24.8826,-,40.0053,-,-,16.7937,37.3908,28.0
2,Afghanistan,AFG,2002.0,21979.923828,22.261000,29.9008,3.86609,41.8907,24.3424,2.31134,...,-,<1,27.1623,-,41.9592,-,-,18.5629,38.268,29.0
3,Afghanistan,AFG,2003.0,23064.851562,22.353001,32.0051,4.15078,40.3885,23.4557,2.31134,...,-,<1,28.9725,-,43.9131,-,-,20.3321,39.1452,30.0
4,Afghanistan,AFG,2004.0,24118.978516,22.500000,34.1262,4.43603,38.8784,22.5594,2.31134,...,-,1.41878,30.303,-,45.867,-,-,22.1014,40.0224,31.0
5,Afghanistan,AFG,2005.0,25070.798828,22.702999,36.2653,4.72171,37.3592,21.6538,2.31134,...,-,2.368,31.6336,-,47.8209,-,-,23.8706,40.8996,32.0
6,Afghanistan,AFG,2006.0,25893.449219,22.907000,38.4064,5.00713,35.837,20.7495,2.31134,...,-,3.31723,32.9641,-,49.7748,-,-,25.6398,41.7768,33.0
7,Afghanistan,AFG,2007.0,26616.792969,23.113001,40.8442,4.99794,34.3117,19.8462,2.31134,...,-,4.26646,34.2946,-,52.2195,-,-,27.4091,42.654,34.0
8,Afghanistan,AFG,2008.0,27294.031250,23.320000,43.3151,4.95753,32.7834,18.944,2.31134,...,-,5.21569,35.6251,-,54.7013,-,-,29.1783,43.5312,35.0
9,Afghanistan,AFG,2009.0,28004.330078,23.528000,45.8191,4.88581,31.252,18.0431,2.31134,...,-,6.16491,36.9556,-,57.2201,-,-,30.9475,44.4084,36.0


In [8]:
# Water Sanitation

def join_cols(col_triplets):
    col_names = [col for col in col_triplets if any(col) and 'Unnamed' not in col and 'SANITATION' not in col]

    if len(set(col_names)) == 1:
        return col_names[0].replace('\n', ' ')
    
    return ': '.join(col_names).replace('\n', ' ')


sanitation_df = (
    pd
    .read_excel(water_treatment, sheet_name='Sanitation', header=[0, 1, 2])
)

sanitation_df.columns = [join_cols(cols) for cols in sanitation_df.columns.values]
sanitation_df = sanitation_df.loc[:, ~sanitation_df.columns.duplicated()]

sanitation_df

Unnamed: 0,"COUNTRY, AREA OR TERRITORY",ISO3,Year,Population (thousands),% urban,NATIONAL: At least basic,NATIONAL: Limited (shared),NATIONAL: Unimproved,NATIONAL: Open defecation,NATIONAL: Annual rate of change in basic,...,RURAL: Proportion of population using improved sanitation facilities (including shared): Septic tanks,RURAL: Proportion of population using improved sanitation facilities (including shared): Sewer connections,URBAN: Proportion of population using improved sanitation facilities (excluding shared): Safely managed,URBAN: Proportion of population using improved sanitation facilities (excluding shared): Disposed in situ,URBAN: Proportion of population using improved sanitation facilities (excluding shared): Emptied and treated,URBAN: Proportion of population using improved sanitation facilities (excluding shared): Wastewater treated,URBAN: Proportion of population using improved sanitation facilities (including shared): Latrines and other,URBAN: Proportion of population using improved sanitation facilities (including shared): Septic tanks,URBAN: Proportion of population using improved sanitation facilities (including shared): Sewer connections,URBAN: Proportion of population using improved sanitation facilities (including shared): Sl
0,Afghanistan,AFG,2000.0,20093.755859,22.077999,23.5157,5.85019,44.6144,26.0197,1.1707,...,1.0696,<1,-,-,-,-,18.1889,17.2588,8.02808,26.0
1,Afghanistan,AFG,2001.0,20966.462891,22.169001,23.5228,5.85958,44.62,25.9976,1.1707,...,1.0696,<1,-,-,-,-,18.1889,17.2588,8.02808,27.0
2,Afghanistan,AFG,2002.0,21979.923828,22.261000,24.6451,6.16807,44.0197,25.1671,1.1707,...,1.0696,<1,-,-,-,-,20.6927,17.2588,8.03517,28.0
3,Afghanistan,AFG,2003.0,23064.851562,22.353001,25.7689,6.47776,43.4161,24.3373,1.1707,...,1.0696,<1,-,-,-,-,23.1964,17.2588,8.04225,29.0
4,Afghanistan,AFG,2004.0,24118.978516,22.500000,26.8996,6.79537,42.8095,23.4955,1.1707,...,1.0696,<1,-,-,-,-,25.7001,17.2588,8.04933,30.0
5,Afghanistan,AFG,2005.0,25070.798828,22.702999,28.0386,7.12212,42.1971,22.6422,1.1707,...,1.0696,<1,-,-,-,-,28.2038,17.2588,8.05642,31.0
6,Afghanistan,AFG,2006.0,25893.449219,22.907000,29.1808,7.45163,41.5773,21.7903,1.1707,...,1.0696,<1,-,-,-,-,30.7075,17.2588,8.0635,32.0
7,Afghanistan,AFG,2007.0,26616.792969,23.113001,30.4118,7.69867,40.9501,20.9395,1.1707,...,1.0696,<1,-,-,-,-,33.2112,17.2588,8.07058,33.0
8,Afghanistan,AFG,2008.0,27294.031250,23.320000,31.6547,7.93989,40.3154,20.0901,1.1707,...,1.0696,<1,-,-,-,-,35.7149,17.2588,8.07767,34.0
9,Afghanistan,AFG,2009.0,28004.330078,23.528000,32.9097,8.17512,39.6731,19.2421,1.1707,...,1.0696,<1,-,-,-,-,38.2186,17.2588,8.08475,35.0


In [9]:
# Water Management

def join_cols(col_triplets):
    col_names = [str(col) for col in col_triplets if any(str(col)) and not re.search('Unnamed', str(col))]

    if len(set(col_names)) == 1:
        return col_names[0].replace('\n', ' ')
        
    return ' - '.join(col_names).replace('\n', ' ')


water_mgmt_df = (
    pd
    .read_excel(water_resource_mgmt, sheet_name='Global SDG 6.5.1 data summary', header=[0, 1, 2])
)

water_mgmt_df.columns = [join_cols(cols) for cols in water_mgmt_df.columns.values]
water_mgmt_df = (
    water_mgmt_df
    # Column labels are a complete, inconsistent mess, and the WRM Score is what we want anyway,
    # so dropping the rest
    .loc[:, ['ISO code', '4.2 - Average - Final WRM Score']]
    .rename(columns={'4.2 - Average - Final WRM Score': 'Final WRM Score'})
)

water_mgmt_df

Unnamed: 0,ISO code,Final WRM Score
0,AFG,11.510101
1,ALB,43.141414
2,DZA,48.232323
3,AND,35.500000
4,AGO,37.069444
5,ATG,29.875000
6,ARG,38.198413
7,ARM,35.944444
8,AUS,85.500000
9,AUT,91.136364


In [10]:
# General Country Data from OECD

country_df = pd.read_csv(oecd)

data_labels = country_df.loc[:, ['INDICATOR', 'Indicator']].drop_duplicates().set_index('INDICATOR')

country_df = (
    country_df
    .drop(
        ['TIME', 'INDICATOR', 'Unit Code', 'Unit', 'PowerCode Code', 'PowerCode',
         'Reference Period Code', 'Reference Period', 'Flag Codes', 'Flags'],
        axis=1
    )
    .rename(columns={'LOCATION': 'Code'})
    .pivot_table(index=['Code', 'Country', 'Time'], columns=['Indicator'], values=['Value'])
    .droplevel(level=0, axis=1)
)

country_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Indicator,"General government expenditure by function, defence, percentage of GDP","General government expenditure by function, economic affairs, percentage of GDP","General government expenditure by function, education, percentage of GDP","General government expenditure by function, environment protection, percentage of GDP","General government expenditure by function, general public services, percentage of GDP","General government expenditure by function, health, percentage of GDP","General government expenditure by function, housing and community amenities, percentage of GDP","General government expenditure by function, public order and safety, percentage of GDP","General government expenditure by function, recreation, culture and religion, percentage of GDP","General government expenditure by function, social protection, percentage of GDP","Gross debt of general government, percentage of GDP","Gross domestic product (GDP), current PPPs, billions US dollars","Social benefits and social transfers in kind, percentage of GDP","Total expenditure of general government, percentage of GDP","Total general government (GG) revenue, percentage of GDP"
Code,Country,Time,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
AUS,Australia,2000,1.506242,4.286407,5.027614,0.517540,4.824797,5.679326,0.781062,1.541274,0.870415,10.257196,41.10381,537.761328,19.357195,36.589624,35.355555
AUS,Australia,2001,1.543646,4.439890,5.061962,0.558964,4.200712,5.850689,0.858200,1.593895,0.780905,9.954220,40.39798,567.709025,19.102078,36.119048,35.361344
AUS,Australia,2002,1.589343,4.124551,5.096135,0.600654,4.062250,5.873202,0.772947,1.631917,0.797543,9.709474,38.67659,599.301220,18.845760,35.153441,35.866211
AUS,Australia,2003,1.483208,4.142089,5.034615,0.571867,3.847096,5.957570,0.689516,1.637673,0.809255,10.116963,35.55205,635.085071,19.370900,35.280400,36.090816
AUS,Australia,2004,1.412989,4.041432,5.064231,0.621333,3.859835,6.221033,0.743735,1.606403,0.772899,9.782398,32.10780,675.451920,19.183929,35.265744,36.208098
AUS,Australia,2005,1.418564,3.954016,4.982487,0.631856,3.710045,6.155776,0.730608,1.638750,0.788916,9.474725,29.96307,717.705079,18.735385,34.937728,36.601467
AUS,Australia,2006,1.459700,3.913609,4.782287,0.724190,3.674145,6.093910,0.719589,1.694931,0.763671,9.322902,29.21775,774.327431,18.185834,34.739226,36.394492
AUS,Australia,2007,1.395296,3.942792,4.717182,0.707713,3.490661,6.212706,0.742114,1.675510,0.788575,9.165872,27.99366,825.040730,18.099945,34.671324,35.391608
AUS,Australia,2008,1.438644,3.968353,4.950621,0.726742,3.475711,6.344508,0.844823,1.670522,0.771102,10.803757,29.94624,851.983130,19.976273,36.985347,33.240064
AUS,Australia,2009,1.469938,4.274787,5.493805,0.852898,3.858252,6.649729,0.933592,1.712866,0.779735,9.335765,38.62356,902.670512,18.983931,38.369411,32.809975


In [11]:
wb_id_vars = ['Country Name', 'Country Code', 'Indicator Name']

pop_df = (
    pd
    .read_csv(pop, header=2)
    .drop('Indicator Code', axis=1)
    .melt(id_vars=wb_id_vars, value_vars=np.arange(1960, 2019).astype(str))
)


pop_df

Unnamed: 0,Country Name,Country Code,Indicator Name,variable,value
0,Aruba,ABW,"Population, total",1960,5.421100e+04
1,Afghanistan,AFG,"Population, total",1960,8.996973e+06
2,Angola,AGO,"Population, total",1960,5.454933e+06
3,Albania,ALB,"Population, total",1960,1.608800e+06
4,Andorra,AND,"Population, total",1960,1.341100e+04
5,Arab World,ARB,"Population, total",1960,9.219775e+07
6,United Arab Emirates,ARE,"Population, total",1960,9.241800e+04
7,Argentina,ARG,"Population, total",1960,2.048178e+07
8,Armenia,ARM,"Population, total",1960,1.874121e+06
9,American Samoa,ASM,"Population, total",1960,2.012300e+04


In [12]:
gini_df = (
    pd
    .read_csv(gini, header=2)
    .drop('Indicator Code', axis=1)
    .melt(id_vars=wb_id_vars, value_vars=np.arange(1960, 2019).astype(str))
)


gini_df

Unnamed: 0,Country Name,Country Code,Indicator Name,variable,value
0,Aruba,ABW,GINI index (World Bank estimate),1960,
1,Afghanistan,AFG,GINI index (World Bank estimate),1960,
2,Angola,AGO,GINI index (World Bank estimate),1960,
3,Albania,ALB,GINI index (World Bank estimate),1960,
4,Andorra,AND,GINI index (World Bank estimate),1960,
5,Arab World,ARB,GINI index (World Bank estimate),1960,
6,United Arab Emirates,ARE,GINI index (World Bank estimate),1960,
7,Argentina,ARG,GINI index (World Bank estimate),1960,
8,Armenia,ARM,GINI index (World Bank estimate),1960,
9,American Samoa,ASM,GINI index (World Bank estimate),1960,
