In [1]:
import pandas as pd
import numpy as np

# silence every warning category for the remainder of the session
import warnings
warnings.simplefilter('ignore')

In [2]:
# load the aggregated HDI indicator table (with population) from disk into `df`
df = pd.read_csv(
    filepath_or_buffer="aggregate_hdi_indicators_with_population.csv",
)

In [3]:
# display the dataframe created from the csv file (notebook shows the truncated rich repr)
df

Unnamed: 0,Entity,Code,Year,Population (historical estimates),"Fertility rate (Complete Gapminder, v12) (2017)","Mortality rate, infant (per 1,000 live births)",Life expectancy,"Average Total Years of Schooling for Adult Population (Lee-Lee (2016), Barro-Lee (2018) and UNDP (2018))",Expected Years of Schooling (years),"GNI per capita, PPP (constant 2017 international $)",Actual Human Development Index (UNDP)
0,Afghanistan,AFG,1800,3280000.0,7.00,,,,,,
1,Afghanistan,AFG,1801,3280000.0,7.00,,,,,,
2,Afghanistan,AFG,1802,3280000.0,7.00,,,,,,
3,Afghanistan,AFG,1803,3280000.0,7.00,,,,,,
4,Afghanistan,AFG,1804,3280000.0,7.00,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
49729,Zimbabwe,ZWE,2017,14236599.0,3.68,4.05,60.812,8.2,10.3,3207.887558,0.535
49730,Zimbabwe,ZWE,2018,14438812.0,,3.93,61.195,,,3262.224759,
49731,Zimbabwe,ZWE,2019,14645473.0,,3.84,61.490,,,,
49732,Zimbabwe,ZWE,2020,14862927.0,,,,,,,


In [4]:
# calculate HDI rate per year 

#life expectancy index
# ([Life expectancy] - [Life Expectancy Minimum]) / ([Life Expectancy Goalpost] - [Life Expectancy Minimum])
# = ([life expectancy of country] - 20) / (85 - 20)

#education index
# (
#   ([Average Total Years of Schooling for Adult Population (Lee-Lee (2016), Barro-Lee (2018) and UNDP (2018))] - [Mean Years of Schooling Minimum])
#     / ([Mean Years of Schooling Goalpost] - [Mean Years of Schooling Minimum])
#   + ([Expected Years of Schooling (years)] - [Expected Years of Schooling Minimum])
#     / ([Expected Years of Schooling Goalpost] - [Expected Years of Schooling Minimum])
# ) / 2

#income (GNI per capita) index
# (LN([GNI per capita, PPP (constant 2017 international $)]) - LN([GNI per Capita Minimum])) 
# / (LN([GNI per Capita Goalpost]) - LN([GNI per Capita Minimum]))

#HDI
# ([Calculated Life Expectancy Index] * [Calculated Education Index] * [Calculated GNI per Capita Index]) ^ (1/3)


# find life expectancy weight for each country (life expectancy / global life expectancy)
# apply the weight to life expectancy (weight from the step above * life expectancy)
# create a new column to store the result
# use all countries' weighted life expectancies to get the global life expectancy
# do the same with the other factors
# find global HDI

In [5]:
# remove the fertility, infant-mortality and country-code columns;
# the trimmed frame is kept under the name `factors`
factors = df.drop(
    [
        'Fertility rate (Complete Gapminder, v12) (2017)',
        'Mortality rate, infant (per 1,000 live births)',
        'Code',
    ],
    axis='columns',
)

In [6]:
# inspect the frame after the fertility, infant-mortality and code columns were dropped
factors

Unnamed: 0,Entity,Year,Population (historical estimates),Life expectancy,"Average Total Years of Schooling for Adult Population (Lee-Lee (2016), Barro-Lee (2018) and UNDP (2018))",Expected Years of Schooling (years),"GNI per capita, PPP (constant 2017 international $)",Actual Human Development Index (UNDP)
0,Afghanistan,1800,3280000.0,,,,,
1,Afghanistan,1801,3280000.0,,,,,
2,Afghanistan,1802,3280000.0,,,,,
3,Afghanistan,1803,3280000.0,,,,,
4,Afghanistan,1804,3280000.0,,,,,
...,...,...,...,...,...,...,...,...
49729,Zimbabwe,2017,14236599.0,60.812,8.2,10.3,3207.887558,0.535
49730,Zimbabwe,2018,14438812.0,61.195,,,3262.224759,
49731,Zimbabwe,2019,14645473.0,61.490,,,,
49732,Zimbabwe,2020,14862927.0,,,,,


In [8]:
# keep a row only when at least 4 of its values are present (non-NA)
factors = factors.dropna(axis='index', thresh=4)

In [9]:
# inspect the rows that remain after dropping rows with fewer than 4 non-NA values
factors

Unnamed: 0,Entity,Year,Population (historical estimates),Life expectancy,"Average Total Years of Schooling for Adult Population (Lee-Lee (2016), Barro-Lee (2018) and UNDP (2018))",Expected Years of Schooling (years),"GNI per capita, PPP (constant 2017 international $)",Actual Human Development Index (UNDP)
70,Afghanistan,1870,4142928.0,,0.01,,,
75,Afghanistan,1875,4247357.0,,0.01,,,
80,Afghanistan,1880,4354370.0,,0.01,,,
85,Afghanistan,1885,4464010.0,,0.01,,,
90,Afghanistan,1890,4576341.0,,0.01,,,
...,...,...,...,...,...,...,...,...
49727,Zimbabwe,2015,13814642.0,59.534,8.20,10.3,2858.616749,0.529
49728,Zimbabwe,2016,14030338.0,60.294,8.20,10.3,2884.156050,0.532
49729,Zimbabwe,2017,14236599.0,60.812,8.20,10.3,3207.887558,0.535
49730,Zimbabwe,2018,14438812.0,61.195,,,3262.224759,


In [10]:
# add one empty (all-NaN) column per indicator to hold its weight later on
for weight_col in (
    "Weighted Life Expectancy",
    "Weighted YOS",
    "Weighted Expected YOS",
    "Weighted GNI",
):
    factors[weight_col] = np.nan

In [11]:
# inspect the frame with the four empty weight columns appended
factors

Unnamed: 0,Entity,Year,Population (historical estimates),Life expectancy,"Average Total Years of Schooling for Adult Population (Lee-Lee (2016), Barro-Lee (2018) and UNDP (2018))",Expected Years of Schooling (years),"GNI per capita, PPP (constant 2017 international $)",Actual Human Development Index (UNDP),Weighted Life Expectancy,Weighted YOS,Weighted Expected YOS,Weighted GNI
70,Afghanistan,1870,4142928.0,,0.01,,,,,,,
75,Afghanistan,1875,4247357.0,,0.01,,,,,,,
80,Afghanistan,1880,4354370.0,,0.01,,,,,,,
85,Afghanistan,1885,4464010.0,,0.01,,,,,,,
90,Afghanistan,1890,4576341.0,,0.01,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
49727,Zimbabwe,2015,13814642.0,59.534,8.20,10.3,2858.616749,0.529,,,,
49728,Zimbabwe,2016,14030338.0,60.294,8.20,10.3,2884.156050,0.532,,,,
49729,Zimbabwe,2017,14236599.0,60.812,8.20,10.3,3207.887558,0.535,,,,
49730,Zimbabwe,2018,14438812.0,61.195,,,3262.224759,,,,,


In [12]:
# reorder the columns so every indicator is immediately followed by its weight column
column_order = [
    "Entity",
    "Year",
    "Population (historical estimates)",
    # indicator / weight pairs
    "Life expectancy",
    "Weighted Life Expectancy",
    "Average Total Years of Schooling for Adult Population (Lee-Lee (2016), Barro-Lee (2018) and UNDP (2018))",
    "Weighted YOS",
    "Expected Years of Schooling (years)",
    "Weighted Expected YOS",
    "GNI per capita, PPP (constant 2017 international $)",
    "Weighted GNI",
    # reference value published by UNDP
    "Actual Human Development Index (UNDP)",
]
factors = factors[column_order]

In [13]:
# inspect the frame with each weight column placed next to its indicator
factors

Unnamed: 0,Entity,Year,Population (historical estimates),Life expectancy,Weighted Life Expectancy,"Average Total Years of Schooling for Adult Population (Lee-Lee (2016), Barro-Lee (2018) and UNDP (2018))",Weighted YOS,Expected Years of Schooling (years),Weighted Expected YOS,"GNI per capita, PPP (constant 2017 international $)",Weighted GNI,Actual Human Development Index (UNDP)
70,Afghanistan,1870,4142928.0,,,0.01,,,,,,
75,Afghanistan,1875,4247357.0,,,0.01,,,,,,
80,Afghanistan,1880,4354370.0,,,0.01,,,,,,
85,Afghanistan,1885,4464010.0,,,0.01,,,,,,
90,Afghanistan,1890,4576341.0,,,0.01,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
49727,Zimbabwe,2015,13814642.0,59.534,,8.20,,10.3,,2858.616749,,0.529
49728,Zimbabwe,2016,14030338.0,60.294,,8.20,,10.3,,2884.156050,,0.532
49729,Zimbabwe,2017,14236599.0,60.812,,8.20,,10.3,,3207.887558,,0.535
49730,Zimbabwe,2018,14438812.0,61.195,,,,,,3262.224759,,


In [14]:
# trial run: average population over the rows whose year is 1870
print(
    factors.loc[
        factors['Year'].eq(1870),
        'Population (historical estimates)',
    ].mean()
)

10992194.0990991


In [15]:
# trial run: average population over the 1870 rows that also report a life expectancy
print(
    factors.loc[
        factors['Year'].eq(1870) & factors['Life expectancy'].notna(),
        'Population (historical estimates)',
    ].mean()
)

12518941.8


In [18]:
# test out the weight equation on the above requirements:
# rows from 1870 that report a life expectancy.
# The row mask is built once and reused for both the read and the write.
mask_1870 = (factors['Year'] == 1870) & factors['Life expectancy'].notna()
population_1870 = factors.loc[mask_1870, 'Population (historical estimates)']

# weight = country population / mean population of the selected countries.
# A single .loc[rows, column] assignment writes into `factors` itself; the
# previous chained form (factors[col].loc[rows] = ...) assigns into an
# intermediate Series, which is not guaranteed to propagate to the frame and
# never does when pandas Copy-on-Write is enabled.
factors.loc[mask_1870, 'Weighted Life Expectancy'] = (
    population_1870 / population_1870.mean()
)

In [19]:
# view the 1870 rows that report a life expectancy to check the weight column
factors[(factors['Year'] == 1870) & factors['Life expectancy'].notna()]

Unnamed: 0,Entity,Year,Population (historical estimates),Life expectancy,Weighted Life Expectancy,"Average Total Years of Schooling for Adult Population (Lee-Lee (2016), Barro-Lee (2018) and UNDP (2018))",Weighted YOS,Expected Years of Schooling (years),Weighted Expected YOS,"GNI per capita, PPP (constant 2017 international $)",Weighted GNI,Actual Human Development Index (UNDP)
2464,Austria,1870,4528451.0,34.400002,0.361728,2.13,,,,,,
4018,Belgium,1870,5097637.0,40.900002,0.407194,2.27,,,,,,
11333,Denmark,1870,1887969.0,45.970001,0.150809,2.39,,,,,,
14644,France,1870,38184704.0,36.389999,3.050154,0.66,,,,,,
19378,Iceland,1870,69836.0,38.34,0.005578,0.74,,,,,,
21448,Japan,1870,34388672.0,36.59264,2.746931,0.42,,,,,,
30430,Netherlands,1870,3609009.0,37.32,0.288284,2.89,,,,,,
32568,Norway,1870,1736968.0,50.849998,0.138747,3.82,,,,,,
42380,Sweden,1870,4168145.0,44.970001,0.332947,3.51,,,,,,
46846,United Kingdom,1870,31518027.0,40.605,2.517627,0.8,,,,,,


In [20]:
# Population weight of every country, per year and per indicator.
#
# For each indicator, only the rows that actually report that indicator take
# part. Within one year the weight of a row is
#     population of the row / mean population of the participating rows of that year
# and it is stored in the indicator's companion "Weighted ..." column.
# Rows that do not report the indicator keep whatever the weight column held.
#
# Every year present in the data is handled through a group-by on 'Year',
# instead of looping over a hard-coded range of years and rebuilding the same
# boolean masks on each iteration.
weight_targets = {
    'Life expectancy':
        'Weighted Life Expectancy',
    'Average Total Years of Schooling for Adult Population (Lee-Lee (2016), Barro-Lee (2018) and UNDP (2018))':
        'Weighted YOS',
    'Expected Years of Schooling (years)':
        'Weighted Expected YOS',
    'GNI per capita, PPP (constant 2017 international $)':
        'Weighted GNI',
}

for indicator_col, weight_col in weight_targets.items():
    # rows that report this indicator
    reported = factors[indicator_col].notna()
    population = factors.loc[reported, 'Population (historical estimates)']

    # mean population of the reporting rows, broadcast back to each row of its year
    yearly_mean_population = population.groupby(
        factors.loc[reported, 'Year']
    ).transform('mean')

    # One .loc[rows, column] assignment writes into `factors` directly. The
    # chained form factors[col].loc[rows] = ... targets an intermediate Series
    # and is not guaranteed to update the frame (it never does under
    # pandas Copy-on-Write).
    factors.loc[reported, weight_col] = population / yearly_mean_population

In [22]:
# spot-check the 1900 rows that report a life expectancy
factors[(factors['Year'] == 1900) & factors['Life expectancy'].notna()]

Unnamed: 0,Entity,Year,Population (historical estimates),Life expectancy,Weighted Life Expectancy,"Average Total Years of Schooling for Adult Population (Lee-Lee (2016), Barro-Lee (2018) and UNDP (2018))",Weighted YOS,Expected Years of Schooling (years),Weighted Expected YOS,"GNI per capita, PPP (constant 2017 international $)",Weighted GNI,Actual Human Development Index (UNDP)
1606,Argentina,1900,4689224.0,39.0,0.432181,0.78,0.347993,,,,,
3826,Belarus,1900,4909551.0,36.200001,0.452487,,,,,,,
4048,Belgium,1900,6717693.0,46.52,0.619134,2.58,0.498528,,,,,
5008,Bolivia,1900,1809836.0,26.0,0.166803,0.17,0.13431,,,,,
5746,Brazil,1900,18063384.0,29.0,1.664807,0.47,1.340505,,,,,
6262,Bulgaria,1900,3994654.0,40.200001,0.368166,1.33,0.296448,,,,,
8554,Chile,1900,3135464.0,29.0,0.288979,1.3,0.232686,,,,,
8998,Colombia,1900,4089052.0,29.0,0.376866,0.74,0.303453,,,,,
9736,Costa Rica,1900,316531.0,32.0,0.029173,1.21,0.02349,,,,,
10402,Cuba,1900,1718198.0,32.0,0.158357,0.38,0.127509,,,,,
