In [1]:
import pandas as pd
# Show every row when a DataFrame is displayed (None removes the row limit).
pd.options.display.max_rows = None

In [27]:
# Reads in csv for table two of the UN data, using the fifth row as header.
# The index labels dropped here are non-country rows -- presumably the
# development-group headings; TODO confirm against the CSV if the file changes.
hdi_data = pd.read_csv("UN_Data_Table_2.csv", header=4).drop(index=[0, 67, 121, 159, 193])

# Selects only columns with valid, single year names: skips the unnamed
# columns and any column whose name contains a hyphen.
selected_columns = [
    column for column in hdi_data
    if 'Unnamed' not in column and '-' not in column
]

# Drops rows with fewer than 9 populated cells among the selected columns.
# This runs while the ".." placeholder is still ordinary text, so a ".." cell
# counts as populated and only rows that are blank in the file are removed.
hdi_data = hdi_data[selected_columns].dropna(thresh=9)

# Drops bottom rows of summary data.
hdi_data = hdi_data.drop(index=hdi_data.tail(36).index)

# Turns the ".." placeholder into a real missing value (NaN) rather than the
# text "NaN", so pandas null handling (isna, dropna, mean, ...) recognises it.
hdi_data = hdi_data.replace("..", float("nan"))

# Year columns that contained ".." were read as text; make them numeric.
# errors="coerce" turns any other unparseable cell into NaN as well.
year_columns = [column for column in selected_columns if column.isdigit()]
hdi_data[year_columns] = hdi_data[year_columns].apply(pd.to_numeric, errors="coerce")

Unnamed: 0,HDI rank,Country,1990,2000,2010,2014,2015,2017,2018,2019
1,1.0,Norway,0.849,0.915,0.94,0.944,0.947,0.954,0.956,0.957
2,2.0,Ireland,0.773,0.867,0.901,0.928,0.935,0.947,0.951,0.955
3,2.0,Switzerland,0.84,0.898,0.941,0.942,0.947,0.949,0.955,0.955
4,4.0,"Hong Kong, China (SAR)",0.784,0.83,0.904,0.926,0.93,0.941,0.946,0.949
5,4.0,Iceland,0.807,0.867,0.898,0.931,0.934,0.943,0.946,0.949
6,6.0,Germany,0.808,0.876,0.927,0.937,0.938,0.943,0.946,0.947
7,7.0,Sweden,0.821,0.903,0.911,0.935,0.938,0.942,0.943,0.945
8,8.0,Australia,0.871,0.903,0.93,0.933,0.938,0.941,0.943,0.944
9,8.0,Netherlands,0.836,0.882,0.917,0.932,0.934,0.939,0.942,0.944
10,10.0,Denmark,0.806,0.87,0.917,0.935,0.933,0.936,0.939,0.94


In [32]:
# Reshapes the wide table (one column per year) into long format: one row per
# country/year pair. Only the years listed here are carried over.
melt_years = ['1990', '2000', '2010', '2014', '2015']

selected_years = (
    hdi_data
    .melt(id_vars='Country', value_vars=melt_years)
    # Gives the melted columns lowercase, descriptive names.
    .rename(columns={'Country': 'country', 'variable': 'year', 'value': 'hdi'})
)

In [34]:
# Summary values that may matter when merging with the UN dataset.

# Distinct country names, computed once and reused below.
unique_countries = selected_years['country'].unique()

# How many distinct countries there are.
countries_number = len(unique_countries)

# The same names in alphabetical order.
un_countries_list = sorted(unique_countries)

# Reports both values.
print(f"Number of Unique Country Names: {countries_number}")
print(f"\nList of Unique Country Names: \n{un_countries_list}")

Number of Unique Country Names: 174

List of Unique Country Names: 
['Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola', 'Antigua and Barbuda', 'Argentina', 'Armenia', 'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin', 'Bhutan', 'Bolivia (Plurinational State of)', 'Bosnia and Herzegovina', 'Botswana', 'Brazil', 'Brunei Darussalam', 'Bulgaria', 'Cabo Verde', 'Cambodia', 'Cameroon', 'Canada', 'Chile', 'China', 'Colombia', 'Comoros', 'Congo', 'Costa Rica', 'Croatia', 'Cuba', 'Cyprus', 'Czechia', "Côte d'Ivoire", 'Denmark', 'Djibouti', 'Dominica', 'Dominican Republic', 'Ecuador', 'Egypt', 'El Salvador', 'Equatorial Guinea', 'Estonia', 'Eswatini (Kingdom of)', 'Ethiopia', 'Fiji', 'Finland', 'France', 'Gabon', 'Gambia', 'Georgia', 'Germany', 'Ghana', 'Greece', 'Grenada', 'Guatemala', 'Guyana', 'Haiti', 'Honduras', 'Hong Kong, China (SAR)', 'Hungary', 'Iceland', 'India', 'Indonesia', 'Iran (Islamic Republic of)'

In [35]:
# Flags country names that may need renaming or removal.
# Each check is (character, reason code); checks run in this order and only
# the first match is recorded, so a name with both a comma and a space is
# reported as a comma.
checks = ((",", 1), (" ", 2), ("-", 3))
reason_labels = {1: "Comma", 2: "Space", 3: "Hyphen"}

iffy = []    # flagged names
clear = []   # names that matched none of the checks
reason = []  # reason code for each entry of iffy, in the same order
for name in un_countries_list:
    code = next((number for char, number in checks if char in name), None)
    if code is None:
        clear.append(name)
    else:
        iffy.append(name)
        reason.append(code)

# Tallies per reason.
commas = reason.count(1)
spaces = reason.count(2)
hyphens = reason.count(3)

print(f"Iffy Countries: {len(iffy)}\n{commas} Commas, {spaces} Spaces, {hyphens} Hyphens\n-------------------\nCountry   ---   Reason\n-----------------------\n")
for i, name in enumerate(iffy):
    print(f'{i}. {name}\n--- {reason_labels[reason[i]]}\n')
# The counter finishes at the number of entries printed.
i = len(iffy)

Iffy Countries: 40
2 Commas, 37 Spaces, 1 Hyphens
-------------------
Country   ---   Reason
-----------------------

0. Antigua and Barbuda
--- Space

1. Bolivia (Plurinational State of)
--- Space

2. Bosnia and Herzegovina
--- Space

3. Brunei Darussalam
--- Space

4. Cabo Verde
--- Space

5. Costa Rica
--- Space

6. Côte d'Ivoire
--- Space

7. Dominican Republic
--- Space

8. El Salvador
--- Space

9. Equatorial Guinea
--- Space

10. Eswatini (Kingdom of)
--- Space

11. Hong Kong, China (SAR)
--- Comma

12. Iran (Islamic Republic of)
--- Space

13. Korea (Republic of)
--- Space

14. Lao People's Democratic Republic
--- Space

15. Marshall Islands
--- Space

16. Micronesia (Federated States of)
--- Space

17. Moldova (Republic of)
--- Space

18. New Zealand
--- Space

19. North Macedonia
--- Space

20. Palestine, State of
--- Comma

21. Papua New Guinea
--- Space

22. Russian Federation
--- Space

23. Saint Kitts and Nevis
--- Space

24. Saint Lucia
--- Space

25. Saint Vincent and t