![Callysto.ca Banner](https://github.com/callysto/curriculum-notebooks/blob/master/callysto-notebook-banner-top.jpg?raw=true)

# United Nations



### Code: 

Run the code cells below to import the libraries we need for this project. Libraries are collections of pre-written code that make it easier to analyze our data.

In [None]:
import pandas as pd
import plotly_express as px
from plotly.subplots import make_subplots
import folium
import geopandas as gpd
import plotly.graph_objs as go
import ipywidgets
from ipywidgets import interact
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.impute import SimpleImputer
from sklearn.metrics import r2_score
import warnings
import math
warnings.filterwarnings("ignore")
print("Libraries imported.")

In [None]:
# Download the three HDI workbooks (components, inequality-adjusted, yearly values)
# in one pass; the reads happen in the same order as the names on the left.
HDI_components, HDI_inequality, HDI_years = (
    pd.read_excel(workbook_url)
    for workbook_url in (
        "https://raw.githubusercontent.com/callysto/data-files/main/SocialStudies/UnitedNations/HDI_components.xlsx",
        "https://raw.githubusercontent.com/callysto/data-files/main/SocialStudies/UnitedNations/HDI_inequality_adjusted.xlsx",
        "https://raw.githubusercontent.com/callysto/data-files/main/SocialStudies/UnitedNations/HDI_years.xlsx",
    )
)

# Render all three tables so the reader can inspect them before any cleaning.
display(HDI_components, HDI_inequality, HDI_years)

### Examining HDI Trends

In [None]:
# Display the HDI_years table as this cell's output.
HDI_years

In [None]:
# Walk the 'Country' and 'Change in HDI rank 2015-2021' columns side by side and
# print one line per row.
rank_change_rows = zip(HDI_years['Country'], HDI_years['Change in HDI rank 2015-2021'])
for country_name, rank_change in rank_change_rows:
    print(f"Country Name: {country_name}, Change in HDI Ranking: {rank_change}")

In [None]:
# For each of the columns at positions 11-14 of HDI_years, report the highest and
# lowest value together with the country each belongs to.
for col in HDI_years.columns[11:15]:
    try:
        highest_val = HDI_years[col].max()
        lowest_val = HDI_years[col].min()
    except TypeError:
        # Only a failed comparison (text mixed with numbers) should trigger the
        # conversion; any other error is a real problem and must surface.
        print(f"Values in {col} column are not numeric. Let's convert them to numeric.\n")
        # errors='coerce' turns anything that cannot be parsed into NaN.
        HDI_years[col] = pd.to_numeric(HDI_years[col], errors='coerce')
        highest_val = HDI_years[col].max()
        lowest_val = HDI_years[col].min()

    # idxmax/idxmin give the row label of the extreme value; use it to look up the country.
    index_highest_val = HDI_years.loc[HDI_years[col].idxmax()]['Country']
    index_lowest_val = HDI_years.loc[HDI_years[col].idxmin()]['Country']

    print(f"Highest {col}: {highest_val} Country: {index_highest_val} \nLowest {col}: {lowest_val} Country: {index_lowest_val}")

In [None]:
# First attempt at the extremes of the rank-change column. A TypeError here means
# the column cannot be compared as-is, which the message below explains.
rank_change_column = 'Change in HDI rank 2015-2021'
try:
    maximum_change = HDI_years[rank_change_column].max()
    minimum_change = HDI_years[rank_change_column].min()
except TypeError:
    print("Values in `Change in HDI rank 2015-2021` column are not numeric. Let's convert them to numeric.")

In [None]:
# Coerce the rank-change column to numbers (unparseable entries become NaN) and
# store it back, then look up the largest and smallest change and their countries.
rank_change = pd.to_numeric(HDI_years['Change in HDI rank 2015-2021'], errors='coerce')
HDI_years['Change in HDI rank 2015-2021'] = rank_change

maximum_change = rank_change.max()
minimum_change = rank_change.min()

# idxmax/idxmin return row labels, which select the matching 'Country' entry.
max_country = HDI_years.loc[rank_change.idxmax(), 'Country']
min_country = HDI_years.loc[rank_change.idxmin(), 'Country']

print(f"Maximum Change: {maximum_change}, Country: {max_country}")
print(f"Minimum Change: {minimum_change}, Country: {min_country}")

In [None]:
# Columns at positions 2-9 of HDI_years; HDI_cols is reused later as the list of
# selectable columns for the interactive scatter plot.
HDI_cols = HDI_years.columns[2:10]
for column in HDI_cols:
    # Store the numeric version back so later cells work with numbers (bad entries -> NaN).
    numeric_values = pd.to_numeric(HDI_years[column], errors='coerce')
    HDI_years[column] = numeric_values
    print(f"Maximum value in {column}: {numeric_values.max()}")
    max_hdi_country = HDI_years.loc[numeric_values.idxmax(), 'Country']
    print(f"Country with the highest HDI: {max_hdi_country}\n")

In [None]:
def update_scatter_plot(selected_year):
    """Show a scatter plot with one marker per country for the chosen column.

    Args:
        selected_year: name of the HDI_years column whose values go on the y-axis.

    Each country gets its own trace so it has its own legend entry. Countries and
    values are paired in a single pass over the two columns, instead of filtering
    the whole table once per country.
    """
    data = [
        go.Scatter(x=[country], y=[hdi_value], mode='markers', name=country)
        for country, hdi_value in zip(HDI_years['Country'], HDI_years[selected_year])
    ]

    # Tick labels are hidden on the x-axis; the country name is still each trace's x value.
    layout = go.Layout(title=f'HDI Scatter Plot ({selected_year})', xaxis=dict(showticklabels=False, title='Country'), yaxis=dict(title='HDI'))
    selected_year_fig = go.Figure(data=data, layout=layout)
    selected_year_fig.show()

# Build a selection widget from HDI_cols and redraw the plot whenever the choice changes.
interact(update_scatter_plot, selected_year=HDI_cols)

In [None]:
# Read the country-boundary GeoJSON from the Callysto data repository (needs network
# access) and display the result. Its 'ADMIN' column holds the country names that
# later cells compare against HDI_years['Country'].
countries_geojson = gpd.read_file('https://raw.githubusercontent.com/callysto/data-files/main/SocialStudies/UnitedNations/countries.geojson')
countries_geojson

In [None]:
# Compare the country names used by the two sources to find those that need renaming
# before the tables can be joined.
geojson_country_names = countries_geojson['ADMIN']
hdi_country_names = HDI_years['Country']

geojson_name_set = set(geojson_country_names)
hdi_name_set = set(hdi_country_names)

# Names spelled identically in both sources.
matching_countries = geojson_name_set & hdi_name_set
# Names that appear in only one of the two sources.
non_matching_countries = hdi_name_set - matching_countries
non_matching_countries_geojson = geojson_name_set - matching_countries

print(f'Non-matching geojson: {non_matching_countries_geojson}')
print(f'Non-matching dataframe countries: {non_matching_countries}')

In [None]:
# Show the rows that have no 'HDI rank', then remove them from HDI_years in place.
missing_rank_mask = HDI_years['HDI rank'].isnull()
no_HDI_years = HDI_years[missing_rank_mask]
display(no_HDI_years)
HDI_years.dropna(subset=['HDI rank'], inplace=True)

In [None]:
# Country-name translation table: each key is a name as written in
# HDI_years['Country'], each value is the replacement name.
# NOTE(review): the values are presumably the spellings used in the GeoJSON 'ADMIN'
# column, so that the merge in the next cell can match them; confirm against the
# non-matching names printed earlier.
mapping = {
    'Russian Federation': 'Russia',
    'Micronesia (Federated States of)': 'Federated States of Micronesia',
    'Cabo Verde': 'Cape Verde',
    "Korea (Democratic People's Rep. of)": 'North Korea',
    'North Macedonia': 'Macedonia',
    'Bahamas': 'The Bahamas',
    'Tanzania (United Republic of)': 'United Republic of Tanzania',
    'Türkiye': 'Turkey',
    'Serbia': 'Republic of Serbia',
    'Eswatini (Kingdom of)': 'Swaziland',
    'Guinea-Bissau': 'Guinea Bissau',
    'Timor-Leste': 'East Timor',
    "Lao People's Democratic Republic": 'Laos',
    'Congo': 'Republic of Congo',
    'Syrian Arab Republic': 'Syria',
    'Brunei Darussalam': 'Brunei',
    'Viet Nam': 'Vietnam',
    'Iran (Islamic Republic of)': 'Iran',
    'Czechia': 'Czech Republic',
    'Congo (Democratic Republic of the)': 'Democratic Republic of the Congo',
    'Bolivia (Plurinational State of)': 'Bolivia',
    'Moldova (Republic of)': 'Moldova',
    'Korea (Republic of)': 'South Korea',
    "Côte d'Ivoire": 'Ivory Coast',
    'Palestine, State of': 'Palestine',
    'Venezuela (Bolivarian Republic of)': 'Venezuela',
    'Hong Kong, China (SAR)': 'Hong Kong S.A.R.',
    'United States': 'United States of America'
}

# Replace whole cell values that exactly equal a key; other names are left unchanged.
HDI_years['Country'] = HDI_years['Country'].replace(mapping)

In [None]:
# Left-join the boundary data onto the HDI table by country name. Every HDI_years
# row is kept; rows whose 'Country' has no matching 'ADMIN' get missing values in
# the columns that come from countries_geojson.
merged_data = pd.merge(HDI_years, countries_geojson, left_on='Country', right_on='ADMIN', how='left')
merged_data

In [None]:
# Output area (drawn with a thin black border) that will hold the rendered map.
HDI_by_country = ipywidgets.Output(layout={'border': '1px solid black'})

# The dropdown offers the merged_data columns at positions 2-9 and starts on the first one.
column_names = merged_data.columns[2:10].tolist()
dropdown_options = ipywidgets.Dropdown(
    options=column_names,
    value=column_names[0],
    description='Column:',
    disabled=False
)

def update_choropleth(change):
    """Redraw the world choropleth for the column currently selected in the dropdown.

    Args:
        change: the change notification passed by the widget observer. It is not
            read; the current selection is taken from dropdown_options.value.

    The previous map is cleared from the HDI_by_country output area and a new
    folium map, shaded by the selected merged_data column, is displayed there.
    """
    selected_column = dropdown_options.value

    HDI_by_country.clear_output()
    with HDI_by_country:
        world_map = folium.Map()
        # Shapes come from countries_geojson; values are joined to them through
        # the 'ADMIN' property.
        shaded_layer = folium.Choropleth(
            geo_data=countries_geojson,
            data=merged_data,
            columns=['ADMIN', selected_column],
            key_on='feature.properties.ADMIN',
            fill_color='YlGn',
            fill_opacity=0.7,
            line_opacity=0.2,
            legend_name=f'{selected_column} per Country',
        )
        shaded_layer.add_to(world_map)
        display(world_map)

# Redraw the map every time the dropdown's value changes.
dropdown_options.observe(update_choropleth, names='value')
display(dropdown_options)
# Draw the initial map. update_choropleth does not read its argument, so a minimal
# stand-in for the change notification is enough.
update_choropleth({'new': column_names[0]})

# Display the output area that holds the map.
HDI_by_country

In [None]:
# Display the HDI_components table as this cell's output.
HDI_components

In [None]:
# Rename two columns in place: 'HDI rank.1' becomes 'HDI rank 2020', and the HDI
# column loses the trailing space in its header.
# NOTE(review): 'HDI rank.1' is presumably the name pandas generated for a repeated
# 'HDI rank' header in the spreadsheet; confirm against the source file.
HDI_components.rename(columns={'HDI rank.1': 'HDI rank 2020', 'Human Development Index (HDI) ': 'Human Development Index (HDI)'}, inplace=True)
HDI_components

In [None]:
# Rows of HDI_components that have no 'HDI rank' value.
# Note: a later cell reassigns no_HDI to the opposite selection (rows that DO have a rank).
no_HDI = HDI_components[HDI_components['HDI rank'].isnull()]
no_HDI

In [None]:
# For each of the columns at positions 2-6 of HDI_components, print the maximum,
# minimum and mean, plus the countries holding the extremes.
for col in HDI_components.columns[2:7]:
    try:
        col_max = HDI_components[col].max()
        col_min = HDI_components[col].min()
        col_mean = HDI_components[col].mean()
    except (TypeError, ValueError):
        # Only a failure to compare or average the values (text mixed with numbers)
        # should trigger the conversion; any other error must surface.
        print(f"Values in {col} column are not numeric. Let's convert them to numeric.\n")
        # errors='coerce' turns anything that cannot be parsed into NaN.
        HDI_components[col] = pd.to_numeric(HDI_components[col], errors='coerce')
        col_max = HDI_components[col].max()
        col_min = HDI_components[col].min()
        col_mean = HDI_components[col].mean()

    # idxmax/idxmin give the row label of the extreme value; use it to look up the country.
    max_country = HDI_components.loc[HDI_components[col].idxmax()]['Country']
    min_country = HDI_components.loc[HDI_components[col].idxmin()]['Country']
    print(f"Maximum {col}: {col_max}, Country: {max_country}")
    print(f"Minimum {col}: {col_min}, Country: {min_country}")
    print(f"Mean {col}: {col_mean}\n")

In [None]:
# Count how many countries have a positive and how many a negative value in
# 'GNI per capita rank minus HDI rank'.
gni_minus_hdi = HDI_components['GNI per capita rank minus HDI rank']

# Convert only when needed, and say so only when the conversion actually happens.
if not pd.api.types.is_numeric_dtype(gni_minus_hdi):
    print("Values are not numeric. Let's convert them to numeric. Converting...\n")
    # errors='coerce' turns anything that cannot be parsed into NaN.
    gni_minus_hdi = pd.to_numeric(gni_minus_hdi, errors='coerce')
    HDI_components['GNI per capita rank minus HDI rank'] = gni_minus_hdi

# Both counts are always defined, whichever path ran above. Comparisons with NaN are
# False, so rows with no value fall in neither group, and neither does an exact 0
# (identical ranks).
positive_count = int((gni_minus_hdi > 0).sum())
negative_count = int((gni_minus_hdi < 0).sum())
counts = pd.Series({'positive': positive_count, 'negative': negative_count})

print("Number of Countries with a GNI ranking lower than their HDI ranking", positive_count)
print("Number of Countries with a GNI ranking higher than their HDI ranking:", negative_count)

In [None]:
# Two side-by-side scatter plots, one marker per country: the GNI-rank-minus-HDI-rank
# gap on the left and the HDI rank itself on the right.
rankings_fig = make_subplots(rows=1, cols=2, subplot_titles=("GNI per Capita Rank (without HDI rank) per Country", "HDI Rank per Country"))

# Each trace carries its own hover text, so no per-subplot update is needed afterwards.
gni_gap_trace = go.Scatter(
    x=HDI_components['Country'],
    y=HDI_components['GNI per capita rank minus HDI rank'],
    mode='markers',
    name='GNI Fig',
    hovertemplate='Country: %{x}<br>GNI per capita rank minus HDI rank: %{y}',
)
hdi_rank_trace = go.Scatter(
    x=HDI_components['Country'],
    y=HDI_components['HDI rank'],
    mode='markers',
    name='HDI Fig',
    hovertemplate='Country: %{x}<br>HDI rank: %{y}',
)
rankings_fig.add_trace(gni_gap_trace, row=1, col=1)
rankings_fig.add_trace(hdi_rank_trace, row=1, col=2)

rankings_fig.update_layout(
    title_text="Comparison of GNI per capita rank minus HDI rank and HDI rank for Different Countries",
    showlegend=False,
)

rankings_fig.update_yaxes(title_text="GNI per capita rank minus HDI rank", row=1, col=1)
rankings_fig.update_yaxes(title_text="HDI rank", row=1, col=2)
rankings_fig.show()

In [None]:
# Train a linear regression that predicts the HDI value from its four components.

# Despite its name, no_HDI now holds the rows that DO have an HDI rank. It is copied
# so that later cells can add columns to it without writing into a slice of
# HDI_components.
no_HDI = HDI_components[HDI_components['HDI rank'].notnull()].copy()
features = ['Life expectancy at birth (years)', 'Expected years of schooling (years)', 'Mean years of schooling (years)', 'Gross national income (GNI) per capita (2017 PPP $)']

X = no_HDI[features]
y = no_HDI['Human Development Index (HDI)']

# Fill missing feature values with the column mean.
imputer = SimpleImputer(strategy='mean')
X = imputer.fit_transform(X)

# fit_transform returns a bare array, so the rebuilt frame must reuse no_HDI's index.
# With a fresh 0..n-1 index, concat would align features and targets on unrelated
# labels and mis-pair or drop rows wherever the filtered index has gaps.
data = pd.concat([pd.DataFrame(X, columns=features, index=no_HDI.index), y], axis=1)
# Drop rows whose target is still missing (the features were imputed above).
data = data.dropna()

X = data[features]
y = data['Human Development Index (HDI)']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

r2 = r2_score(y_test, y_pred)
print("R-squared:", r2)

In [None]:
import math

def get_index(value, min_val, max_val, is_income_index=False):
    """Rescale value so that min_val maps to 0 and max_val maps to 1.

    Args:
        value: the quantity to rescale.
        min_val: the value that maps to 0.
        max_val: the value that maps to 1.
        is_income_index: when True, the rescaling is done on natural logarithms of
            all three quantities instead of on the raw numbers.

    Returns:
        The rescaled value. Nothing is clamped here, so inputs outside
        [min_val, max_val] give results outside [0, 1].
    """
    if not is_income_index:
        return (value - min_val) / (max_val - min_val)

    log_value = math.log(value)
    log_min = math.log(min_val)
    log_max = math.log(max_val)
    return (log_value - log_min) / (log_max - log_min)

def calculate_index(life_expectancy, expected_schooling, mean_schooling, gni_per_capita):
    """Turn the four raw HDI inputs into four indices.

    Each input is first limited to its allowed range (life expectancy 20-85,
    expected schooling 0-18, mean schooling 0-15, GNI per capita 100-75000) and
    then rescaled with get_index; the income index uses the logarithmic form.

    Returns:
        A tuple (health_index, expected_schooling_index, mean_schooling_index,
        income_index).
    """
    def limit(value, lower, upper):
        # Values inside the range pass through untouched; outliers snap to the nearest bound.
        if value < lower:
            return lower
        if value > upper:
            return upper
        return value

    life_expectancy = limit(life_expectancy, 20, 85)
    expected_schooling = limit(expected_schooling, 0, 18)
    mean_schooling = limit(mean_schooling, 0, 15)
    gni_per_capita = limit(gni_per_capita, 100, 75000)

    return (
        get_index(life_expectancy, 20, 85),
        get_index(expected_schooling, 0, 18),
        get_index(mean_schooling, 0, 15),
        get_index(gni_per_capita, 100, 75000, True),
    )

def calculate_hdi(life_expectancy, expected_schooling, mean_schooling, gni_per_capita):
    """Compute an HDI score from the four raw inputs.

    The education index is the average of the two schooling indices. The score is
    the cube root of the product of the health, education and income indices,
    rounded to three decimal places.
    """
    indices = calculate_index(life_expectancy, expected_schooling, mean_schooling, gni_per_capita)
    health, expected_school, mean_school, income = indices

    education = (expected_school + mean_school) / 2
    score = (health * education * income) ** (1 / 3)
    return round(score, 3)

In [None]:
# Example 1: Switzerland. All four inputs lie inside the ranges that calculate_index
# allows, so none of them is clamped.
life_expectancy = 83.9872
expected_schooling = 16.500299
mean_schooling = 13.85966
gni_per_capita = 66933.00454

hdi = calculate_hdi(life_expectancy, expected_schooling, mean_schooling, gni_per_capita)
print(f"Switzerland HDI score: {hdi}")

# Example 2: out-of-range inputs. A life expectancy of 5 is below the minimum of 20,
# so it is clamped to 20; that makes the health index 0 and therefore the whole
# product (and the score) 0. Expected schooling (20) and GNI (80000) are above their
# maximums and are clamped down as well.
life_expectancy = 5
expected_schooling = 20
mean_schooling = 8
gni_per_capita = 80000

nonvalid_hdi = calculate_hdi(life_expectancy, expected_schooling, mean_schooling, gni_per_capita)
print(f"Calculated HDI score (should be 0): {nonvalid_hdi}")

In [None]:
# Compare the regression model's prediction with the formula-based score for one
# high-HDI and one low-HDI country.
australia = HDI_components.loc[HDI_components['Country'] == 'Australia']
south_sudan = HDI_components.loc[HDI_components['Country'] == 'South Sudan']

# Columns in the same order as calculate_hdi's parameters.
hdi_input_columns = (
    'Life expectancy at birth (years)',
    'Expected years of schooling (years)',
    'Mean years of schooling (years)',
    'Gross national income (GNI) per capita (2017 PPP $)',
)

# Formula-based scores, using the first matching row of each selection.
australia_hdi = calculate_hdi(*(australia[column].values[0] for column in hdi_input_columns))
south_sudan_hdi = calculate_hdi(*(south_sudan[column].values[0] for column in hdi_input_columns))

# Model-based scores from the same rows.
predicted_australia = model.predict(australia[features])
predicted_south_sudan = model.predict(south_sudan[features])

print(f"Australia Predicted HDI score (expected value high): {predicted_australia[0]}")
print(f"South Sudan Predicted HDI score (expected value low): {predicted_south_sudan[0]}\n")

print(f"Australia Actual HDI score: {australia_hdi}")
print(f"South Sudan Actual HDI score: {south_sudan_hdi}")

In [None]:
# Plot the recorded HDI value next to the model's prediction for every ranked country.

# Work on a copy so that adding a column does not write into a slice of HDI_components.
no_HDI = no_HDI.copy()

# Reuse the already-fitted imputer (transform, not fit_transform) and keep the result
# in its own variable: X_test must keep matching y_test from the train/test split.
# Wrapping the array in a DataFrame keeps the feature names the model was trained with.
X_ranked = pd.DataFrame(imputer.transform(no_HDI[features]), columns=features, index=no_HDI.index)

no_HDI['Predicted HDI'] = model.predict(X_ranked)
ml_fig = make_subplots(rows=1, cols=2, subplot_titles=("Actual HDI Value", "Predicted HDI Value"))

ml_fig.add_trace(go.Scatter(x=no_HDI['Country'], y=no_HDI['Human Development Index (HDI)'], name="Actual HDI Value"), row=1, col=1)
ml_fig.add_trace(go.Scatter(x=no_HDI['Country'], y=no_HDI['Predicted HDI'], name="Predicted HDI Value"), row=1, col=2)

ml_fig.update_layout(title_text="Actual vs Predicted HDI Value",
                  showlegend=True)

ml_fig.show()

In [None]:
# Select and display the rows of HDI_components that have no 'HDI rank' value.
null_countries = HDI_components[HDI_components['HDI rank'].isnull()]
null_countries

In [None]:
# Predict an HDI value for every row, then plot the predictions for the countries
# that have no HDI rank.
features = ['Life expectancy at birth (years)', 'Expected years of schooling (years)', 'Mean years of schooling (years)', 'Gross national income (GNI) per capita (2017 PPP $)']

# Fill gaps with the already-fitted imputer so these rows get the same fill values the
# model saw during training. The result has its own name so X_test keeps matching
# y_test, and it is a DataFrame so the model receives the feature names it was fitted with.
X_all = pd.DataFrame(imputer.transform(HDI_components[features]), columns=features, index=HDI_components.index)

HDI_components['Predicted HDI'] = model.predict(X_all)

# Select the unranked rows only AFTER the prediction column exists. Selecting them
# first yields a frame without 'Predicted HDI', and the loop below fails with a
# KeyError on a fresh kernel.
null_countries = HDI_components[HDI_components['HDI rank'].isnull()]

# One trace per country so each gets its own legend entry.
null_countries_fig = go.Figure()
for index, row in null_countries.iterrows():
    null_countries_fig.add_trace(go.Scatter(x=[row['Country']], y=[row['Predicted HDI']], mode='markers', name=row['Country']))

null_countries_fig.update_layout(title_text="Predicted HDI Values for Countries without HDI Values",
                  xaxis_title="Country",
                  yaxis_title="Predicted HDI Value")

null_countries_fig.show()

In [None]:
# Display the HDI_inequality table as this cell's output.
HDI_inequality