In [1]:
import pandas as pd
import os

# Remove pandas' display limits (None = unlimited) so frames print in full,
# and keep wide frames from being wrapped across several blocks.
_UNLIMITED_DISPLAY_OPTIONS = (
    'display.max_rows',
    'display.max_columns',
    'display.max_colwidth',
    'display.width',
)
for _display_option in _UNLIMITED_DISPLAY_OPTIONS:
    pd.set_option(_display_option, None)
pd.set_option('display.expand_frame_repr', False)

In [2]:
import numpy as np
from sklearn.model_selection import KFold
from sklearn.impute import KNNImputer
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.ensemble import BaggingRegressor
from sklearn.tree import DecisionTreeRegressor
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tools.tools import add_constant
import re

In [3]:
# Folder that holds the data files for this notebook.
folder_loc = 'F:/class/BANA 698/week 8'

# Keep only regular files (sub-directories are skipped), then echo each name.
file_list = list(filter(
    lambda entry_name: os.path.isfile(os.path.join(folder_loc, entry_name)),
    os.listdir(folder_loc),
))
for file in file_list:
    print(file)

#week 8 - y.lnk
basic_stats_Group1DatasetCleaned.knn.standarddeviation.csv
basic_stats_Group1DatasetRaw.onehotted.csv
basic_stats_Group1Dataset_LCU_removed(except_exchange_rate).income.groups.regions.onehotted.csv
correlations.all.csv
correlations.all.to.life_expectancy_total.csv
Group1DatasetCleaned.knn.standarddeviation.csv
Group1DatasetRaw.csv
Group1DatasetRaw.onehotted.csv
Group1Dataset_LCU_removed(except_exchange_rate).income.groups.regions.csv
Group1Dataset_LCU_removed(except_exchange_rate).income.groups.regions.onehotted.20percent.nans.dropped.csv
Group1Dataset_LCU_removed(except_exchange_rate).income.groups.regions.onehotted.csv
ImputationAndStandardize.ipynb
README.md


In [4]:
# Read the chosen CSV from folder_loc into the working DataFrame.
file = 'Group1DatasetRaw.onehotted.csv'
df = pd.read_csv(filepath_or_buffer=os.path.join(folder_loc, file))

In [5]:
#for col in sorted(df.columns): print(col)

In [6]:
# Response column modelled throughout the notebook.
target = 'Life expectancy at birth, total (years)'

# Baseline columns that are never used as predictors: the target itself,
# its female/male variants, and the country-name and year columns.
cols_to_exclude = [target] + [
    'Life expectancy at birth, female (years)',
    'Life expectancy at birth, male (years)',
    'CountryShortName',
    'Year',
]


In [7]:
# Keyword arguments passed to BaggingRegressor(**bag_params).
# The key set was obtained by running:
#   model = BaggingRegressor()
#   model.get_params()
# NOTE(review): 'estimator', 'n_jobs' and 'random_state' look hand-set rather
# than left at the library defaults - confirm against the scikit-learn docs.
bag_params = dict(
    bootstrap = True,
    bootstrap_features = False,
    estimator = DecisionTreeRegressor(),
    max_features = 1.0,
    max_samples = 1.0,
    n_estimators = 10,
    n_jobs = -1,
    oob_score = False,
    random_state = 1,
    verbose = 0,
    warm_start = False,
)

In [8]:
def bagging_features(cols_to_exclude = cols_to_exclude, target = target, df = df, kfold_random_state = 1, n_neighbors = 5):
    """Cross-validate a bagged regressor and print its scores and feature ranking.

    For each of 5 shuffled K-fold splits, missing predictor values are filled
    by a KNN imputer fitted on the training fold only, a ``BaggingRegressor``
    built from the module-level ``bag_params`` is fitted, and MAE / MSE / R²
    are computed on the validation fold. Per-fold feature importances (mean
    over the fitted sub-estimators) are averaged across folds and printed as
    a ranked table.

    Note: the defaults for ``cols_to_exclude``, ``target`` and ``df`` are bound
    when this ``def`` is executed; re-run this cell after changing those
    globals if the new values should become the defaults.

    Parameters
    ----------
    cols_to_exclude : list of str
        Columns of ``df`` that must not be used as predictors. Every name has
        to exist in ``df``; ``DataFrame.drop`` raises ``KeyError`` otherwise.
    target : str
        Name of the response column in ``df``. It is always removed from the
        predictors, even if it is missing from ``cols_to_exclude``.
    df : pandas.DataFrame
        Data holding both the predictors and the target.
    kfold_random_state : int
        Seed for the K-fold shuffle.
    n_neighbors : int
        Number of neighbours used by the KNN imputer.

    Returns
    -------
    None
        Results are printed, not returned.
    """
    X = df.drop(columns = cols_to_exclude)
    # Guard against target leakage: a caller-supplied exclusion list that
    # forgets the target would otherwise let the model predict y from y.
    if target in X.columns:
        X = X.drop(columns = [target])
    y = df[target]

    # keep_empty_features=True keeps columns that are entirely NaN within a
    # training fold, so the imputed arrays always have one column per entry
    # of X.columns. Without it such columns are dropped and the DataFrame
    # reconstruction below fails with a shape mismatch.
    imputer = KNNImputer(n_neighbors = n_neighbors, keep_empty_features = True)
    model = BaggingRegressor(**bag_params)

    cv = KFold(n_splits=5, shuffle=True, random_state = kfold_random_state)

    mae_scores = []
    mse_scores = []
    r2_scores = []
    feature_importance_list = []

    for train_idx, val_idx in cv.split(X):
        X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
        y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]

        # Fit the imputer on the training fold only so that no information
        # from the validation fold leaks into the imputed values.
        X_train_imputed = imputer.fit_transform(X_train)
        X_val_imputed = imputer.transform(X_val)

        # Restore column names (and the original row labels) on the arrays
        # returned by the imputer.
        X_train_imputed = pd.DataFrame(X_train_imputed, columns=X_train.columns, index=X_train.index)
        X_val_imputed = pd.DataFrame(X_val_imputed, columns=X_val.columns, index=X_val.index)

        model.fit(X_train_imputed, y_train)
        y_pred = model.predict(X_val_imputed)

        mae_scores.append(mean_absolute_error(y_val, y_pred))
        mse_scores.append(mean_squared_error(y_val, y_pred))
        r2_scores.append(r2_score(y_val, y_pred))

        # Average the importances of the fitted sub-estimators.
        # NOTE(review): this lines up with X.columns only while every
        # sub-estimator sees all features (max_features=1.0 and
        # bootstrap_features=False in bag_params) - revisit if that changes.
        feature_importance_fold = np.mean([tree.feature_importances_ for tree in model.estimators_], axis=0)
        feature_importance_list.append(feature_importance_fold)

    avg_mae = np.mean(mae_scores)
    avg_mse = np.mean(mse_scores)
    avg_r2 = np.mean(r2_scores)

    print(f'Average MAE: {avg_mae:.4f}')
    print(f'Average MSE: {avg_mse:.4f}')
    print(f'Average R²: {avg_r2:.4f}')

    # Average the per-fold importances and rank features from most to least important.
    avg_feature_importance = np.mean(feature_importance_list, axis=0)
    feature_importance_df = pd.DataFrame({
        'Feature': X.columns,
        'Importance': avg_feature_importance
    })
    feature_importance_df = feature_importance_df.sort_values(by='Importance', ascending=False).reset_index(drop=True)
    feature_importance_df['Rank'] = feature_importance_df.index + 1
    feature_importance_df = feature_importance_df[['Feature', 'Importance', 'Rank']]

    print('\nSorted Average Feature Importance:')
    print(feature_importance_df.to_string(index=False))

In [9]:
# First run: baseline call with the function's default arguments, so only the
# columns in cols_to_exclude are dropped and every other column is a feature.
bagging_features()

Average MAE: 0.3837
Average MSE: 0.3676
Average R²: 0.9947

Sorted Average Feature Importance:
                                                                                                              Feature   Importance  Rank
                                                              Mortality rate, adult, female (per 1,000 female adults) 6.499488e-01     1
                                                                Mortality rate, under-5, male (per 1,000 live births) 1.415335e-01     2
                                                                                 Death rate, crude (per 1,000 people) 7.212093e-02     3
                                                                      Mortality rate, under-5 (per 1,000 live births) 5.605925e-02     4
                                                              Mortality rate, under-5, female (per 1,000 live births) 4.233866e-02     5
                                                         Domestic private health ex

In [None]:
# Death-count, death-rate, mortality-rate and probability-of-dying columns.
# They are added to the exclusion list in the later runs.
# NOTE(review): presumably dropped because they are near-direct proxies for
# life expectancy (they dominate the first run's importance table) - confirm.
mortality_cols_to_exclude = [
    'Death rate, crude (per 1,000 people)',
    'Lifetime risk of maternal death (%)',
    'Lifetime risk of maternal death (1 in: rate varies by country)',
    'Maternal mortality ratio (modeled estimate, per 100,000 live births)',
    'Number of maternal deaths',
    'Number of deaths ages 5-9 years',
    'Number of under-five deaths',
    'Number of infant deaths',
    'Number of deaths ages 10-14 years',
    'Number of neonatal deaths',
    'Number of deaths ages 15-19 years',
    'Number of deaths ages 20-24 years',
    'Mortality rate, adult, female (per 1,000 female adults)',
    'Mortality rate, infant, male (per 1,000 live births)',
    'Mortality rate, infant (per 1,000 live births)',
    'Mortality rate, adult, male (per 1,000 male adults)',
    'Mortality rate, infant, female (per 1,000 live births)',
    'Mortality rate, under-5, male (per 1,000 live births)',
    'Mortality rate, under-5 (per 1,000 live births)',
    'Mortality rate, under-5, female (per 1,000 live births)',
    'Mortality rate, neonatal (per 1,000 live births)',
    'Probability of dying among youth ages 20-24 years (per 1,000)',
    'Probability of dying among adolescents ages 15-19 years (per 1,000)',
    'Probability of dying among children ages 5-9 years (per 1,000)',
    'Probability of dying among adolescents ages 10-14 years (per 1,000)'   
]

In [None]:
# Second run: additional columns to exclude as redundant features.
addl_cols_to_exclude = [
    # Names listed in the "Feature_1" column produced by correlation_to_all_lister.ipynb.
    # NOTE(review): presumably one side of each highly correlated feature pair - confirm in that notebook.
    'Total greenhouse gas emissions including LULUCF (Mt CO2e)',
    'GDP, PPP (constant 2021 international $)',
    'General government final consumption expenditure (constant 2015 US$)',
    'GNI, PPP (current international $)',
    'Carbon dioxide (CO2) emissions (total) excluding LULUCF (Mt CO2e)',
    'GDP, PPP (current international $)',
    'Cereal production (metric tons)',
    'Methane (CH4) emissions (total) excluding LULUCF (Mt CO2e)',
    'Life expectancy at birth, female (years)',
    'Adjusted net national income (current US$)',
    'Fluorinated greenhouse gases (F-gases) emissions from Industrial Processes (Mt CO2e)',
    'Primary education, pupils',
    'Final consumption expenditure (constant 2015 US$)',
    'Adjusted net national income per capita (current US$)',
    'Number of deaths ages 15-19 years',
    'Self-employed, female (% of female employment) (modeled ILO estimate)',
    'Nitrous oxide (N2O) emissions (total) excluding LULUCF (Mt CO2e)',
    'Self-employed, male (% of male employment) (modeled ILO estimate)',
    'Mortality rate, infant (per 1,000 live births)',
    'GDP (constant 2015 US$)',
    'Life expectancy at birth, total (years)',
    'Carbon dioxide (CO2) emissions from Power Industry (Energy) (Mt CO2e)',
    'Carbon dioxide (CO2) emissions from Industrial Combustion (Energy) (Mt CO2e)',
    'Land under cereal production (hectares)',
    'GDP per capita, PPP (constant 2021 international $)',
    'Methane (CH4) emissions from Industrial Combustion (Energy) (Mt CO2e)',
    'Self-employed, total (% of total employment) (modeled ILO estimate)',
    'Carbon dioxide (CO2) emissions from Fugitive Emissions (Energy) (Mt CO2e)',
    'Carbon dioxide (CO2) emissions from Building (Energy) (Mt CO2e)',
    'Number of deaths ages 10-14 years',
    'Methane (CH4) emissions from Agriculture (Mt CO2e)',
    'Number of deaths ages 20-24 years',
    'GNI, Atlas method (current US$)',
    'Prevalence of anemia among children (% of children ages 6-59 months)',
    'Nitrous oxide (N2O) emissions from Industrial Combustion (Energy) (Mt CO2e)',
    'Rural population',
    'Nitrous oxide (N2O) emissions from Agriculture (Mt CO2e)',
    'GNI per capita, Atlas method (current US$)',
    'Methane (CH4) emissions from Waste (Mt CO2e)',
    'Mortality rate, infant, female (per 1,000 live births)',
    'Mortality rate, infant, male (per 1,000 live births)',
    'Vulnerable employment, female (% of female employment) (modeled ILO estimate)',
    'Nitrous oxide (N2O) emissions from Power Industry (Energy) (Mt CO2e)',
    'GNI (current US$)',
    'GDP per capita (current US$)',
    'GDP per capita (constant 2015 US$)',
    'Population ages 0-14 (% of total population)',
    'Vulnerable employment, male (% of male employment) (modeled ILO estimate)',
    'Carbon dioxide (CO2) emissions from Industrial Processes (Mt CO2e)',
    'Birth rate, crude (per 1,000 people)',
    'Mortality rate, under-5 (per 1,000 live births)',
    'Access to electricity (% of population)',
    'Life expectancy at birth, male (years)',
    'Mortality rate, neonatal (per 1,000 live births)',
    'Nitrous oxide (N2O) emissions from Building (Energy) (Mt CO2e)',
    'Number of deaths ages 5-9 years',
    'Nitrous oxide (N2O) emissions from Waste (Mt CO2e)',
    'Exports of goods, services and primary income (BoP, current US$)',
    'Access to electricity, rural (% of rural population)',
    'Aquaculture production (metric tons)',
    'Vulnerable employment, total (% of total employment) (modeled ILO estimate)',
    'Current health expenditure per capita (current US$)',
    'Population ages 0-14, female (% of female population)',
    'Total fisheries production (metric tons)',
    'Mortality rate, under-5, female (per 1,000 live births)',
    'GDP (current US$)',
    'Unemployment, male (% of male labor force) (modeled ILO estimate)',
    'Unemployment, female (% of female labor force) (modeled ILO estimate)',
    'Out-of-pocket expenditure per capita (current US$)',
    'People using at least basic sanitation services, rural (% of rural population)',
    'Number of infant deaths',
    'Population ages 0-14, male',
    'Population ages 0-14, male (% of male population)',
    'School enrollment, primary, female (% gross)',
    'Population ages 10-14, female (% of female population)',
    'Population ages 10-14, male (% of male population)',
    'Mortality rate, under-5, male (per 1,000 live births)',
    'Population ages 0-14, female',
    'Access to clean fuels and technologies for cooking (% of population)',
    'Total alcohol consumption per capita (liters of pure alcohol, projected estimates, 15+ years of age)',
    'GDP per capita, PPP (current international $)',
    'Employment to population ratio, ages 15-24, female (%) (modeled ILO estimate)',
    'Domestic credit to private sector (% of GDP)',
    'Employment to population ratio, ages 15-24, male (%) (modeled ILO estimate)',
    'Methane (CH4) emissions from Power Industry (Energy) (Mt CO2e)',
    'Unemployment, total (% of total labor force) (modeled ILO estimate)',
    'Current health expenditure per capita, PPP (current international $)',
    'Forest area (sq. km)',
    'GNI per capita, PPP (current international $)',
    'Employment to population ratio, 15+, female (%) (modeled ILO estimate)',
    'Carbon dioxide (CO2) emissions from Transport (Energy) (Mt CO2e)',
    'Wage and salaried workers, female (% of female employment) (modeled ILO estimate)',
    'Prevalence of anemia among non-pregnant women (% of women ages 15-49)',
    'Maternal mortality ratio (modeled estimate, per 100,000 live births)',
    'Methane (CH4) emissions from Building (Energy) (Mt CO2e)',
    'Methane (CH4) emissions from Fugitive Emissions (Energy) (Mt CO2e)',
    'Prevalence of anemia among pregnant women (%)',
    'Urban population',
    'School enrollment, primary, male (% gross)',
    'Unemployment, youth male (% of male labor force ages 15-24) (modeled ILO estimate)',
    'Unemployment, youth female (% of female labor force ages 15-24) (modeled ILO estimate)',
    'Probability of dying among adolescents ages 10-14 years (per 1,000)',
    'Rural population (% of total population)',
    'Total greenhouse gas emissions excluding LULUCF (Mt CO2e)',
    'Total alcohol consumption per capita, female (liters of pure alcohol, projected estimates, female 15+ years of age)',
    'Share of youth not in education, employment or training, female (% of female youth population) (modeled ILO estimate)',
    'Probability of dying among adolescents ages 15-19 years (per 1,000)',
    'Methane (CH4) emissions from Transport (Energy) (Mt CO2e)',
    'Population ages 15-19, female (% of female population)',
    'People using at least basic sanitation services, urban (% of urban population)',
    'Carbon dioxide (CO2) emissions excluding LULUCF per capita (t CO2e/capita)',
    'Communications, computer, etc. (% of service exports, BoP)',
    'Communications, computer, etc. (% of service imports, BoP)',
    'Domestic general government health expenditure per capita (current US$)',
    'Domestic private health expenditure per capita (current US$)',
    'Employment to population ratio, 15+, male (%) (modeled ILO estimate)',
    'Employment to population ratio, 15+, total (%) (modeled ILO estimate)',
    'Employment to population ratio, ages 15-24, total (%) (modeled ILO estimate)',
    'GDP growth (annual %)',
    'Immunization, DPT (% of children ages 12-23 months)',
    'Inflation, GDP deflator (annual %)',
    'Labor force participation rate for ages 15-24, female (%) (modeled ILO estimate)',
    'Labor force participation rate for ages 15-24, male (%) (modeled ILO estimate)',
    'Labor force participation rate, female (% of female population ages 15+) (modeled ILO estimate)',
    'Labor force participation rate, female (% of female population ages 15-64) (modeled ILO estimate)',
    'Labor force, female (% of total labor force)',
    'Land area (sq. km)',
    'Lifetime risk of maternal death (%)',
    'Monetary Sector credit to private sector (% GDP)',
    'Mortality rate, adult, female (per 1,000 female adults)',
    'Nitrous oxide (N2O) emissions from Industrial Processes (Mt CO2e)',
    'Number of maternal deaths',
    'Number of neonatal deaths',
    'Out-of-pocket expenditure (% of current health expenditure)',
    'People using at least basic drinking water services, urban (% of urban population)',
    'Wage and salaried workers, male (% of male employment) (modeled ILO estimate)'
]
# Merge the baseline, mortality and additional exclusions into one
# de-duplicated list, then rerun the model without those columns.
new_cols_to_exclude = list({*cols_to_exclude, *mortality_cols_to_exclude, *addl_cols_to_exclude})
bagging_features(new_cols_to_exclude)

Average MAE: 0.6989
Average MSE: 1.3180
Average R²: 0.9811

Sorted Average Feature Importance:
                                                                                                          Feature   Importance  Rank
                                                People using at least basic sanitation services (% of population) 6.273059e-01     1
                         Domestic general government health expenditure per capita, PPP (current international $) 1.196438e-01     2
                                                                                        Region_Sub-Saharan Africa 5.415043e-02     3
                                                             Access to electricity, urban (% of urban population) 2.438145e-02     4
                                 Prevalence of stunting, height for age (modeled estimate, % of children under 5) 1.934422e-02     5
                                                            Domestic credit to private sector by banks (% o

In [None]:
# Third run: a different set of additional columns to exclude as redundant features.
# This rebinds addl_cols_to_exclude, replacing the list used for the second run.
addl_cols_to_exclude = [
    # Names listed in the "Feature_2" column produced by correlation_to_all_lister.ipynb.
    # NOTE(review): presumably the other side of each highly correlated feature pair - confirm in that notebook.
    'Total greenhouse gas emissions excluding LULUCF (Mt CO2e)',
    'Nitrous oxide (N2O) emissions from Waste (Mt CO2e)',
    'Urban population',
    'Labor force, total',
    'Nitrous oxide (N2O) emissions from Transport (Energy) (Mt CO2e)',
    'Methane (CH4) emissions from Power Industry (Energy) (Mt CO2e)',
    'Carbon dioxide (CO2) emissions from Power Industry (Energy) (Mt CO2e)',
    'Carbon dioxide (CO2) emissions from Building (Energy) (Mt CO2e)',
    'GDP (current US$)',
    'GNI (current US$)',
    'Mortality rate, under-5, male (per 1,000 live births)',
    'Nitrous oxide (N2O) emissions from Power Industry (Energy) (Mt CO2e)',
    'GNI, Atlas method (current US$)',
    'Nitrous oxide (N2O) emissions from Agriculture (Mt CO2e)',
    'Nitrous oxide (N2O) emissions (total) excluding LULUCF (Mt CO2e)',
    'Wage and salaried workers, total (% of total employment) (modeled ILO estimate)',
    'Carbon dioxide (CO2) emissions from Transport (Energy) (Mt CO2e)',
    'Nitrous oxide (N2O) emissions from Building (Energy) (Mt CO2e)',
    'Nitrous oxide (N2O) emissions from Industrial Combustion (Energy) (Mt CO2e)',
    'Mortality rate, under-5 (per 1,000 live births)',
    'Carbon dioxide (CO2) emissions (total) excluding LULUCF (Mt CO2e)',
    'Wage and salaried workers, male (% of male employment) (modeled ILO estimate)',
    'Wage and salaried workers, female (% of female employment) (modeled ILO estimate)',
    'Methane (CH4) emissions from Waste (Mt CO2e)',
    'GDP (constant 2015 US$)',
    'Domestic general government health expenditure per capita, PPP (current international $)',
    'Mortality rate, under-5, female (per 1,000 live births)',
    'Population ages 0-14, total',
    'Probability of dying among children ages 5-9 years (per 1,000)',
    'Number of under-five deaths',
    'Number of neonatal deaths',
    'Population ages 0-14, male',
    'Vulnerable employment, total (% of total employment) (modeled ILO estimate)',
    'Carbon dioxide (CO2) emissions from Industrial Processes (Mt CO2e)',
    'Mortality rate, infant, male (per 1,000 live births)',
    'Unemployment, youth total (% of total labor force ages 15-24) (modeled ILO estimate)',
    'GDP per capita (current US$)',
    'Cereal production (metric tons)',
    'Current health expenditure per capita (current US$)',
    'Methane (CH4) emissions from Fugitive Emissions (Energy) (Mt CO2e)',
    'Domestic general government health expenditure per capita (current US$)',
    'Methane (CH4) emissions (total) excluding LULUCF (Mt CO2e)',
    'Mortality rate, neonatal (per 1,000 live births)',
    'Methane (CH4) emissions from Industrial Combustion (Energy) (Mt CO2e)',
    'GDP per person employed (constant 2021 PPP $)',
    'Number of deaths ages 20-24 years',
    'Population ages 10-14, female (% of female population)',
    'Vulnerable employment, male (% of male employment) (modeled ILO estimate)',
    'Carbon dioxide (CO2) emissions from Industrial Combustion (Energy) (Mt CO2e)',
    'Current health expenditure per capita, PPP (current international $)',
    'Total greenhouse gas emissions including LULUCF (Mt CO2e)',
    'Fluorinated greenhouse gases (F-gases) emissions from Industrial Processes (Mt CO2e)',
    'Population ages 10-14, male (% of male population)',
    'GDP, PPP (current international $)',
    'Population ages 0-14, female',
    'People using at least basic sanitation services (% of population)',
    'Number of infant deaths',
    'Mortality rate, adult, male (per 1,000 male adults)',
    'Number of deaths ages 5-9 years',
    'Prevalence of anemia among women of reproductive age (% of women ages 15-49)',
    'Population ages 0-14, male (% of male population)',
    'Probability of dying among adolescents ages 10-14 years (per 1,000)',
    'Vulnerable employment, female (% of female employment) (modeled ILO estimate)',
    'Mortality rate, infant, female (per 1,000 live births)',
    'Mortality rate, infant (per 1,000 live births)',
    'Mortality rate, adult, female (per 1,000 female adults)',
    'GDP, PPP (constant 2021 international $)',
    'GNI per capita, Atlas method (current US$)',
    'Labor force participation rate for ages 15-24, total (%) (modeled ILO estimate)',
    'GNI, PPP (current international $)',
    'Unemployment, youth male (% of male labor force ages 15-24) (modeled ILO estimate)',
    'Population ages 15-19, male (% of male population)',
    'People using at least basic sanitation services, urban (% of urban population)',
    'Population ages 0-14, female (% of female population)',
    'Adjusted net national income (current US$)',
    'Carbon dioxide (CO2) emissions from Fugitive Emissions (Energy) (Mt CO2e)',
    'Domestic credit to private sector by banks (% of GDP)',
    'GDP per capita, PPP (current international $)',
    'Prevalence of anemia among pregnant women (%)',
    'Unemployment, total (% of total labor force) (modeled ILO estimate)',
    'GDP per capita (constant 2015 US$)',
    'People using at least basic drinking water services (% of population)',
    'School enrollment, primary (% gross)',
    'Self-employed, total (% of total employment) (modeled ILO estimate)',
    'Surface area (sq. km)',
    'Total alcohol consumption per capita, male (liters of pure alcohol, projected estimates, male 15+ years of age)',
    'Employment to population ratio, ages 15-24, total (%) (modeled ILO estimate)',
    'Population ages 15-19, female (% of female population)',
    'Labor force participation rate, female (% of female population ages 15-64) (modeled ILO estimate)',
    'Land under cereal production (hectares)',
    'Methane (CH4) emissions from Transport (Energy) (Mt CO2e)',
    'Nitrous oxide (N2O) emissions from Industrial Processes (Mt CO2e)',
    'Life expectancy at birth, total (years)',
    'Out-of-pocket expenditure per capita, PPP (current international $)',
    'Total alcohol consumption per capita, female (liters of pure alcohol, projected estimates, female 15+ years of age)',
    'Life expectancy at birth, male (years)',
    'Total fisheries production (metric tons)',
    'Domestic private health expenditure per capita, PPP (current international $)',
    'Domestic private health expenditure (% of current health expenditure)',
    'Total greenhouse gas emissions per capita excluding LULUCF (t CO2e/capita)',
    'Unemployment, youth female (% of female labor force ages 15-24) (modeled ILO estimate)',
    'Land area (sq. km)',
    'Maternal mortality ratio (modeled estimate, per 100,000 live births)',
    'Methane (CH4) emissions from Agriculture (Mt CO2e)',
    'Computer, communications and other services (% of commercial service imports)',
    'Urban population (% of total population)',
    'Computer, communications and other services (% of commercial service exports)',
    'Monetary Sector credit to private sector (% GDP)',
    'Access to electricity, urban (% of urban population)',
    'Access to electricity (% of population)',
    'Share of youth not in education, employment or training, total (% of youth population) (modeled ILO estimate)',
    'School enrollment, primary, male (% gross)',
    'Self-employed, male (% of male employment) (modeled ILO estimate)',
    'GDP per capita growth (annual %)',
    'Inflation, GDP deflator: linked series (annual %)',
    'Population ages 0-14 (% of total population)',
    'GNI per capita, PPP (current international $)',
    'Labor force participation rate for ages 15-24, female (%) (modeled ILO estimate)',
    'Labor force participation rate for ages 15-24, male (%) (modeled ILO estimate)',
    'Number of deaths ages 15-19 years',
    'Labor force participation rate, female (% of female population ages 15+) (modeled ILO estimate)',
    'GDP per capita, PPP (constant 2021 international $)',
    'Access to clean fuels and technologies for cooking, urban (% of urban population)',
    'Immunization, measles (% of children ages 12-23 months)',
    'Prevalence of anemia among non-pregnant women (% of women ages 15-49)',
    'Labor force participation rate, male (% of male population ages 15+) (modeled ILO estimate)',
    'Final consumption expenditure (constant 2015 US$)',
    'Labor force participation rate, total (% of total population ages 15+) (modeled ILO estimate)',
    'Probability of dying among youth ages 20-24 years (per 1,000)',
    'Ratio of female to male labor force participation rate (%) (modeled ILO estimate)',
    'Rural population',
    'Labor force participation rate, total (% of total population ages 15-64) (modeled ILO estimate)',
    'Access to clean fuels and technologies for cooking, rural (% of rural population)'
]
# Merge the baseline, mortality and additional exclusions into one
# de-duplicated list, then rerun the model without those columns.
new_cols_to_exclude = list({*cols_to_exclude, *mortality_cols_to_exclude, *addl_cols_to_exclude})
bagging_features(new_cols_to_exclude)

Average MAE: 0.6931
Average MSE: 1.1971
Average R²: 0.9828

Sorted Average Feature Importance:
                                                                                                              Feature   Importance  Rank
                                                                 Access to electricity, rural (% of rural population) 2.319018e-01     1
                                                                                 Birth rate, crude (per 1,000 people) 1.829953e-01     2
                                                                                            Region_Sub-Saharan Africa 1.549752e-01     3
                                                 Prevalence of anemia among children (% of children ages 6-59 months) 1.262994e-01     4
                                                         Domestic private health expenditure per capita (current US$) 8.571319e-02     5
                                                                Adjusted net nation