# Analysis of Overall Healthcare Worldwide

In [1]:
# Dependencies and Setup
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as st
import numpy as np

# Study data files
country_data_path = "Data/final.csv"
doctors_data_path = "Data/Medical_doctors.csv"
population_data_path = "Data/population.csv"

# Read the mouse data and the study results
country_data = pd.read_csv(country_data_path)
doctors_data = pd.read_csv(doctors_data_path)
population_data = pd.read_csv(population_data_path)

## Data Cleaning

### Understanding the data - Pre-cleaning

In [2]:
# Check "final" dataset info
country_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9928 entries, 0 to 9927
Data columns (total 22 columns):
 #   Column                                   Non-Null Count  Dtype  
---  ------                                   --------------  -----  
 0   Country                                  9928 non-null   object 
 1   Year                                     9928 non-null   int64  
 2   Gender                                   9928 non-null   object 
 3   Life expectancy                          9928 non-null   float64
 4   Unemployment                             9928 non-null   float64
 5   Infant Mortality                         9928 non-null   float64
 6   GDP                                      9928 non-null   float64
 7   GNI                                      9928 non-null   float64
 8   Clean fuels and cooking technologies     9928 non-null   float64
 9   Per Capita                               9928 non-null   float64
 10  Mortality caused by road traffic injury  9928 no

In [3]:
# Check "Medical_doctors" dataset info
doctors_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3583 entries, 0 to 3582
Data columns (total 7 columns):
 #   Column                                        Non-Null Count  Dtype  
---  ------                                        --------------  -----  
 0   Country                                       3583 non-null   object 
 1   Year                                          3583 non-null   int64  
 2   Medical doctors (per 10 000 population)       2844 non-null   float64
 3   Medical doctors (number)                      3410 non-null   float64
 4   Generalist medical practitioners (number)     1279 non-null   float64
 5   Specialist medical practitioners (number)     1195 non-null   float64
 6   Medical doctors not further defined (number)  2780 non-null   float64
dtypes: float64(5), int64(1), object(1)
memory usage: 196.1+ KB


In [4]:
# Check "population" dataset info
population_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 266 entries, 0 to 265
Data columns (total 67 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Country Name    266 non-null    object 
 1   Country Code    266 non-null    object 
 2   Indicator Name  266 non-null    object 
 3   Indicator Code  266 non-null    object 
 4   1960            265 non-null    float64
 5   1961            265 non-null    float64
 6   1962            265 non-null    float64
 7   1963            265 non-null    float64
 8   1964            265 non-null    float64
 9   1965            265 non-null    float64
 10  1966            265 non-null    float64
 11  1967            265 non-null    float64
 12  1968            265 non-null    float64
 13  1969            265 non-null    float64
 14  1970            265 non-null    float64
 15  1971            265 non-null    float64
 16  1972            265 non-null    float64
 17  1973            265 non-null    flo

In [5]:
# Compare countries and years present in 2 datasets.
print(f"Number of countries in the final file is {country_data['Country'].nunique()}")
print(f"Number of countries in the Medical_doctors file is {doctors_data['Country'].nunique()}")
print(f"Number of years in the final file is {country_data['Year'].nunique()}, the earliest is {country_data['Year'].min()}, the latest is {country_data['Year'].max()}")
print(f"Number of years in the Medical_doctors file is {doctors_data['Year'].nunique()}, the earliest is {doctors_data['Year'].min()}, the latest is {doctors_data['Year'].max()}")
print(f"\nNumber of Male and Female rows:\n{country_data.groupby('Gender').size()}")

Number of countries in the final file is 256
Number of countries in the Medical_doctors file is 196
Number of years in the final file is 20, the earliest is 2000, the latest is 2019
Number of years in the Medical_doctors file is 69, the earliest is 1952, the latest is 2020

Number of Male and Female rows:
Gender
Female    4964
Male      4964
dtype: int64


### Cleaning the data

In [6]:
# Define list of the years to be included into the analysis
first_year = 2015
last_year = 2019
years_list = list(range(first_year,last_year+1, 1))

years_list

[2015, 2016, 2017, 2018, 2019]

#### Clean "population" dataset

In [7]:
# Remove unnecessary columns
population_data_reduced = population_data.drop(columns=['Country Code','Indicator Name','Indicator Code'])

# Rename column Country Name to align with other datasets
population_data_reduced.rename(columns={'Country Name': 'Country'}, inplace=True)

# Transpose years into columns
population_data_reduced = pd.melt(population_data_reduced, id_vars=['Country'], 
                                var_name= 'Year', value_name='Population, female (percent of total)')

# Change Year values into integer
population_data_reduced['Year'] = population_data_reduced['Year'].astype(int)

# Filter on years
population_data_recent = population_data_reduced[population_data_reduced["Year"].isin(years_list)]

population_data_recent

Unnamed: 0,Country,Year,"Population, female (percent of total)"
14630,Aruba,2015,52.589275
14631,Africa Eastern and Southern,2015,50.531940
14632,Afghanistan,2015,49.423184
14633,Africa Western and Central,2015,49.796591
14634,Angola,2015,50.657496
...,...,...,...
15955,Kosovo,2019,50.127604
15956,"Yemen, Rep.",2019,49.428680
15957,South Africa,2019,51.438495
15958,Zambia,2019,50.673761


#### Clean country level "final" dataset:

In [8]:
# Remove unnecessary columns
country_data_reduced = country_data.drop(columns=['Unemployment',
                                                'Infant Mortality',
                                                'GNI',
                                                'Clean fuels and cooking technologies',
                                                'Mortality caused by road traffic injury',
                                                'Tuberculosis Incidence',
                                                'Tuberculosis treatment',
                                                'Non-communicable Mortality',
                                                'Sucide Rate'
                                                ]
                                        )

# Filter on years
country_data_recent = country_data_reduced[country_data_reduced["Year"].isin(years_list)]

# Remove duplicated rows (if any)
country_data_recent.drop_duplicates(["Country","Year","Gender"])

# Identify countries that do not have all the records for 5 years
countries_missing_year = country_data_recent.groupby("Country").filter(lambda x: len(x) <
                                                     (last_year - first_year+1)*2)["Country"].unique()

# Print list of the countriest that are excluded from the analysis due to the missing information
print(f"{countries_missing_year} were removed from the analysis,\n because of incomplete information in column 'Year'.")

# Remove countries that do not have all the records for 5 years
country_data_recent = country_data_recent[~country_data_recent["Country"].isin(countries_missing_year)]

country_data_recent

['Curacao' 'Greenland' 'Sint Maarten (Dutch part)'] were removed from the analysis,
 because of incomplete information in column 'Year'.


Unnamed: 0,Country,Year,Gender,Life expectancy,GDP,Per Capita,DPT Immunization,HepB3 Immunization,Measles Immunization,Hospital beds,Basic sanitation services,Urban population,Rural population
15,Afghanistan,2015,Female,64.877,1.913421e+10,556.007221,65.0,65.0,63.0,0.500000,42.626023,24.803,75.197
16,Afghanistan,2016,Female,65.275,1.811656e+10,512.012778,66.0,66.0,64.0,0.500000,44.201217,25.020,74.980
17,Afghanistan,2017,Female,65.656,1.875347e+10,516.679862,66.0,66.0,64.0,0.390000,45.789127,25.250,74.750
18,Afghanistan,2018,Female,66.026,1.805323e+10,485.668419,66.0,66.0,64.0,0.432222,47.390662,25.495,74.505
19,Afghanistan,2019,Female,66.388,1.879945e+10,494.179350,66.0,66.0,64.0,0.432222,49.006170,25.754,74.246
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9923,Zimbabwe,2015,Male,57.778,1.996312e+10,1445.069702,87.0,87.0,86.0,2.350000,38.120646,32.385,67.615
9924,Zimbabwe,2016,Male,58.565,2.054868e+10,1464.588957,90.0,90.0,95.0,2.350000,37.529193,32.296,67.704
9925,Zimbabwe,2017,Male,59.105,1.758489e+10,1235.189032,89.0,89.0,90.0,2.350000,36.941673,32.237,67.763
9926,Zimbabwe,2018,Male,59.501,1.811554e+10,1254.642265,89.0,89.0,88.0,2.350000,36.357160,32.209,67.791


#### Merge population and final datasets

In [9]:
# Merge country "final" and "population" datasets:
country_population = pd.merge(country_data_recent, population_data_recent, on= ["Country", "Year"], how="left")

country_population

Unnamed: 0,Country,Year,Gender,Life expectancy,GDP,Per Capita,DPT Immunization,HepB3 Immunization,Measles Immunization,Hospital beds,Basic sanitation services,Urban population,Rural population,"Population, female (percent of total)"
0,Afghanistan,2015,Female,64.877,1.913421e+10,556.007221,65.0,65.0,63.0,0.500000,42.626023,24.803,75.197,49.423184
1,Afghanistan,2016,Female,65.275,1.811656e+10,512.012778,66.0,66.0,64.0,0.500000,44.201217,25.020,74.980,49.414608
2,Afghanistan,2017,Female,65.656,1.875347e+10,516.679862,66.0,66.0,64.0,0.390000,45.789127,25.250,74.750,49.419284
3,Afghanistan,2018,Female,66.026,1.805323e+10,485.668419,66.0,66.0,64.0,0.432222,47.390662,25.495,74.505,49.437209
4,Afghanistan,2019,Female,66.388,1.879945e+10,494.179350,66.0,66.0,64.0,0.432222,49.006170,25.754,74.246,49.455485
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2455,Zimbabwe,2015,Male,57.778,1.996312e+10,1445.069702,87.0,87.0,86.0,2.350000,38.120646,32.385,67.615,52.999890
2456,Zimbabwe,2016,Male,58.565,2.054868e+10,1464.588957,90.0,90.0,95.0,2.350000,37.529193,32.296,67.704,52.973111
2457,Zimbabwe,2017,Male,59.105,1.758489e+10,1235.189032,89.0,89.0,90.0,2.350000,36.941673,32.237,67.763,52.948393
2458,Zimbabwe,2018,Male,59.501,1.811554e+10,1254.642265,89.0,89.0,88.0,2.350000,36.357160,32.209,67.791,52.923755


In [10]:
# List Countries/Years that do not have Population, female (percent of total) data
country_no_ratio = country_population[
    country_population['Population, female (percent of total)'].isnull()]['Country'].unique()

print (f'Countries, with no information on Male/Female population ratio: {country_no_ratio},\n assumption applied that Male/Female ration is 50%')

Countries, with no information on Male/Female population ratio: ['Czech Republic' 'Turkey'],
 assumption applied that Male/Female ration is 50%


In [11]:
# Replace missing values in the Population, female (percent of total) Column with 50%.
country_population['Population, female (percent of total)'].fillna(50, inplace=True)
country_population.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2460 entries, 0 to 2459
Data columns (total 14 columns):
 #   Column                                 Non-Null Count  Dtype  
---  ------                                 --------------  -----  
 0   Country                                2460 non-null   object 
 1   Year                                   2460 non-null   int64  
 2   Gender                                 2460 non-null   object 
 3   Life expectancy                        2460 non-null   float64
 4   GDP                                    2460 non-null   float64
 5   Per Capita                             2460 non-null   float64
 6   DPT Immunization                       2460 non-null   float64
 7   HepB3 Immunization                     2460 non-null   float64
 8   Measles Immunization                   2460 non-null   float64
 9   Hospital beds                          2460 non-null   float64
 10  Basic sanitation services              2460 non-null   float64
 11  Urba

#### Clean "Medical_doctors" dataset:


In [12]:
# Remove unnecessary columns
doctors_data_reduced = doctors_data.drop(columns=['Medical doctors (per 10 000 population)',
                                                'Generalist medical practitioners (number)',
                                                'Specialist medical practitioners (number)',
                                                'Medical doctors not further defined (number)',
                                                ]
                                        )

# Filter on years
doctors_data_recent = doctors_data_reduced[doctors_data_reduced["Year"].isin(years_list)]

doctors_data_recent

Unnamed: 0,Country,Year,Medical doctors (number)
1,Afghanistan,2019,8082.0
2,Afghanistan,2018,11026.0
3,Afghanistan,2017,8744.0
4,Afghanistan,2016,9842.0
5,Afghanistan,2015,9808.0
...,...,...,...
3557,Zambia,2016,1514.0
3565,Zimbabwe,2018,3026.0
3566,Zimbabwe,2017,2646.0
3567,Zimbabwe,2016,2508.0


### Creating a combined Data Frame 

#### Create and clean dataset broken down by Gender

In [13]:
# Combine everything into a single DataFrame
country_doctors_fm = pd.merge(country_population,doctors_data_recent, on= ["Country", "Year"], how= "left")

country_doctors_fm

Unnamed: 0,Country,Year,Gender,Life expectancy,GDP,Per Capita,DPT Immunization,HepB3 Immunization,Measles Immunization,Hospital beds,Basic sanitation services,Urban population,Rural population,"Population, female (percent of total)",Medical doctors (number)
0,Afghanistan,2015,Female,64.877,1.913421e+10,556.007221,65.0,65.0,63.0,0.500000,42.626023,24.803,75.197,49.423184,9808.0
1,Afghanistan,2016,Female,65.275,1.811656e+10,512.012778,66.0,66.0,64.0,0.500000,44.201217,25.020,74.980,49.414608,9842.0
2,Afghanistan,2017,Female,65.656,1.875347e+10,516.679862,66.0,66.0,64.0,0.390000,45.789127,25.250,74.750,49.419284,8744.0
3,Afghanistan,2018,Female,66.026,1.805323e+10,485.668419,66.0,66.0,64.0,0.432222,47.390662,25.495,74.505,49.437209,11026.0
4,Afghanistan,2019,Female,66.388,1.879945e+10,494.179350,66.0,66.0,64.0,0.432222,49.006170,25.754,74.246,49.455485,8082.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2455,Zimbabwe,2015,Male,57.778,1.996312e+10,1445.069702,87.0,87.0,86.0,2.350000,38.120646,32.385,67.615,52.999890,2508.0
2456,Zimbabwe,2016,Male,58.565,2.054868e+10,1464.588957,90.0,90.0,95.0,2.350000,37.529193,32.296,67.704,52.973111,2508.0
2457,Zimbabwe,2017,Male,59.105,1.758489e+10,1235.189032,89.0,89.0,90.0,2.350000,36.941673,32.237,67.763,52.948393,2646.0
2458,Zimbabwe,2018,Male,59.501,1.811554e+10,1254.642265,89.0,89.0,88.0,2.350000,36.357160,32.209,67.791,52.923755,3026.0


In [14]:
# Remove rows without Medical doctors (number) information
country_doctors_fm_filtered = country_doctors_fm.dropna(subset=["Medical doctors (number)"])

country_doctors_fm_filtered

Unnamed: 0,Country,Year,Gender,Life expectancy,GDP,Per Capita,DPT Immunization,HepB3 Immunization,Measles Immunization,Hospital beds,Basic sanitation services,Urban population,Rural population,"Population, female (percent of total)",Medical doctors (number)
0,Afghanistan,2015,Female,64.877,1.913421e+10,556.007221,65.0,65.0,63.0,0.500000,42.626023,24.803,75.197,49.423184,9808.0
1,Afghanistan,2016,Female,65.275,1.811656e+10,512.012778,66.0,66.0,64.0,0.500000,44.201217,25.020,74.980,49.414608,9842.0
2,Afghanistan,2017,Female,65.656,1.875347e+10,516.679862,66.0,66.0,64.0,0.390000,45.789127,25.250,74.750,49.419284,8744.0
3,Afghanistan,2018,Female,66.026,1.805323e+10,485.668419,66.0,66.0,64.0,0.432222,47.390662,25.495,74.505,49.437209,11026.0
4,Afghanistan,2019,Female,66.388,1.879945e+10,494.179350,66.0,66.0,64.0,0.432222,49.006170,25.754,74.246,49.455485,8082.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2453,Zambia,2018,Male,60.533,2.631159e+10,1516.368371,90.0,90.0,94.0,1.966667,31.052407,43.521,56.479,50.690913,2026.0
2455,Zimbabwe,2015,Male,57.778,1.996312e+10,1445.069702,87.0,87.0,86.0,2.350000,38.120646,32.385,67.615,52.999890,2508.0
2456,Zimbabwe,2016,Male,58.565,2.054868e+10,1464.588957,90.0,90.0,95.0,2.350000,37.529193,32.296,67.704,52.973111,2508.0
2457,Zimbabwe,2017,Male,59.105,1.758489e+10,1235.189032,89.0,89.0,90.0,2.350000,36.941673,32.237,67.763,52.948393,2646.0


#### Create and clean dataset for total population (without divison between Female and Male):

In [15]:
# Create a full list of countries (from the main "final" file)
countries_list = country_doctors_fm["Country"].unique().tolist()

# Create a list of columns to compare Male/Female values
columns_list = country_doctors_fm.columns.tolist()
excluded_columns = ["Country","Year","Gender", "Population, female (percent of total)", "Medical doctors (number)"]
columns_to_compare = [x for x in columns_list if x not in excluded_columns]

# Creat lists to store column names for columns with equal values for Female and Male across one country/year and columns with diffrenet values
equal_column_list = []
non_equal_column_list = []

# Check which columns values are different for Male and Female, and which ones are the same. 
# Loop through countries
for country in countries_list:
    # Loop throught years
    for year in years_list:
        female_row = (country_doctors_fm["Country"] == country) & (country_doctors_fm["Year"] == year) & (country_doctors_fm["Gender"] == "Female")
        male_row = (country_doctors_fm["Country"] == country) & (country_doctors_fm["Year"] == year) & (country_doctors_fm["Gender"] == "Male")
        # Loop through columns
        for column in columns_to_compare:
            female_value = country_doctors_fm.loc[female_row, column].values[0]
            male_value = country_doctors_fm.loc[male_row, column].values[0]
      
            # Compare values across columns for Female and Male:
            if female_value == male_value:
                # Create a list of columns where Male/Female information is the same across a country and a year.
                equal_column_list.append(column)
            else:
                # Create a list of columns where Male/Female information varies.
                non_equal_column_list.append(column)
          
equal_unique = list(set(equal_column_list))
non_equal_unique = list(set(non_equal_column_list))

print(f"Columns with different values for Male and Female: {non_equal_unique}")
print(f"Columns with repeating values for Male and Female: {equal_unique}")

Columns with different values for Male and Female: ['Life expectancy']
Columns with repeating values for Male and Female: ['Basic sanitation services', 'Urban population', 'GDP', 'Per Capita', 'Hospital beds', 'Rural population', 'Measles Immunization', 'DPT Immunization', 'HepB3 Immunization']


In [16]:
# Create a new table for total values.
# Filter original data frame by keeping column with differnet values for Male and Female ("Life Expectancy") and "identifyer" columns - "Country", "Year", "Gender".
columns_to_drop = equal_unique + ["Medical doctors (number)"]
country_doctors_fm_reduced = country_doctors_fm.drop(columns=columns_to_drop)

# Transpose 'Gender' column into 'Female' and 'Male' columns
country_doctors_transposed = country_doctors_fm_reduced.pivot(index=["Country","Year"],columns=["Gender"], values=["Life expectancy","Population, female (percent of total)"]).reset_index()

# Flatten the column index of country_doctors_transposed_reduced DataFrame
country_doctors_transposed.columns = country_doctors_transposed.columns.map(''.join)

# Add column to calculate Life expectancy for the entire population (weighted average)
country_doctors_transposed["Life expectancy total population"] = (country_doctors_transposed["Life expectancyFemale"] * 
                                                                country_doctors_transposed["Population, female (percent of total)Female"]/100 + 
                                                                country_doctors_transposed["Life expectancyMale"] * 
                                                                (100 - country_doctors_transposed["Population, female (percent of total)Female"])/100
                                                                )

# Drop unimportant columns for the further merging steps
country_doctors_transposed_reduced = country_doctors_transposed.drop(columns=[("Life expectancyFemale"), 
                                                                            ("Life expectancyMale"), 
                                                                            ("Population, female (percent of total)Female"),
                                                                            ("Population, female (percent of total)Male")
                                                                            ]
                                                                    )

# Merge with country_doctors_fm dataframe to have all the information available.
country_doctors_total_merged = pd.merge(country_doctors_fm,country_doctors_transposed_reduced, on= ["Country","Year"], how= "left")

# Drop unimportant columns
country_doctors_total = country_doctors_total_merged.drop(columns=["Gender","Life expectancy"])

# Remove duplicated rows
country_doctors_total = country_doctors_total.drop_duplicates(["Country","Year"])

country_doctors_total

Unnamed: 0,Country,Year,GDP,Per Capita,DPT Immunization,HepB3 Immunization,Measles Immunization,Hospital beds,Basic sanitation services,Urban population,Rural population,"Population, female (percent of total)",Medical doctors (number),Life expectancy total population
0,Afghanistan,2015,1.913421e+10,556.007221,65.0,65.0,63.0,0.500000,42.626023,24.803,75.197,49.423184,9808.0,63.404203
1,Afghanistan,2016,1.811656e+10,512.012778,66.0,66.0,64.0,0.500000,44.201217,25.020,74.980,49.414608,9842.0,63.791836
2,Afghanistan,2017,1.875347e+10,516.679862,66.0,66.0,64.0,0.390000,45.789127,25.250,74.750,49.419284,8744.0,64.161340
3,Afghanistan,2018,1.805323e+10,485.668419,66.0,66.0,64.0,0.432222,47.390662,25.495,74.505,49.437209,11026.0,64.519734
4,Afghanistan,2019,1.879945e+10,494.179350,66.0,66.0,64.0,0.432222,49.006170,25.754,74.246,49.455485,8082.0,64.868632
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1225,Zimbabwe,2015,1.996312e+10,1445.069702,87.0,87.0,86.0,2.350000,38.120646,32.385,67.615,52.999890,2508.0,59.477176
1226,Zimbabwe,2016,2.054868e+10,1464.588957,90.0,90.0,95.0,2.350000,37.529193,32.296,67.704,52.973111,2508.0,60.235772
1227,Zimbabwe,2017,1.758489e+10,1235.189032,89.0,89.0,90.0,2.350000,36.941673,32.237,67.763,52.948393,2646.0,60.754342
1228,Zimbabwe,2018,1.811554e+10,1254.642265,89.0,89.0,88.0,2.350000,36.357160,32.209,67.791,52.923755,3026.0,61.140049


In [17]:
# Remove rows without "Medical doctors (number)" information.
country_doctors_total_filtered = country_doctors_total.dropna(subset=["Medical doctors (number)"])

country_doctors_total_filtered

Unnamed: 0,Country,Year,GDP,Per Capita,DPT Immunization,HepB3 Immunization,Measles Immunization,Hospital beds,Basic sanitation services,Urban population,Rural population,"Population, female (percent of total)",Medical doctors (number),Life expectancy total population
0,Afghanistan,2015,1.913421e+10,556.007221,65.0,65.0,63.0,0.500000,42.626023,24.803,75.197,49.423184,9808.0,63.404203
1,Afghanistan,2016,1.811656e+10,512.012778,66.0,66.0,64.0,0.500000,44.201217,25.020,74.980,49.414608,9842.0,63.791836
2,Afghanistan,2017,1.875347e+10,516.679862,66.0,66.0,64.0,0.390000,45.789127,25.250,74.750,49.419284,8744.0,64.161340
3,Afghanistan,2018,1.805323e+10,485.668419,66.0,66.0,64.0,0.432222,47.390662,25.495,74.505,49.437209,11026.0,64.519734
4,Afghanistan,2019,1.879945e+10,494.179350,66.0,66.0,64.0,0.432222,49.006170,25.754,74.246,49.455485,8082.0,64.868632
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1223,Zambia,2018,2.631159e+10,1516.368371,90.0,90.0,94.0,1.966667,31.052407,43.521,56.479,50.690913,2026.0,63.530861
1225,Zimbabwe,2015,1.996312e+10,1445.069702,87.0,87.0,86.0,2.350000,38.120646,32.385,67.615,52.999890,2508.0,59.477176
1226,Zimbabwe,2016,2.054868e+10,1464.588957,90.0,90.0,95.0,2.350000,37.529193,32.296,67.704,52.973111,2508.0,60.235772
1227,Zimbabwe,2017,1.758489e+10,1235.189032,89.0,89.0,90.0,2.350000,36.941673,32.237,67.763,52.948393,2646.0,60.754342


#### Create a dataset with averaged out information across different years for a country

In [18]:
# Average out the values across analyzed years for one country
country_doctors_total_avg = country_doctors_total.groupby("Country").mean()

# Reset index
country_doctors_total_avg = country_doctors_total_avg.reset_index()

# Drop column Year
country_doctors_total_avg = country_doctors_total_avg.drop(columns="Year")

country_doctors_total_avg

Unnamed: 0,Country,GDP,Per Capita,DPT Immunization,HepB3 Immunization,Measles Immunization,Hospital beds,Basic sanitation services,Urban population,Rural population,"Population, female (percent of total)",Medical doctors (number),Life expectancy total population
0,Afghanistan,1.857138e+10,512.909526,65.800000,65.800000,63.800000,0.450889,45.802640,25.264400,74.735600,49.429954,9500.4,64.149149
1,Africa Eastern and Southern,9.602807e+11,1531.783575,76.801024,76.422452,72.968797,0.911871,29.998931,35.340330,64.659670,50.513523,,63.214975
2,Africa Western and Central,7.342557e+11,1732.735510,66.188590,66.177998,63.048121,2.986346,33.227307,46.015701,53.984299,49.771207,,57.355333
3,Albania,1.336488e+10,4658.240106,98.800000,98.800000,95.600000,3.052308,98.493376,59.357200,40.642800,49.884581,4826.0,78.355262
4,Algeria,1.685577e+11,4073.145408,91.800000,91.800000,87.400000,1.820000,86.449686,72.035400,27.964600,49.045001,73456.5,76.491339
...,...,...,...,...,...,...,...,...,...,...,...,...,...
241,West Bank and Gaza,1.578318e+10,3527.863570,85.685266,81.307785,84.854727,2.986346,97.165781,75.898800,24.101200,50.202434,,73.786391
242,World,8.128929e+13,10806.022309,85.545898,83.760753,85.351016,2.766197,74.929771,54.823002,45.176998,49.681628,,72.429675
243,"Yemen, Rep.",2.939171e+10,1106.402264,69.200000,69.200000,66.600000,0.692889,52.607593,36.020400,63.979600,49.391027,,66.088948
244,Zambia,2.354070e+10,1395.132689,90.600000,90.600000,93.200000,1.966667,30.604350,42.982800,57.017200,50.714809,1770.0,62.942619


In [19]:
# 1st option - Remove rows without "Medical doctors (number)" information.
country_doctors_total_avg_filtered = country_doctors_total_avg.dropna(subset=["Medical doctors (number)"])

country_doctors_total_avg_filtered

Unnamed: 0,Country,GDP,Per Capita,DPT Immunization,HepB3 Immunization,Measles Immunization,Hospital beds,Basic sanitation services,Urban population,Rural population,"Population, female (percent of total)",Medical doctors (number),Life expectancy total population
0,Afghanistan,1.857138e+10,512.909526,65.8,65.8,63.8,0.450889,45.802640,25.2644,74.7356,49.429954,9500.4,64.149149
3,Albania,1.336488e+10,4658.240106,98.8,98.8,95.6,3.052308,98.493376,59.3572,40.6428,49.884581,4826.0,78.355262
4,Algeria,1.685577e+11,4073.145408,91.8,91.8,87.4,1.820000,86.449686,72.0354,27.9646,49.045001,73456.5,76.491339
5,Angola,1.060423e+11,3573.626620,58.8,54.8,47.8,0.800000,49.678755,64.8250,35.1750,50.627161,6496.5,60.446212
6,Antigua and Barbuda,1.506947e+09,15784.778096,95.2,96.0,93.4,2.934250,86.711731,24.7328,75.2672,52.309463,264.0,76.772637
...,...,...,...,...,...,...,...,...,...,...,...,...,...
231,United Arab Emirates,3.880432e+11,40798.165271,98.6,98.6,99.0,1.435556,98.873917,86.2396,13.7604,29.497255,23108.4,77.560149
235,Uruguay,6.009822e+10,17482.429717,93.6,93.6,96.0,2.435667,97.191354,95.2378,4.7622,51.650642,15260.5,77.648019
237,Vanuatu,8.482107e+08,2965.101870,84.2,84.2,79.0,3.150000,52.959592,25.1708,74.8292,49.659382,46.0,70.252099
244,Zambia,2.354070e+10,1395.132689,90.6,90.6,93.2,1.966667,30.604350,42.9828,57.0172,50.714809,1770.0,62.942619


In [20]:
# 2nd option - Average out the values across analyzed years for one country.
# Average out the values across analyzed years for one country
country_doctors_total_avg_filtered_v2 = country_doctors_total_filtered.groupby("Country").mean()

# Reset index
country_doctors_total_avg_filtered_v2 = country_doctors_total_avg_filtered_v2.reset_index()

# Drop column Year
country_doctors_total_avg_filtered_v2 = country_doctors_total_avg_filtered_v2.drop(columns="Year")

country_doctors_total_avg_filtered_v2

Unnamed: 0,Country,GDP,Per Capita,DPT Immunization,HepB3 Immunization,Measles Immunization,Hospital beds,Basic sanitation services,Urban population,Rural population,"Population, female (percent of total)",Medical doctors (number),Life expectancy total population
0,Afghanistan,1.857138e+10,512.909526,65.800000,65.800000,63.80,0.450889,45.802640,25.264400,74.735600,49.429954,9500.4,64.149149
1,Albania,1.413929e+10,4935.792872,98.666667,98.666667,95.00,3.052308,98.754871,59.989667,40.010333,49.900654,4826.0,78.441996
2,Algeria,1.674725e+11,4044.235503,91.000000,91.000000,87.00,1.800000,86.454289,72.044000,27.956000,49.044808,73456.5,76.494573
3,Angola,1.117385e+11,3692.727026,59.500000,55.500000,46.00,0.800000,50.498862,65.176500,34.823500,50.618667,6496.5,60.699323
4,Antigua and Barbuda,1.467978e+09,15383.576398,95.000000,95.000000,99.00,2.890000,87.504282,24.713000,75.287000,52.303032,264.0,76.773970
...,...,...,...,...,...,...,...,...,...,...,...,...,...
145,United Arab Emirates,3.880432e+11,40798.165271,98.600000,98.600000,99.00,1.435556,98.873917,86.239600,13.760400,29.497255,23108.4,77.560149
146,Uruguay,6.073531e+10,17703.263657,94.000000,94.000000,95.50,2.455000,97.047649,95.192000,4.808000,51.661990,15260.5,77.578703
147,Vanuatu,7.808896e+08,2805.665320,81.000000,81.000000,84.00,3.150000,53.148221,25.062000,74.938000,49.645856,46.0,70.097915
148,Zambia,2.363500e+10,1398.587457,90.500000,90.500000,93.50,1.966667,30.609018,42.979500,57.020500,50.713450,1770.0,63.001813


## Clean Datasets Overview and Limitations

In [21]:
# Data Frames overview
print(f"\033[1mAvailable Data Frames overview:\033[0m")
print(f"\nData frame country_doctors_fm has {country_doctors_fm['Country'].nunique()} countries, and {country_doctors_fm.shape[0]} rows of data.")
print(f"Data frame country_doctors_fm_filtered has {country_doctors_fm_filtered['Country'].nunique()} countries, "
      f"and {country_doctors_fm_filtered.shape[0]} rows of data. "
      )
print(f"\nData frame country_doctors_total has {country_doctors_total['Country'].nunique()} countries, and {country_doctors_total.shape[0]} rows of data.")
print(f"Data frame country_doctors_total_filtered has {country_doctors_total_filtered['Country'].nunique()} countries, "
      f"and {country_doctors_total_filtered.shape[0]} rows of data. "
      )
print(f"\nData frame country_doctors_total_avg has {country_doctors_total_avg['Country'].nunique()} countries, and {country_doctors_total_avg.shape[0]} rows of data.")
print(f"Data frame country_doctors_total_avg_filtered has {country_doctors_total_avg_filtered['Country'].nunique()} countries, "
      f"and {country_doctors_total_avg_filtered.shape[0]} rows of data. "
      )
print(f"\n{round(country_doctors_fm_filtered['Country'].nunique()/country_doctors_fm['Country'].nunique()*100,2)}% "
      f"of the countries were \033[4mfiltered out\033[0m from the original data set because of the missing values for # of Medical Doctors to create _filtered data frames."
      )

[1mAvailable Data Frames overview:[0m

Data frame country_doctors_fm has 246 countries, and 2460 rows of data.
Data frame country_doctors_fm_filtered has 150 countries, and 986 rows of data. 

Data frame country_doctors_total has 246 countries, and 1230 rows of data.
Data frame country_doctors_total_filtered has 150 countries, and 493 rows of data. 

Data frame country_doctors_total_avg has 246 countries, and 246 rows of data.
Data frame country_doctors_total_avg_filtered has 150 countries, and 150 rows of data. 

60.98% of the countries were [4mfiltered out[0m from the original data set because of the missing values for # of Medical Doctors to create _filtered data frames.


In [22]:
# List countries that did not have all 5 year's info after filtering based on the Medical doctors missing values. 
# Identify countries that do not have all the records for 5 years
filtered_countries_missing_year = country_doctors_total_filtered.groupby("Country").filter(lambda x: len(x) <
                                                     (last_year - first_year+1))["Country"].unique()

# Print list of the countriest that are excluded from the analysis due to the missing information
print(f"{len(filtered_countries_missing_year)} countries "
    f"({round(len(filtered_countries_missing_year)/country_doctors_total_filtered['Country'].nunique()*100,2)}%) "
    f"do not have complete information in column 'Year' in the _filtered data frames.\n {filtered_countries_missing_year}")

95 countries (63.33%) do not have complete information in column 'Year' in the _filtered data frames.
 ['Albania' 'Algeria' 'Angola' 'Antigua and Barbuda' 'Argentina' 'Armenia'
 'Bahrain' 'Barbados' 'Belize' 'Benin' 'Bosnia and Herzegovina' 'Botswana'
 'Brazil' 'Brunei Darussalam' 'Bulgaria' 'Burundi' 'Cabo Verde' 'Cameroon'
 'Canada' 'Central African Republic' 'Comoros' 'Costa Rica' 'Cuba'
 'Denmark' 'Dominican Republic' 'Ecuador' 'El Salvador'
 'Equatorial Guinea' 'Eswatini' 'Ethiopia' 'Fiji' 'Finland' 'Gabon'
 'Grenada' 'Guatemala' 'Guinea' 'Guinea-Bissau' 'Guyana' 'Haiti'
 'Honduras' 'India' 'Indonesia' 'Iraq' 'Jamaica' 'Japan' 'Jordan' 'Kenya'
 'Kuwait' 'Lesotho' 'Liberia' 'Libya' 'Luxembourg' 'Madagascar' 'Malawi'
 'Maldives' 'Mali' 'Malta' 'Mauritania' 'Mongolia' 'Morocco' 'Mozambique'
 'Myanmar' 'Namibia' 'Nepal' 'Nicaragua' 'Nigeria' 'North Macedonia'
 'Papua New Guinea' 'Paraguay' 'Peru' 'Philippines' 'Poland' 'Qatar'
 'Romania' 'Samoa' 'Sao Tome and Principe' 'Saudi Arabia' 

In [23]:
# Print the header
print(f"Countries with missing years in the _filtered Data Frames:")

# Return and print countries that do not have full 5 years of data present in the _filtered list.
filtered_years_list = country_doctors_total_filtered.pivot(index="Country",columns="Year", values="Year").reset_index()
filtered_years_list = filtered_years_list.fillna("")
filtered_year_missing = filtered_years_list[filtered_years_list["Country"].isin(filtered_countries_missing_year)].reset_index(drop=True)

# Print the results
pd.set_option('display.max_rows', None)
filtered_year_missing

Countries with missing years in the _filtered Data Frames:


Year,Country,2015,2016,2017,2018,2019
0,Albania,,2016.0,,2018.0,2019.0
1,Algeria,,2016.0,,2018.0,
2,Angola,,,2017.0,2018.0,
3,Antigua and Barbuda,,,2017.0,,
4,Argentina,2015.0,2016.0,2017.0,,2019.0
5,Armenia,2015.0,,2017.0,,
6,Bahrain,2015.0,,,,
7,Barbados,,,2017.0,,
8,Belize,,,2017.0,2018.0,
9,Benin,,2016.0,,2018.0,2019.0


In [24]:
# Print the header
print(f"Countries with all the years in the _filtered Data Frames:")

# Return and print countries that do not have full 5 years of data present in the _filtered list.
filtered_years_list = country_doctors_total_filtered.pivot(index="Country",columns="Year", values="Year").reset_index()
filtered_years_list = filtered_years_list.fillna("")
filtered_year_present = filtered_years_list[~filtered_years_list["Country"].isin(filtered_countries_missing_year)].reset_index(drop=True)

# Print the results
pd.set_option('display.max_rows', None)
filtered_year_present

Countries with all the years in the _filtered Data Frames:


Year,Country,2015,2016,2017,2018,2019
0,Afghanistan,2015.0,2016.0,2017.0,2018.0,2019.0
1,Australia,2015.0,2016.0,2017.0,2018.0,2019.0
2,Austria,2015.0,2016.0,2017.0,2018.0,2019.0
3,Azerbaijan,2015.0,2016.0,2017.0,2018.0,2019.0
4,Bangladesh,2015.0,2016.0,2017.0,2018.0,2019.0
5,Belarus,2015.0,2016.0,2017.0,2018.0,2019.0
6,Belgium,2015.0,2016.0,2017.0,2018.0,2019.0
7,Bhutan,2015.0,2016.0,2017.0,2018.0,2019.0
8,Burkina Faso,2015.0,2016.0,2017.0,2018.0,2019.0
9,Chad,2015.0,2016.0,2017.0,2018.0,2019.0


In [25]:
# Revert displaying of truncated view
pd.set_option('display.max_rows',10)

## Note (TO BE REMOVED WHEN PRESENTING):
Available Data Frames list:
- country_doctors_fm - Data Frame that contains information for Male/Female for all the countries/years.
- country_doctors_fm_filtered - Data Frame that contains filtered information for Male/Female - rows with null values in the column "Medical Doctors" (number) are filtered out.
- country_doctors_total - Data Frame that contains information for the whole population for all the countries/years.
- country_doctors_total_filtered - Data Frame that contains filtered information for the whole population, rows with null values in the column "Medical Doctors" (number) are filtered out.
- country_doctors_total_avg - Data Frame that contains information for the whole population for all the countries that is averaged out by years.
- country_doctors_total_avg_filtered - Data Frame that contains information for the whole population for all the countries that is averaged out by years, rows with null values in the column "Medical Doctors" (number) are filtered out.

For each Data Frame structure see below:

In [26]:
country_doctors_fm

Unnamed: 0,Country,Year,Gender,Life expectancy,GDP,Per Capita,DPT Immunization,HepB3 Immunization,Measles Immunization,Hospital beds,Basic sanitation services,Urban population,Rural population,"Population, female (percent of total)",Medical doctors (number)
0,Afghanistan,2015,Female,64.877,1.913421e+10,556.007221,65.0,65.0,63.0,0.500000,42.626023,24.803,75.197,49.423184,9808.0
1,Afghanistan,2016,Female,65.275,1.811656e+10,512.012778,66.0,66.0,64.0,0.500000,44.201217,25.020,74.980,49.414608,9842.0
2,Afghanistan,2017,Female,65.656,1.875347e+10,516.679862,66.0,66.0,64.0,0.390000,45.789127,25.250,74.750,49.419284,8744.0
3,Afghanistan,2018,Female,66.026,1.805323e+10,485.668419,66.0,66.0,64.0,0.432222,47.390662,25.495,74.505,49.437209,11026.0
4,Afghanistan,2019,Female,66.388,1.879945e+10,494.179350,66.0,66.0,64.0,0.432222,49.006170,25.754,74.246,49.455485,8082.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2455,Zimbabwe,2015,Male,57.778,1.996312e+10,1445.069702,87.0,87.0,86.0,2.350000,38.120646,32.385,67.615,52.999890,2508.0
2456,Zimbabwe,2016,Male,58.565,2.054868e+10,1464.588957,90.0,90.0,95.0,2.350000,37.529193,32.296,67.704,52.973111,2508.0
2457,Zimbabwe,2017,Male,59.105,1.758489e+10,1235.189032,89.0,89.0,90.0,2.350000,36.941673,32.237,67.763,52.948393,2646.0
2458,Zimbabwe,2018,Male,59.501,1.811554e+10,1254.642265,89.0,89.0,88.0,2.350000,36.357160,32.209,67.791,52.923755,3026.0


In [27]:
country_doctors_fm_filtered

Unnamed: 0,Country,Year,Gender,Life expectancy,GDP,Per Capita,DPT Immunization,HepB3 Immunization,Measles Immunization,Hospital beds,Basic sanitation services,Urban population,Rural population,"Population, female (percent of total)",Medical doctors (number)
0,Afghanistan,2015,Female,64.877,1.913421e+10,556.007221,65.0,65.0,63.0,0.500000,42.626023,24.803,75.197,49.423184,9808.0
1,Afghanistan,2016,Female,65.275,1.811656e+10,512.012778,66.0,66.0,64.0,0.500000,44.201217,25.020,74.980,49.414608,9842.0
2,Afghanistan,2017,Female,65.656,1.875347e+10,516.679862,66.0,66.0,64.0,0.390000,45.789127,25.250,74.750,49.419284,8744.0
3,Afghanistan,2018,Female,66.026,1.805323e+10,485.668419,66.0,66.0,64.0,0.432222,47.390662,25.495,74.505,49.437209,11026.0
4,Afghanistan,2019,Female,66.388,1.879945e+10,494.179350,66.0,66.0,64.0,0.432222,49.006170,25.754,74.246,49.455485,8082.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2453,Zambia,2018,Male,60.533,2.631159e+10,1516.368371,90.0,90.0,94.0,1.966667,31.052407,43.521,56.479,50.690913,2026.0
2455,Zimbabwe,2015,Male,57.778,1.996312e+10,1445.069702,87.0,87.0,86.0,2.350000,38.120646,32.385,67.615,52.999890,2508.0
2456,Zimbabwe,2016,Male,58.565,2.054868e+10,1464.588957,90.0,90.0,95.0,2.350000,37.529193,32.296,67.704,52.973111,2508.0
2457,Zimbabwe,2017,Male,59.105,1.758489e+10,1235.189032,89.0,89.0,90.0,2.350000,36.941673,32.237,67.763,52.948393,2646.0


In [28]:
country_doctors_total

Unnamed: 0,Country,Year,GDP,Per Capita,DPT Immunization,HepB3 Immunization,Measles Immunization,Hospital beds,Basic sanitation services,Urban population,Rural population,"Population, female (percent of total)",Medical doctors (number),Life expectancy total population
0,Afghanistan,2015,1.913421e+10,556.007221,65.0,65.0,63.0,0.500000,42.626023,24.803,75.197,49.423184,9808.0,63.404203
1,Afghanistan,2016,1.811656e+10,512.012778,66.0,66.0,64.0,0.500000,44.201217,25.020,74.980,49.414608,9842.0,63.791836
2,Afghanistan,2017,1.875347e+10,516.679862,66.0,66.0,64.0,0.390000,45.789127,25.250,74.750,49.419284,8744.0,64.161340
3,Afghanistan,2018,1.805323e+10,485.668419,66.0,66.0,64.0,0.432222,47.390662,25.495,74.505,49.437209,11026.0,64.519734
4,Afghanistan,2019,1.879945e+10,494.179350,66.0,66.0,64.0,0.432222,49.006170,25.754,74.246,49.455485,8082.0,64.868632
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1225,Zimbabwe,2015,1.996312e+10,1445.069702,87.0,87.0,86.0,2.350000,38.120646,32.385,67.615,52.999890,2508.0,59.477176
1226,Zimbabwe,2016,2.054868e+10,1464.588957,90.0,90.0,95.0,2.350000,37.529193,32.296,67.704,52.973111,2508.0,60.235772
1227,Zimbabwe,2017,1.758489e+10,1235.189032,89.0,89.0,90.0,2.350000,36.941673,32.237,67.763,52.948393,2646.0,60.754342
1228,Zimbabwe,2018,1.811554e+10,1254.642265,89.0,89.0,88.0,2.350000,36.357160,32.209,67.791,52.923755,3026.0,61.140049


In [29]:
country_doctors_total_filtered

Unnamed: 0,Country,Year,GDP,Per Capita,DPT Immunization,HepB3 Immunization,Measles Immunization,Hospital beds,Basic sanitation services,Urban population,Rural population,"Population, female (percent of total)",Medical doctors (number),Life expectancy total population
0,Afghanistan,2015,1.913421e+10,556.007221,65.0,65.0,63.0,0.500000,42.626023,24.803,75.197,49.423184,9808.0,63.404203
1,Afghanistan,2016,1.811656e+10,512.012778,66.0,66.0,64.0,0.500000,44.201217,25.020,74.980,49.414608,9842.0,63.791836
2,Afghanistan,2017,1.875347e+10,516.679862,66.0,66.0,64.0,0.390000,45.789127,25.250,74.750,49.419284,8744.0,64.161340
3,Afghanistan,2018,1.805323e+10,485.668419,66.0,66.0,64.0,0.432222,47.390662,25.495,74.505,49.437209,11026.0,64.519734
4,Afghanistan,2019,1.879945e+10,494.179350,66.0,66.0,64.0,0.432222,49.006170,25.754,74.246,49.455485,8082.0,64.868632
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1223,Zambia,2018,2.631159e+10,1516.368371,90.0,90.0,94.0,1.966667,31.052407,43.521,56.479,50.690913,2026.0,63.530861
1225,Zimbabwe,2015,1.996312e+10,1445.069702,87.0,87.0,86.0,2.350000,38.120646,32.385,67.615,52.999890,2508.0,59.477176
1226,Zimbabwe,2016,2.054868e+10,1464.588957,90.0,90.0,95.0,2.350000,37.529193,32.296,67.704,52.973111,2508.0,60.235772
1227,Zimbabwe,2017,1.758489e+10,1235.189032,89.0,89.0,90.0,2.350000,36.941673,32.237,67.763,52.948393,2646.0,60.754342


In [30]:
country_doctors_total_avg

# Specify the file path and name for the CSV file
csv_file_path = "./Data/totalavg.csv"

# Convert the DataFrame to CSV
country_doctors_total_avg.to_csv(csv_file_path, index=False)

country_doctors_total_avg.head()

Unnamed: 0,Country,GDP,Per Capita,DPT Immunization,HepB3 Immunization,Measles Immunization,Hospital beds,Basic sanitation services,Urban population,Rural population,"Population, female (percent of total)",Medical doctors (number),Life expectancy total population
0,Afghanistan,18571380000.0,512.909526,65.8,65.8,63.8,0.450889,45.80264,25.2644,74.7356,49.429954,9500.4,64.149149
1,Africa Eastern and Southern,960280700000.0,1531.783575,76.801024,76.422452,72.968797,0.911871,29.998931,35.34033,64.65967,50.513523,,63.214975
2,Africa Western and Central,734255700000.0,1732.73551,66.18859,66.177998,63.048121,2.986346,33.227307,46.015701,53.984299,49.771207,,57.355333
3,Albania,13364880000.0,4658.240106,98.8,98.8,95.6,3.052308,98.493376,59.3572,40.6428,49.884581,4826.0,78.355262
4,Algeria,168557700000.0,4073.145408,91.8,91.8,87.4,1.82,86.449686,72.0354,27.9646,49.045001,73456.5,76.491339


In [31]:
country_doctors_total_avg_filtered
# Specify the file path and name for the CSV file
csv_file_path = "./Data/totalavgfilter.csv"

# Convert the DataFrame to CSV
country_doctors_total_avg_filtered.to_csv(csv_file_path, index=False)

country_doctors_total_avg_filtered.head()

Unnamed: 0,Country,GDP,Per Capita,DPT Immunization,HepB3 Immunization,Measles Immunization,Hospital beds,Basic sanitation services,Urban population,Rural population,"Population, female (percent of total)",Medical doctors (number),Life expectancy total population
0,Afghanistan,18571380000.0,512.909526,65.8,65.8,63.8,0.450889,45.80264,25.2644,74.7356,49.429954,9500.4,64.149149
3,Albania,13364880000.0,4658.240106,98.8,98.8,95.6,3.052308,98.493376,59.3572,40.6428,49.884581,4826.0,78.355262
4,Algeria,168557700000.0,4073.145408,91.8,91.8,87.4,1.82,86.449686,72.0354,27.9646,49.045001,73456.5,76.491339
5,Angola,106042300000.0,3573.62662,58.8,54.8,47.8,0.8,49.678755,64.825,35.175,50.627161,6496.5,60.446212
6,Antigua and Barbuda,1506947000.0,15784.778096,95.2,96.0,93.4,2.93425,86.711731,24.7328,75.2672,52.309463,264.0,76.772637


## Questions analysis

TO BE COMPLITED HERE