# Current Health Expenditure & Life Expectancy Analysis

This project is an analysis of the Current Health Expenditure Per Capita in US$ and Life Expectancy across multiple countries.

In [310]:
# import dependencies
import pandas as pd
import numpy as np

In [312]:
%matplotlib inline
from matplotlib import style
style.use('fivethirtyeight')
import matplotlib.pyplot as plt

## Cleaning and Exploring Data

In [554]:
# read in current health expenditure data set
che = pd.read_csv("data/current_health_expenditure.csv")
che.head()

Unnamed: 0,IndicatorCode,Indicator,ValueType,ParentLocationCode,ParentLocation,Location type,SpatialDimValueCode,Location,Period type,Period,...,FactValueUoM,FactValueNumericLowPrefix,FactValueNumericLow,FactValueNumericHighPrefix,FactValueNumericHigh,Value,FactValueTranslationID,FactComments,Language,DateModified
0,GHED_CHE_pc_US_SHA2011,Current health expenditure (CHE) per capita in...,numeric,AMR,Americas,Country,CUB,Cuba,Year,2021,...,,,,,,,,,EN,2023-12-05T08:00:00.000Z
1,GHED_CHE_pc_US_SHA2011,Current health expenditure (CHE) per capita in...,numeric,AFR,Africa,Country,GHA,Ghana,Year,2021,...,,,,,,100.0,,,EN,2023-12-05T08:00:00.000Z
2,GHED_CHE_pc_US_SHA2011,Current health expenditure (CHE) per capita in...,numeric,SEAR,South-East Asia,Country,MDV,Maldives,Year,2021,...,,,,,,1038.67,,,EN,2023-12-05T08:00:00.000Z
3,GHED_CHE_pc_US_SHA2011,Current health expenditure (CHE) per capita in...,numeric,EUR,Europe,Country,BGR,Bulgaria,Year,2021,...,,,,,,1040.02,,,EN,2023-12-05T08:00:00.000Z
4,GHED_CHE_pc_US_SHA2011,Current health expenditure (CHE) per capita in...,numeric,AMR,Americas,Country,ARG,Argentina,Year,2021,...,,,,,,1044.77,,,EN,2023-12-05T08:00:00.000Z


In [556]:
print(che.isnull().sum())

IndicatorCode                    0
Indicator                        0
ValueType                        0
ParentLocationCode               0
ParentLocation                   0
Location type                    0
SpatialDimValueCode              0
Location                         0
Period type                      0
Period                           0
IsLatestYear                     0
Dim1 type                     4153
Dim1                          4153
Dim1ValueCode                 4153
Dim2 type                     4153
Dim2                          4153
Dim2ValueCode                 4153
Dim3 type                     4153
Dim3                          4153
Dim3ValueCode                 4153
DataSourceDimValueCode        4153
DataSource                    4153
FactValueNumericPrefix        4153
FactValueNumeric                19
FactValueUoM                  4153
FactValueNumericLowPrefix     4153
FactValueNumericLow           4153
FactValueNumericHighPrefix    4153
FactValueNumericHigh

In [558]:
che.drop(columns=['Indicator','IndicatorCode', 'ValueType', 'ParentLocationCode', 'SpatialDimValueCode','Period type','IsLatestYear',
                        'Dim1 type', 'Dim1', 'Dim1ValueCode', 'Dim2 type', 'Dim2', 'Dim2ValueCode', 'Dim3 type', 'Dim3', 
                        'Dim3ValueCode', 'DataSourceDimValueCode', 'DataSource', 'FactValueNumericPrefix', 'FactValueNumeric',
                       'FactValueUoM', 'FactValueNumericLowPrefix', 'FactValueNumericLow', 'FactValueNumericHighPrefix',
                       'FactValueNumericHigh', 'FactValueTranslationID', 'FactComments', 'Language', 'DateModified'], inplace=True)

In [560]:
che.head()

Unnamed: 0,ParentLocation,Location type,Location,Period,Value
0,Americas,Country,Cuba,2021,
1,Africa,Country,Ghana,2021,100.0
2,South-East Asia,Country,Maldives,2021,1038.67
3,Europe,Country,Bulgaria,2021,1040.02
4,Americas,Country,Argentina,2021,1044.77


In [562]:
che.rename(columns={'ParentLocation': 'Continent', 'Period':'Year', 'Value': 'CHE'}, inplace=True)

In [564]:
print(che.isnull().sum())

Continent         0
Location type     0
Location          0
Year              0
CHE              19
dtype: int64


In [566]:
che.fillna({'CHE':0}, inplace = True)

In [568]:
print(che.isnull().sum())

Continent        0
Location type    0
Location         0
Year             0
CHE              0
dtype: int64


In [570]:
che.head()

Unnamed: 0,Continent,Location type,Location,Year,CHE
0,Americas,Country,Cuba,2021,0.0
1,Africa,Country,Ghana,2021,100.0
2,South-East Asia,Country,Maldives,2021,1038.67
3,Europe,Country,Bulgaria,2021,1040.02
4,Americas,Country,Argentina,2021,1044.77


In [572]:
# read in current health expenditure data set
hle = pd.read_csv("data/health_life_expectancy.csv")
hle.head()

Unnamed: 0,IndicatorCode,Indicator,ValueType,ParentLocationCode,ParentLocation,Location type,SpatialDimValueCode,Location,Period type,Period,...,FactValueUoM,FactValueNumericLowPrefix,FactValueNumericLow,FactValueNumericHighPrefix,FactValueNumericHigh,Value,FactValueTranslationID,FactComments,Language,DateModified
0,WHOSIS_000002,Healthy life expectancy (HALE) at birth (years),text,AFR,Africa,Country,LSO,Lesotho,Year,2021,...,,,42.23,,44.08,43.1 [42.2 – 44.1],,,EN,2024-08-02T07:00:00.000Z
1,WHOSIS_000002,Healthy life expectancy (HALE) at birth (years),text,AFR,Africa,Country,CAF,Central African Republic,Year,2021,...,,,42.74,,44.97,43.8 [42.7 – 45.0],,,EN,2024-08-02T07:00:00.000Z
2,WHOSIS_000002,Healthy life expectancy (HALE) at birth (years),text,AFR,Africa,Country,LSO,Lesotho,Year,2021,...,,,43.66,,45.76,44.6 [43.7 – 45.8],,,EN,2024-08-02T07:00:00.000Z
3,WHOSIS_000002,Healthy life expectancy (HALE) at birth (years),text,AFR,Africa,Country,CAF,Central African Republic,Year,2021,...,,,44.26,,46.48,45.4 [44.3 – 46.5],,,EN,2024-08-02T07:00:00.000Z
4,WHOSIS_000002,Healthy life expectancy (HALE) at birth (years),text,AFR,Africa,Country,SWZ,Eswatini,Year,2021,...,,,44.85,,46.98,45.8 [44.8 – 47.0],,,EN,2024-08-02T07:00:00.000Z


In [574]:
print(hle.isnull().sum())

IndicatorCode                     0
Indicator                         0
ValueType                         0
ParentLocationCode                0
ParentLocation                    0
Location type                     0
SpatialDimValueCode               0
Location                          0
Period type                       0
Period                            0
IsLatestYear                      0
Dim1 type                         0
Dim1                              0
Dim1ValueCode                     0
Dim2 type                     24420
Dim2                          24420
Dim2ValueCode                 24420
Dim3 type                     24420
Dim3                          24420
Dim3ValueCode                 24420
DataSourceDimValueCode        24420
DataSource                    24420
FactValueNumericPrefix        24420
FactValueNumeric                  0
FactValueUoM                  24420
FactValueNumericLowPrefix     24420
FactValueNumericLow              40
FactValueNumericHighPrefix  

In [576]:
hle.drop(columns=['IndicatorCode','IndicatorCode', 'ValueType', 'ParentLocationCode', 'SpatialDimValueCode','Period type','IsLatestYear',
                        'Dim1 type', 'Dim1ValueCode', 'Dim2 type', 'Dim2', 'Dim2ValueCode', 'Dim3 type', 'Dim3', 
                        'Dim3ValueCode', 'DataSourceDimValueCode', 'DataSource', 'FactValueNumericPrefix', 'Value',
                       'FactValueUoM', 'FactValueNumericLowPrefix', 'FactValueNumericLow', 'FactValueNumericHighPrefix',
                       'FactValueNumericHigh', 'FactValueTranslationID', 'FactComments', 'Language', 'DateModified'], inplace=True)

In [578]:
hle.head()

Unnamed: 0,Indicator,ParentLocation,Location type,Location,Period,Dim1,FactValueNumeric
0,Healthy life expectancy (HALE) at birth (years),Africa,Country,Lesotho,2021,Male,43.1
1,Healthy life expectancy (HALE) at birth (years),Africa,Country,Central African Republic,2021,Male,43.78
2,Healthy life expectancy (HALE) at birth (years),Africa,Country,Lesotho,2021,Both sexes,44.63
3,Healthy life expectancy (HALE) at birth (years),Africa,Country,Central African Republic,2021,Both sexes,45.43
4,Healthy life expectancy (HALE) at birth (years),Africa,Country,Eswatini,2021,Male,45.79


In [580]:
hle.rename(columns={'ParentLocation': 'Continent', 'Period':'Year', 'Dim1': 'Gender', 'FactValueNumeric': 'Avg HLE'}, inplace=True)

In [582]:
hle.head()

Unnamed: 0,Indicator,Continent,Location type,Location,Year,Gender,Avg HLE
0,Healthy life expectancy (HALE) at birth (years),Africa,Country,Lesotho,2021,Male,43.1
1,Healthy life expectancy (HALE) at birth (years),Africa,Country,Central African Republic,2021,Male,43.78
2,Healthy life expectancy (HALE) at birth (years),Africa,Country,Lesotho,2021,Both sexes,44.63
3,Healthy life expectancy (HALE) at birth (years),Africa,Country,Central African Republic,2021,Both sexes,45.43
4,Healthy life expectancy (HALE) at birth (years),Africa,Country,Eswatini,2021,Male,45.79


In [584]:
print(hle.isnull().sum())

Indicator        0
Continent        0
Location type    0
Location         0
Year             0
Gender           0
Avg HLE          0
dtype: int64


In [586]:
hle = hle[hle['Gender'] != 'Male']

In [588]:
hle = hle[hle['Gender'] != 'Female']

In [590]:
hle = hle[hle['Indicator'] != 'Healthy life expectancy (HALE) at age 60 (years)']

In [592]:
hle.head()

Unnamed: 0,Indicator,Continent,Location type,Location,Year,Gender,Avg HLE
2,Healthy life expectancy (HALE) at birth (years),Africa,Country,Lesotho,2021,Both sexes,44.63
3,Healthy life expectancy (HALE) at birth (years),Africa,Country,Central African Republic,2021,Both sexes,45.43
8,Healthy life expectancy (HALE) at birth (years),Eastern Mediterranean,Country,Somalia,2021,Both sexes,47.42
9,Healthy life expectancy (HALE) at birth (years),Africa,Country,Eswatini,2021,Both sexes,47.47
15,Healthy life expectancy (HALE) at birth (years),Africa,Country,Mozambique,2021,Both sexes,49.72


In [594]:
hle.drop(columns=['Gender', 'Indicator'], inplace = True)

In [596]:
hle.head()

Unnamed: 0,Continent,Location type,Location,Year,Avg HLE
2,Africa,Country,Lesotho,2021,44.63
3,Africa,Country,Central African Republic,2021,45.43
8,Eastern Mediterranean,Country,Somalia,2021,47.42
9,Africa,Country,Eswatini,2021,47.47
15,Africa,Country,Mozambique,2021,49.72


In [598]:
combined_df = pd.merge(che, hle, on=['Location','Year', ], how='left')

In [600]:
combined_df.head()

Unnamed: 0,Continent_x,Location type_x,Location,Year,CHE,Continent_y,Location type_y,Avg HLE
0,Americas,Country,Cuba,2021,0.0,Americas,Country,64.62
1,Africa,Country,Ghana,2021,100.0,Africa,Country,57.88
2,South-East Asia,Country,Maldives,2021,1038.67,South-East Asia,Country,66.67
3,Europe,Country,Bulgaria,2021,1040.02,Europe,Country,62.39
4,Americas,Country,Argentina,2021,1044.77,Americas,Country,64.79


In [602]:
combined_df.drop(columns=['Continent_y', 'Location type_y', 'Location type_x'], inplace = True)

In [604]:
combined_df.head()

Unnamed: 0,Continent_x,Location,Year,CHE,Avg HLE
0,Americas,Cuba,2021,0.0,64.62
1,Africa,Ghana,2021,100.0,57.88
2,South-East Asia,Maldives,2021,1038.67,66.67
3,Europe,Bulgaria,2021,1040.02,62.39
4,Americas,Argentina,2021,1044.77,64.79


In [606]:
combined_df.rename(columns={'Continent_x': 'Continent', 'Period':'Year', 'Dim1': 'Gender', 'FactValueNumeric': 'Avg HLE'}, inplace=True)

In [608]:
combined_df.head()

Unnamed: 0,Continent,Location,Year,CHE,Avg HLE
0,Americas,Cuba,2021,0.0,64.62
1,Africa,Ghana,2021,100.0,57.88
2,South-East Asia,Maldives,2021,1038.67,66.67
3,Europe,Bulgaria,2021,1040.02,62.39
4,Americas,Argentina,2021,1044.77,64.79


In [610]:
duplicates = combined_df[combined_df.duplicated(keep=False)]

In [612]:
print(duplicates)

Empty DataFrame
Columns: [Continent, Location, Year, CHE, Avg HLE]
Index: []
