In [1]:
# Imports

import pandas as pd

## 1. Load Data 

In the first step we need to load our data

In [2]:
vax_rates = pd.read_csv('COVID-19_vaccination_rates.csv') 
vax_rates = vax_rates.loc[(vax_rates["date"]=="2021-10-25")]
vax_rates = vax_rates[["location","people_fully_vaccinated_per_hundred"]]
vax_rates.rename(columns={'location': 'Country Name'}, inplace=True)
vax_rates = vax_rates.loc[(vax_rates["people_fully_vaccinated_per_hundred"]==vax_rates["people_fully_vaccinated_per_hundred"])]
vax_rates

Unnamed: 0,Country Name,people_fully_vaccinated_per_hundred
568,Africa,5.53
901,Albania,30.57
2713,Argentina,55.56
3219,Aruba,71.37
3578,Asia,41.54
...,...,...
65140,Vietnam,22.23
65472,Wales,70.69
65732,Wallis and Futuna,49.25
66097,World,37.62


In [3]:
gdp = pd.read_csv('gdp_per_capita.csv') 
gdp = gdp[["Country Name","2020"]]
gdp.rename(columns={'2020': 'gdp_per_capita_2020'}, inplace=True)
gdp = gdp.loc[(gdp["gdp_per_capita_2020"]==gdp["gdp_per_capita_2020"])]
gdp = gdp.set_index('Country Name')
gdp

Unnamed: 0_level_0,gdp_per_capita_2020
Country Name,Unnamed: 1_level_1
Africa Eastern and Southern,3677.111255
Afghanistan,2077.874554
Africa Western and Central,4240.534782
Angola,6443.032680
Albania,13899.933981
...,...
Samoa,6766.228447
Kosovo,11383.404733
South Africa,13355.598709
Zambia,3456.322534


In [4]:
# Change gov_trst from Country code to country name
codes = pd.read_csv('country_codes.csv')

lookup_table = codes[['Alpha-3 code','Country']]

# Lookup table to dict
c_map = {}
for i, t in lookup_table.iterrows():
    key = t[0][2:5]
    val = t[1]
    
    c_map[key] = val
    
trust_in_gov = pd.read_csv('trust_in_government.csv') 
    
# enhance government trust df
trust_in_gov = trust_in_gov.assign(country=trust_in_gov['LOCATION'].map(c_map))
 
trust_in_gov = trust_in_gov.loc[(trust_in_gov["TIME"]==2019)][["country","Value"]]
trust_in_gov.rename(columns={"Value": "trust_in_government_2019"}, inplace=True)
trust_in_gov

Unnamed: 0,country,trust_in_government_2019
12,Australia,46.870001
26,Austria,51.233826
40,Belgium,32.790115
55,Canada,54.928097
83,Denmark,63.299903
96,Finland,63.918815
111,France,38.184924
126,Germany,56.846043
140,Greece,39.642462
154,Hungary,48.393419


In [5]:
secondary_edu = pd.read_csv('secondary_education.csv')  
secondary_edu = secondary_edu.loc[(secondary_edu["TIME"]==2019)].loc[(secondary_edu["SUBJECT"]=="UPPSRY")][["LOCATION","Value"]]
secondary_edu.rename(columns={"Value": "secondary_education_2019"}, inplace=True)

# Lookup table to dict
c_map = {}
for i, t in lookup_table.iterrows():
    key = t[0][2:5]
    val = t[1]
    
    c_map[key] = val
        
# enhance government trust df
secondary_edu = secondary_edu.assign(country=secondary_edu['LOCATION'].map(c_map))
secondary_edu = secondary_edu[['country', 'secondary_education_2019']]

secondary_edu = secondary_edu.dropna()
secondary_edu

Unnamed: 0,country,secondary_education_2019
9,Austria,77.636
33,Belgium,77.047
50,Canada,84.317
79,Czech Republic,80.51
104,Denmark,81.629
137,Finland,84.907
164,Germany,73.092
180,Greece,91.409
203,Hungary,77.257
232,Iceland,80.011


In [6]:
tert_edu = pd.read_csv('tertiary_education.csv')[["LOCATION","SUBJECT", "TIME", "Value"]]
tert_edu = tert_edu.loc[(tert_edu["TIME"]==2019)].loc[(tert_edu["SUBJECT"]=="BACHR_MEN")|(tert_edu["SUBJECT"]=="BACHR_WOMEN")]
tert_edu.rename(columns={"Value": "tertiary_education_2019"}, inplace=True)

tert_edu = tert_edu.pivot(index="LOCATION", columns="SUBJECT", values="tertiary_education_2019")
tert_edu['tertiary_education_2019'] = tert_edu["BACHR_MEN"] + tert_edu['BACHR_WOMEN']
tert_edu = tert_edu["tertiary_education_2019"].to_frame()
tert_edu = tert_edu.reset_index() 

# Lookup table to dict
c_map = {}
for i, t in lookup_table.iterrows():
    key = t[0][2:5]
    val = t[1]
    
    c_map[key] = val
        
# enhance government trust df
tert_edu = tert_edu.assign(country=tert_edu['LOCATION'].map(c_map))

tert_edu = tert_edu[['country', 'tertiary_education_2019']]
tert_edu

Unnamed: 0,country,tertiary_education_2019
0,Australia,69.343
1,Austria,37.418
2,Belgium,80.736
3,Switzerland,72.556
4,Chile,52.185
5,Czech Republic,50.212
6,Germany,64.156
7,Denmark,81.471
8,Spain,63.931
9,Estonia,54.38


In [7]:
cov_data = pd.read_csv('./worldwide_covid_data.csv')
cov_deaths = cov_data.rename(columns={'Country':'location'})[['location','Deaths/1M population']].set_index('location')
cov_deaths

Unnamed: 0_level_0,Deaths/1M population
location,Unnamed: 1_level_1
Afghanistan,182
Albania,1034
Algeria,133
Andorra,1679
Angola,50
...,...
Venezuela,176
Vietnam,230
Yemen,62
Zambia,192


In [8]:
betterlife = pd.read_csv('./betterlifeindex_2019.csv')

health = betterlife 
health.rename(columns={"Country": "Country Name"}, inplace=True)
health = health[["LOCATION", "Country Name","Indicator", "Value"]].loc[(health["Inequality"]=="Total")]
health = health.loc[(health["Indicator"]=="Self-reported health")][["Country Name", "LOCATION", "Value"]]
health.rename(columns={"Value": "Self-reported_health_2019"}, inplace=True)
health = health.set_index('Country Name')
health

Unnamed: 0_level_0,LOCATION,Self-reported_health_2019
Country Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Australia,AUS,85.0
Austria,AUT,70.0
Belgium,BEL,74.0
Canada,CAN,88.0
Czech Republic,CZE,60.0
Denmark,DNK,71.0
Finland,FIN,70.0
France,FRA,66.0
Germany,DEU,65.0
Greece,GRC,74.0


In [9]:
life_exp = betterlife.loc[(betterlife["Indicator"]=="Life expectancy")][["Country Name", "LOCATION", "Value"]]
life_exp.rename(columns={"Value": "Life_expectancy_2019"}, inplace=True)
life_exp = life_exp.set_index('Country Name')
life_exp = life_exp[['Life_expectancy_2019']]
life_exp

Unnamed: 0_level_0,Life_expectancy_2019
Country Name,Unnamed: 1_level_1
Australia,82.5
Austria,81.7
Belgium,81.5
Canada,81.9
Czech Republic,79.1
...,...
Lithuania,74.8
Colombia,73.1
Lithuania,69.5
Colombia,79.4


In [10]:
work = betterlife.loc[(betterlife["Indicator"]=="Employees working very long hours")][["Country Name", "LOCATION", "Value"]]
work.rename(columns={"Value": "Employees_working_hours_2019"}, inplace=True)
work = work.set_index('Country Name')
work = work[['Employees_working_hours_2019']]
work

Unnamed: 0_level_0,Employees_working_hours_2019
Country Name,Unnamed: 1_level_1
Australia,13.04
Austria,6.66
Belgium,4.75
Canada,3.69
Czech Republic,5.65
...,...
Lithuania,0.54
Colombia,32.09
Lithuania,0.67
Colombia,19.37


In [11]:
df1 = pd.read_csv('unemployment_rate.csv') 
df1 = df1[["Country Name","2020"]] # select and reduce
df1.rename(columns={"2020": "unemplyment_2020"}, inplace=True)
df1=df1.loc[(df1["unemplyment_2020"]==df1["unemplyment_2020"])]