In [2]:
#Imports and preamble
import pandas as pd
import matplotlib.pyplot as plt


In [None]:
# Load data for GDP and remove everything that isn't related to the year 2017
gdp = pd.read_csv("data/gdp-per-capita-worldbank.csv")
gdp_year_not_2017 = gdp[ gdp['Year'] != 2017]
gdp = gdp.drop(gdp_year_not_2017.index, axis=0)



In [None]:
# Load data for Life Expectancy and remove everything that isn't related to the year 2017
le = pd.read_csv("data/life-expectancy.csv")
le_year_not_2017 = le[ le['Year'] != 2017]
le = le.drop(le_year_not_2017.index, axis=0)




In [None]:
# Find countries with life expectancy one standard deviation above the mean (in 2017)
mean = le["Life expectancy (years)"].mean()
std = le["Life expectancy (years)"].std()
le_one_std_above_mean = le[le["Life expectancy (years)"] > mean+std]
print(le_one_std_above_mean[["Entity", "Life expectancy (years)"]].to_latex(index=False))



In [None]:
# Merge GDP with LE
le_gdp = pd.merge(left=gdp, right=le, left_on=["Year", "Code", "Entity"], right_on=["Year", "Code", "Entity"])

In [None]:
# Find countries with high GDP and low Life Expectancy
# As "high" is subjective, we'll here define it has being above mean. "Low" is defined in the opposite manner. You can change the variable num_std to use your own definition of "high".

gdp_mean = le_gdp["GDP per capita (int.-$) (constant 2011 international $)"].mean()
le_mean = le_gdp["Life expectancy (years)"].mean()
gdp_std = le_gdp["GDP per capita (int.-$) (constant 2011 international $)"].std()

countries_with_low_gdp_high_le = le_gdp.loc[(le_gdp["GDP per capita (int.-$) (constant 2011 international $)"] < gdp_mean)
    & (le_gdp["Life expectancy (years)"] > le_mean)]
    
print(countries_with_low_gdp_high_le[["Entity", "GDP per capita (int.-$) (constant 2011 international $)", "Life expectancy (years)"]].to_latex(index=False))


In [None]:
# Find countries with high GDP and low life expectancy
countries_with_high_gdp_low_le = le_gdp.loc[(le_gdp["GDP per capita (int.-$) (constant 2011 international $)"] > gdp_mean+(gdp_std))
     & (le_gdp["Life expectancy (years)"] < le_mean)]
print(countries_with_high_gdp_low_le)

In [None]:
#Make the plot
x = le_gdp["GDP per capita (int.-$) (constant 2011 international $)"]
y = le_gdp["Life expectancy (years)"]

plt.figure(figsize=(30,20))
plt.scatter(x, y, s=500, color="b", alpha=0.5)
plt.xlabel("GDP",fontsize=30)
plt.ylabel("Life Expectancy", fontsize=30)
plt.tick_params(axis="both",which="major",labelsize=20)

#Puts the country code on the points, this for loop takes some time to run
for i, code in enumerate(le_gdp["Code"]):
    plt.annotate(code, (x[i], y[i]), fontsize=10, ha="center", va="center")

plt.show

In [3]:
# Load data for PISA Test scores and remove everything that isn't related to the year 2016
PISA = pd.read_csv("data/pisa-test-score-mean-performance-on-the-science-scale.csv")
PISA_year_not_2016 = PISA[ PISA['Year'] != 2017]
PISA = PISA.drop(PISA_year_not_2016.index, axis=0)

In [6]:
# Load data for internet usage and remove everything that isn't related to the year 2016
internet = pd.read_csv("data/share-of-individuals-using-the-internet.csv")
internet_year_not_2016 = internet[ internet['Year'] != 2016]
internet = internet.drop(internet_year_not_2016.index, axis=0)
# Merge internet with PISA

internet_PISA = pd.merge(left=internet, right=PISA, left_on=["Year", "Code", "Entity"], right_on=["Year", "Code", "Entity"])
print(internet_PISA)

Empty DataFrame
Columns: [Entity, Code, Year, Individuals using the Internet, PISA: Mean performance on the science scale]
Index: []


In [28]:
# Make the plot
x = internet["Individuals using the Internet"]
y = PISA["PISA: Mean performance on the science scale"]

plt.figure(figsize=(30,20))
plt.scatter(x, y, s=500, color="b", alpha=0.5)
plt.xlabel("Internet usage", fontsize=30)
plt.ylabel("PISA test scores (science)", fontsize=30)
plt.tick_params(axis="both", which="major", labelsize=20)

# Puts the country code on the points, this for loop takes some time to run
for i, code in enumerate(internet_PISA["Code"]):
    plt.annotate(code,(x[i],y[i]), fontsize=10, ha="center", va="center")

    plt.show

KeyError: 'Individuals using the Internet'