Extract the subset of the data we want to look at.

In [None]:
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [None]:
# Apply seaborn's default theme so the matplotlib figures created in the
# cells that follow pick up seaborn styling.
sns.set()

# Read in the data

In [None]:
# Load ../data/division.csv (path is relative to the notebook's working
# directory) into raw_df.
raw_df = pd.read_csv("../data/division.csv")

In [None]:
# Preview the first rows of raw_df to check the columns that were read.
raw_df.head()

In [None]:
# Load ../data/viz/state.csv into state_df. The cells below read its
# 'group', 'life expectancy' and 'income' columns.
state_df = pd.read_csv("../data/viz/state.csv")

In [None]:
# Preview the first rows of state_df.
state_df.head()

In [None]:
# Load the OECD export and keep only the location code and the measured value.
# NOTE(review): 'Value' is later drawn on the life-expectancy axis, so it is
# presumably life expectancy in years — confirm against the export's
# INDICATOR/SUBJECT/TIME columns, which are dropped here.
oecd_df = pd.read_csv("../data/oecd/DP_LIVE_13082022233500657.csv")

# .copy() makes the two-column frame independent of the raw read, so adding
# the 'name' column below is a plain assignment instead of a write to a
# column slice (which pandas reports as SettingWithCopyWarning).
oecd_df = oecd_df[['LOCATION', 'Value']].copy()

# Comparison countries: three-letter LOCATION code -> display name.
country_names = {'AUS': 'Australia',
                 'AUT': 'Austria',
                 'BEL': 'Belgium',
                 'CAN': 'Canada',
                 'CHE': 'Switzerland',
                 'DEU': 'Germany',
                 'DNK': 'Denmark',
                 'ESP': 'Spain',
                 'FIN': 'Finland',
                 'FRA': 'France',
                 'GBR': 'United Kingdom',
                 'IRL': 'Ireland',
                 'ISR': 'Israel',
                 'ITA': 'Italy',
                 'JPN': 'Japan',
                 'KOR': 'South Korea',
                 'NLD': 'Netherlands',
                 'NOR': 'Norway',
                 'NZL': 'New Zealand',
                 'PRT': 'Portugal',
                 'SWE': 'Sweden',
                 'USA': 'USA'}

# Mapping with the dict directly leaves a missing value for every code that
# is not a key above; those rows are dropped by the notna() filter.
oecd_df['name'] = oecd_df['LOCATION'].map(country_names)

# Keep only the comparison countries and index by code so that a single
# country can be looked up with oecd_df.loc['JPN', 'Value'].
oecd_df = oecd_df[oecd_df['name'].notna()]
oecd_df = oecd_df.set_index('LOCATION')
oecd_df.head()

In [None]:
# Unweighted mean of 'Value' over the rows currently in oecd_df; drawn as a
# horizontal reference line in the plot cell.
oecd_mean = oecd_df['Value'].mean()

# Plot

In [None]:
# Life expectancy by group: one scatter series per group, with the OECD
# comparison mean and the Japan value drawn as horizontal reference lines.
fig, ax = plt.subplots()

# Drop the 'AIAN' group and any row whose life expectancy is not positive.
# NOTE(review): non-positive values presumably mark missing data — confirm.
tdf = state_df[state_df['group'] != 'AIAN']
tdf = tdf[tdf['life expectancy'] > 0]

# Order the groups by their mean life expectancy. The column is selected
# before aggregating: a bare groupby(...).mean() averages every column and,
# on pandas >= 2.0, raises TypeError if the frame has any non-numeric column.
col_order = (
    tdf.groupby('group')[['life expectancy']]
    .mean()
    .sort_values('life expectancy')
)

# One scatter call per group so each group gets its own colour from the
# colour cycle; the group label is used as the categorical x position.
for group in col_order.index:
    ttdf = tdf[tdf['group'] == group]
    ax.scatter(ttdf['group'], ttdf['life expectancy'], alpha=0.7)

# Reference line: mean over the OECD comparison countries.
ax.axhline(oecd_mean, alpha=0.5)
ax.annotate("OECD Comparison mean", (1.2, oecd_mean - 2))

# Reference line: Japan's value from the OECD table.
jpn_val = oecd_df.loc['JPN', 'Value']
ax.axhline(jpn_val, alpha=0.5)
ax.annotate("JPN", (1.2, jpn_val + 1))

In [None]:
# Rows of state_df for the 'Asian' group, ordered from lowest to highest
# life expectancy.
state_df.loc[state_df['group'].eq('Asian')].sort_values(by="life expectancy")

In [None]:
# Count the 'Asian' rows whose life expectancy is above 84.6.
# NOTE(review): 84.6 is hard-coded; it may be meant to match the Japan
# reference value used in the plot — confirm.
a_tdf = state_df.loc[state_df['group'].eq('Asian')]
int(a_tdf['life expectancy'].gt(84.6).sum())

In [None]:
# Count the 'Hispanic' rows whose life expectancy is above 84.6.
# NOTE(review): 84.6 is hard-coded; it may be meant to match the Japan
# reference value used in the plot — confirm.
h_tdf = state_df.loc[state_df['group'].eq('Hispanic')]
int(h_tdf['life expectancy'].gt(84.6).sum())

In [None]:
# Show the 'Hispanic' rows ordered from lowest to highest life expectancy.
h_tdf.sort_values('life expectancy')

# Life Expectancy vs. Income

In [None]:
# One regression panel per group (income on x, life expectancy on y), with
# panels ordered by the group's mean life expectancy.
#
# The order is derived from tdf, the same filtered frame that is plotted.
# Deriving it from the unfiltered state_df would list 'AIAN' (which tdf
# excludes, producing an empty panel) and would let the non-positive
# life-expectancy rows that tdf drops affect the ordering.
#
# Only the 'life expectancy' column is aggregated: a bare
# groupby(...).mean() raises TypeError on pandas >= 2.0 if the frame has any
# non-numeric column.
col_order = tdf.groupby('group')['life expectancy'].mean().sort_values().index
g = sns.FacetGrid(tdf, col='group', col_order=col_order)
g.map(sns.regplot, "income", "life expectancy")