Let us take a look at a few details.

In [None]:
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
# Apply seaborn's default plot styling to any figures drawn later in the notebook.
sns.set()

# Data details

How do our state summaries compare to the ones from the Robert Wood Johnson Foundation / University of Wisconsin and the OECD?

In [None]:
# Load the 2020 table stored under data/viz. According to the notebook text
# this is the higher-resolution data that is later summarized per state.
state_csv_path = "../data/viz/state_2020.csv"
state_df = pd.read_csv(state_csv_path)

# Preview the first five rows to check the columns that were read.
state_df.head(n=5)

In [None]:
# Read the RWJF state file and keep only the columns used in this comparison.
rwjf_columns = ['FIPS', 'State', 'Life Expectancy', 'Population']
rwjf_state_df = pd.read_csv("../data/rwjf/state_2020.csv").loc[:, rwjf_columns]

In [None]:
# Preview the first five rows of the trimmed RWJF table.
rwjf_state_df.head(n=5)

In [None]:
# Read the OECD export, keep the location code and the value, and index the
# result by location code so a country can be looked up by label.
oecd_df = (
    pd.read_csv("../data/oecd/DP_LIVE_13082022233500657.csv")
    .loc[:, ['LOCATION', 'Value']]
    .set_index('LOCATION')
)

In [None]:
# Preview the first five rows of the location-indexed OECD table.
oecd_df.head(n=5)

The unweighted **mean** of the state life expectancy values from the RWJF data is the same as what is shown in _Our World in Data_, which comes from the OECD data.

In [None]:
# Plain (unweighted) average over the RWJF rows: every row counts equally.
rwjf_mv = rwjf_state_df.loc[:, 'Life Expectancy'].mean()

# Value recorded in the OECD table under the 'USA' location code.
oecd_mv = oecd_df['Value'].loc['USA']

print(f"Life Expectancy as mean of states {rwjf_mv:.1f} vs. OECD {oecd_mv:.1f}")

If we compute the population-weighted mean instead, the result is about 0.5 years higher.

In [None]:
# Population-weighted mean of the RWJF life expectancy values.
#
# Only rows that have both a life expectancy and a population are used.
# Otherwise a row with a missing life expectancy would still add its
# population to the denominator while adding nothing to the numerator, which
# biases the weighted mean low. With no missing values the result is
# identical to the plain formula.
rwjf_valid_df = rwjf_state_df.dropna(subset=['Life Expectancy', 'Population'])

pop_total = rwjf_valid_df['Population'].sum()
pop_frac = rwjf_valid_df['Population'] / pop_total  # weights, summing to 1

# Compute the weighted mean once and reuse it for the summary and the print.
rwjf_ev = (rwjf_valid_df['Life Expectancy'] * pop_frac).sum()
result = {'population': pop_total,
          'life expectancy': rwjf_ev}
print(f"Life Expectancy as weighted mean of states {rwjf_ev:.1f}")

The state summaries we get by summarizing the higher-resolution data are a bit different, and about 1 year higher on average.

In [None]:
# Roll the rows of state_df up to one record per state: total population and
# the population-weighted mean life expectancy of the rows in that state.
results = []

# Group by the bare column name rather than a one-element list. With a list,
# pandas >= 2.0 yields 1-tuple keys such as ('Alabama',), which would end up
# in the 'State' column; a plain string key yields the state label itself.
for name, gdf in state_df.reset_index().groupby("State"):
    # Drop rows with a missing value in any column so that the weights are
    # normalized over the rows that actually contribute.
    tdf = gdf.dropna()
    pop_total = tdf['population'].sum()
    pop_frac = tdf['population'] / pop_total  # per-row weight within the state
    result = {'State': name,
              'population': pop_total,
              'life expectancy': (tdf['life expectancy'] * pop_frac).sum()}
    results.append(result)

# Build the frame once from the list of records.
state_summary_df = pd.DataFrame(results)

In [None]:
# Preview the first five rows of the per-state summary.
state_summary_df.head(n=5)

In [None]:
# Difference between our life expectancy values and the RWJF ones, summarized.
# NOTE(review): the subtraction aligns the two Series on their default integer
# index, not on the state name. It is only a per-state difference if both
# frames list the same states in the same row order - confirm.
life_expectancy_diff = state_summary_df['life expectancy'].sub(
    rwjf_state_df['Life Expectancy']
)
life_expectancy_diff.describe()

In [None]:
# National figure from our own per-state summary: weight each state's life
# expectancy by its share of the summed population.
pop_total = state_summary_df['population'].sum()
pop_frac = state_summary_df['population'].div(pop_total)

# Evaluate the weighted mean once; the summary dict and the print share it.
national_life_expectancy = state_summary_df['life expectancy'].mul(pop_frac).sum()
result = {
    'population': pop_total,
    'life expectancy': national_life_expectancy,
}
print("Life Expectancy", national_life_expectancy)