# Exploring Economic Wellbeing and Income Inequality in the US Since 2005

In [80]:
import marimo as mo
import altair as alt
import polars as pl
import pandas as pd
from pathlib import Path
import geopandas as gpd
from vega_datasets import data

In [62]:
# Load the percentile-of-income table; the pctlNN columns are used below to
# derive spread and ratio measures of inequality.
pctl = pd.read_csv('Data/IDDA/pctl_of_inc_all_data.csv')

# Inequality measures, appended as new columns in this order: the
# interquartile range, then the 90/10, 50/10 and 90/50 percentile ratios.
pctl = pctl.assign(**{
    'IQR': pctl['pctl75'] - pctl['pctl25'],
    '90/10': pctl['pctl90'] / pctl['pctl10'],
    '50/10': pctl['pctl50'] / pctl['pctl10'],
    '90/50': pctl['pctl90'] / pctl['pctl50'],
})

# Rows whose level is 'pik' (treated throughout as individual income data).
idv = pctl.loc[pctl['level'] == 'pik']

# National series: geo_var 'usst', group 'xall', income variable 'TC',
# sample 'all_w2_pik'. Presumably spans 2005-2019 -- confirm against the data.
national_rows = (
    (idv['geo_var'] == 'usst')
    & (idv['group_var'] == 'xall')
    & (idv['inc_var'] == 'TC')
    & (idv['samp'] == 'all_w2_pik')
)
idv_us = idv.loc[national_rows]

median income

In [None]:
# Line chart of the national median (pctl50) by year. The fold turns the
# column into (metric, value) pairs so the same encoding works for the
# multi-percentile charts.
(
    alt.Chart(idv_us)
    .transform_fold(['pctl50'], as_=['metric', 'value'])
    .mark_line()
    .encode(
        x=alt.X('year:O'),
        y=alt.Y('value:Q'),
        color=alt.Color('metric:N'),
    )
)

What we see is that the median individual income in the US has risen over the past 14 years. This seems to be good, but what if we compare this to other income percentiles in the US? How do the average American's improvements compare to those of the rest of income earners?

In [None]:
# One line per income percentile (10th through 95th) for the national series.
(
    alt.Chart(idv_us)
    .transform_fold(
        [f'pctl{rank}' for rank in (10, 25, 50, 75, 90, 95)],
        as_=['metric', 'value'],
    )
    .mark_line()
    .encode(
        x=alt.X('year:O'),
        y=alt.Y('value:Q'),
        color=alt.Color('metric:O'),
    )
)

The bottom 10% and the bottom 25% of income earners do not seem to have seen significant improvements in their incomes over the 14-year period. On the other hand, the top 25% of earners see gains slightly better than the median earners, but the top 10% and 5% see significantly higher increases in their incomes.

Although the changes in income seem to be most significant for high earners, do these changes hold when analyzing the percent changes in income over the 14 years?

In [None]:
# Percent change in each nominal income percentile between 2005 and 2019
# for the national series.
idv_us_year_filt = idv_us.query("year == 2005 or year == 2019")
cols_to_piv = ['pctl10', 'pctl25', 'pctl50', 'pctl75', 'pctl90', 'pctl95']

# One row per year; the columns become a (percentile column, geo_abb) MultiIndex.
idv_us_pivot = idv_us_year_filt.pivot(index = 'year', columns= 'geo_abb', values= cols_to_piv)

# (end - start) / start, expressed in percent.
idv_us_change = (idv_us_pivot.loc[2019] - idv_us_pivot.loc[2005]) / idv_us_pivot.loc[2005] * 100

# reset_index() labels the unnamed percentile level 'level_0' and the unnamed
# value column 0; give both readable names for the chart.
us_change = idv_us_change.reset_index()

final_us_change = us_change.rename(columns={
    'level_0': 'Percentile', 0: 'Percentage_Change'
})

pct_chg_points = alt.Chart(final_us_change).mark_bar().encode(
    x = alt.X('Percentile:O'),
    y = alt.Y('Percentage_Change:Q'),
    color = alt.Color('Percentile:O', legend = None)
).properties(
    # Descriptive title in place of the leftover placeholder text.
    title = "Percent Change in Individual Income by Percentile, 2005-2019",
    width = 1000
)

pct_chg_points

## ADD TREND LINE

It appears that there is an inverse relationship between the percentile level and the percentage change in income. This is potentially good. The bottom 10% seeing greater differences relative to their original position is great, but these differences are not very significant as we move from the bottom 10% to the top 10%.

Bringing in more data, we can look at how these trends compare when we account for inflation (adjusting incomes to 2019 price levels for each year). Do the increases seem to outpace the ever-increasing price levels?

In [None]:
# Nominal and adjusted ("_adj") series for every percentile, interleaved as
# pctl10, pctl10_adj, pctl25, pctl25_adj, ... so each pair sits together.
nominal_and_adjusted = [
    column
    for rank in (10, 25, 50, 75, 90, 95)
    for column in (f'pctl{rank}', f'pctl{rank}_adj')
]

(
    alt.Chart(idv_us)
    .transform_fold(nominal_and_adjusted, as_=['metric', 'value'])
    .mark_line()
    .encode(
        x=alt.X('year:O'),
        y=alt.Y('value:Q'),
        color=alt.Color('metric:O'),
    )
)

For just about every percentile, the steepness that suggested rising incomes has significantly diminished. However, we still see fairly significant increases for the top percentiles of earners.

What does this look like in terms of percentage changes?

In [None]:
# Percent change in each adjusted ("_adj") income percentile between 2005
# and 2019 for the national series.
idv_us_year_filt_adj = idv_us.query("year == 2005 or year == 2019")
cols_to_piv_adj = ['pctl10_adj', 'pctl25_adj', 'pctl50_adj', 'pctl75_adj', 'pctl90_adj', 'pctl95_adj']

# One row per year; the columns become a (percentile column, geo_abb) MultiIndex.
idv_us_pivot_adj = idv_us_year_filt_adj.pivot(index = 'year', columns= 'geo_abb', values= cols_to_piv_adj)

# (end - start) / start, expressed in percent.
idv_us_change_adj = (idv_us_pivot_adj.loc[2019] - idv_us_pivot_adj.loc[2005]) / idv_us_pivot_adj.loc[2005] * 100

# reset_index() labels the unnamed percentile level 'level_0' and the unnamed
# value column 0; give both readable names for the chart.
us_change_adj = idv_us_change_adj.reset_index()

final_us_change_adj = us_change_adj.rename(columns={
    'level_0': 'Percentile', 0: 'Percentage_Change'
})

pct_chg_points_adj = alt.Chart(final_us_change_adj).mark_bar().encode(
    x = alt.X('Percentile:O'),
    y = alt.Y('Percentage_Change:Q'),
    color = alt.Color('Percentile:O', legend = None)
).properties(
    # Descriptive title in place of the leftover placeholder text.
    title = "Percent Change in Inflation-Adjusted Individual Income by Percentile, 2005-2019",
    width = 1000
)

pct_chg_points_adj
# Layer the nominal bars (built in the earlier percent-change cell) with the
# adjusted bars on a shared set of axes.
pct_chg_points + pct_chg_points_adj

Would love it if I could get lines to work on these, or at least get better colors.

The discrepancies between the top and the bottom are apparent. Income is rapidly expanding at the top, while those at the bottom are not seeing much gain in income at all.

We have seen that, in the United States as a whole, incomes are rising much more rapidly for high income earners than they are for low income earners. But inequality can be investigated in other ways than between percentiles. How do median incomes differ state by state?

In [None]:
# State outlines: the 'states' layer of the vega_datasets us_10m TopoJSON.
us_topojson_url = data.us_10m.url
geo_us_states = gpd.read_file(us_topojson_url, layer='states', driver='TopoJSON')

# State-level individual rows for the 'xall' group.
# NOTE(review): unlike idv_us, this is not filtered on inc_var / samp --
# confirm each state/year has a single row, otherwise the maps and boxplots
# built from this frame may mix series.
state_all_rows = (idv['geo_var'] == 'state') & (idv['group_var'] == 'xall')
idv_state_all = idv.loc[state_all_rows]

In [None]:
# Choropleth of the state-level pctl50 value in 2005.
idv_2005_state_all = idv_state_all.loc[idv_state_all['year'] == 2005]

# Join pctl50 onto each state shape by matching the shape 'id' to geo_var_val.
median_2005_lookup = alt.LookupData(
    data=idv_2005_state_all, key='geo_var_val', fields=['pctl50']
)
median_2005_color = alt.Color(
    'pctl50:Q', title='pctl50', scale=alt.Scale(scheme='inferno')
)

(
    alt.Chart(geo_us_states)
    .mark_geoshape()
    .transform_lookup(lookup='id', from_=median_2005_lookup)
    .encode(median_2005_color)
    .project(type='albersUsa')
    .properties(title='pctl50 Individual Income by State in 2005')
)

Not sure I want the above graph; it might add more confusion.

In [None]:
# Choropleth of the state-level pctl50 value in 2019. The 2019 subset is
# reused by the later 2019 maps.
idv_2019_state_all = idv_state_all.loc[idv_state_all['year'] == 2019]

# Join pctl50 onto each state shape by matching the shape 'id' to geo_var_val.
median_2019_lookup = alt.LookupData(
    data=idv_2019_state_all, key='geo_var_val', fields=['pctl50']
)
median_2019_color = alt.Color(
    'pctl50:Q', title='pctl50', scale=alt.Scale(scheme='inferno')
)

(
    alt.Chart(geo_us_states)
    .mark_geoshape()
    .transform_lookup(lookup='id', from_=median_2019_lookup)
    .encode(median_2019_color)
    .project(type='albersUsa')
    .properties(title='pctl50 Individual Income by State in 2019')
)

In [None]:
# Spread of state-level pctl50 values, one box per year.
(
    alt.Chart(idv_state_all)
    .mark_boxplot()
    .encode(x='pctl50:Q', y='year:O')
)

In [None]:
# Choropleth of the state-level pctl10 value in 2019.
# Join pctl10 onto each state shape by matching the shape 'id' to geo_var_val.
pctl10_2019_lookup = alt.LookupData(
    data=idv_2019_state_all, key='geo_var_val', fields=['pctl10']
)
pctl10_2019_color = alt.Color(
    'pctl10:Q', title='pctl10', scale=alt.Scale(scheme='inferno')
)

(
    alt.Chart(geo_us_states)
    .mark_geoshape()
    .transform_lookup(lookup='id', from_=pctl10_2019_lookup)
    .encode(pctl10_2019_color)
    .project(type='albersUsa')
    .properties(title='pctl10 Individual Income by State in 2019')
)

In [None]:
# Spread of state-level pctl10 values, one box per year.
(
    alt.Chart(idv_state_all)
    .mark_boxplot()
    .encode(x='pctl10:Q', y='year:O')
)

In [None]:
# Choropleth of the state-level pctl90 value in 2019.
# Join pctl90 onto each state shape by matching the shape 'id' to geo_var_val.
pctl90_2019_lookup = alt.LookupData(
    data=idv_2019_state_all, key='geo_var_val', fields=['pctl90']
)
pctl90_2019_color = alt.Color(
    'pctl90:Q', title='pctl90', scale=alt.Scale(scheme='inferno')
)

(
    alt.Chart(geo_us_states)
    .mark_geoshape()
    .transform_lookup(lookup='id', from_=pctl90_2019_lookup)
    .encode(pctl90_2019_color)
    .project(type='albersUsa')
    .properties(title='pctl90 Individual Income by State in 2019')
)

In [None]:
# Spread of state-level pctl90 values, one box per year.
(
    alt.Chart(idv_state_all)
    .mark_boxplot()
    .encode(x='pctl90:Q', y='year:O')
)

A cross-state comparison shows evidence of income inequality as well. In any given year, there are disparities between states in terms of median income, 10th percentile income, and 90th percentile income. Moreover, this gap between states has expanded as time has gone on.

Are there disparities within states as well? Probably, but let's look at how severe they are. First, let's look at how these percentiles evolve within each state over the 14-year period:

The important question to ask is: in states where incomes at all percentiles are particularly high (NJ, DC, NY, MA, etc.), are the people at the bottom still doing alright in the grand scheme of things? It can seem like the low earners in a place like California might still be alright due to the portrayal of the map.

In [None]:
# Small-multiple heatmaps: one panel per state (geo_abb), years across,
# percentiles down, colour showing the percentile value.
heatmap_percentiles = [f'pctl{rank}' for rank in (10, 25, 50, 75, 90, 95, 98)]

# Year tick labels are hidden to keep the small panels readable; the axis
# title carries the range instead.
year_axis = alt.Axis(labels=False, title="'05 - '19")

(
    alt.Chart(idv_state_all)
    .transform_fold(heatmap_percentiles, as_=['percentile', 'value'])
    .mark_rect()
    .encode(
        x=alt.X('year:O', axis=year_axis),
        y='percentile:O',
        color=alt.Color('value:Q', scale=alt.Scale(scheme='inferno')),
    )
    .properties(width=40, height=80)
    .facet('geo_abb:N', columns=17)
)

Looking at how these percentiles evolve state by state, we can see that some states are growing far past others.  

There are various measures of inequality:
    1. IQR (75th percentile - 25th percentile)
    2. 90/10 (90th Percentile / 10th Percentile)
    3. 90/50 (90th Percentile / 50th Percentile)
    4. 50/10 (50th Percentile / 10th Percentile)

Here are these measures of inequality within each state in 2019:

In [None]:
# Choropleth of the state-level interquartile range (pctl75 - pctl25) in 2019.
alt.Chart(geo_us_states).mark_geoshape().transform_lookup(
    # Join IQR onto each state shape by matching the shape 'id' to geo_var_val.
    lookup='id',
    from_=alt.LookupData(data=idv_2019_state_all, key='geo_var_val', fields=['IQR'])
).encode(
    # Legend title now names the plotted field; it previously read 'pctl90',
    # carried over from the 90th-percentile map.
    alt.Color('IQR:Q', title='IQR', scale = alt.Scale(scheme= 'inferno'))
).project(
    type='albersUsa'
).properties(
    title = 'IQR Individual Income by State in 2019'
)

In [None]:
# Choropleth of the state-level 90/10 ratio (pctl90 / pctl10) in 2019.
alt.Chart(geo_us_states).mark_geoshape().transform_lookup(
    # Join 90/10 onto each state shape by matching the shape 'id' to geo_var_val.
    lookup='id',
    from_=alt.LookupData(data=idv_2019_state_all, key='geo_var_val', fields=['90/10'])
).encode(
    # Legend title now names the plotted field; it previously read 'pctl90',
    # carried over from the 90th-percentile map.
    alt.Color('90/10:Q', title='90/10', scale = alt.Scale(scheme= 'inferno'))
).project(
    type='albersUsa'
).properties(
    title = '90/10 Individual Income by State in 2019'
)

In [None]:
# Choropleth of the state-level 50/10 ratio (pctl50 / pctl10) in 2019.
alt.Chart(geo_us_states).mark_geoshape().transform_lookup(
    # Join 50/10 onto each state shape by matching the shape 'id' to geo_var_val.
    lookup='id',
    from_=alt.LookupData(data=idv_2019_state_all, key='geo_var_val', fields=['50/10'])
).encode(
    # Legend title now names the plotted field; it previously read 'pctl90',
    # carried over from the 90th-percentile map.
    alt.Color('50/10:Q', title='50/10', scale = alt.Scale(scheme= 'inferno'))
).project(
    type='albersUsa'
).properties(
    title = '50/10 Individual Income by State in 2019'
)

In [None]:
# Choropleth of the state-level 90/50 ratio (pctl90 / pctl50) in 2019.
alt.Chart(geo_us_states).mark_geoshape().transform_lookup(
    # Join 90/50 onto each state shape by matching the shape 'id' to geo_var_val.
    lookup='id',
    from_=alt.LookupData(data=idv_2019_state_all, key='geo_var_val', fields=['90/50'])
).encode(
    # Legend title now names the plotted field; it previously read 'pctl90',
    # carried over from the 90th-percentile map.
    alt.Color('90/50:Q', title='90/50', scale = alt.Scale(scheme= 'inferno'))
).project(
    type='albersUsa'
).properties(
    title = '90/50 Individual Income by State in 2019'
)

Who's winning? Are different demographics struggling at the bottom/expanding at the top?