# IDDA 

In [220]:
# best to follow convention, "notebook" style imports are allowed/preferred
import marimo as mo
import altair as alt
import polars as pl
import pandas as pd
from pathlib import Path
import geopandas as gpd
from vega_datasets import data

In [None]:
# Load pctl_of_inc_all_data.csv into a DataFrame. The path is relative, so the
# notebook must be run from the directory that contains Data/IDDA/.
pctl = pd.read_csv('Data/IDDA/pctl_of_inc_all_data.csv')
# Cell output: the column labels of the loaded table.
pctl.columns

In [None]:
# Split the percentile table on the `level` column.
# NOTE(review): the variable names suggest 'pik' rows are individual-level and
# 'mafid' rows household-level -- confirm against the data dictionary.
idv = pctl[pctl['level'] == 'pik']
hh = pctl[pctl['level'] == 'mafid']

# 2019 rows at state geography for the 'xall' group. The trailing .copy()
# makes this an independent frame, so the column assignments below write to it
# directly rather than to a slice of `idv` (chained assignment, which raises
# pandas' SettingWithCopyWarning).
# NOTE(review): unlike the national `idv_us` selection, this one does not
# filter on inc_var / samp; if those vary within a state there will be several
# rows per state -- confirm that is intended.
idv_2019_state_all = idv.query(
    "year == 2019 and geo_var == 'state' and group_var == 'xall'"
).copy()

# Spread measures derived from the percentile columns.
# Interquartile range: 75th minus 25th percentile.
idv_2019_state_all['IQR'] = idv_2019_state_all['pctl75'] - idv_2019_state_all['pctl25']

# Percentile ratios. A zero denominator yields inf/NaN rather than an error.
idv_2019_state_all['90/10'] = idv_2019_state_all['pctl90'] / idv_2019_state_all['pctl10']

idv_2019_state_all['90/50'] = idv_2019_state_all['pctl90'] / idv_2019_state_all['pctl50']

idv_2019_state_all['50/10'] = idv_2019_state_all['pctl50'] / idv_2019_state_all['pctl10']

# Cell output: first rows, including the derived columns.
idv_2019_state_all.head()

In [None]:
# Read the 'states' layer of the vega_datasets us_10m TopoJSON (located via
# data.us_10m.url) into geo_us_states, the geometry source for the state map
# drawn by graph_income_pctl_by_state.
geo_us_states = gpd.read_file(data.us_10m.url, driver='TopoJSON', layer='states')

## Individual (IDV) Income Percentiles by State for a Single Year

In [None]:
def graph_income_pctl_by_state(pctl_val):
    """Return a US state choropleth coloured by one column of the 2019 table.

    Args:
        pctl_val: Name of the column in the module-level
            ``idv_2019_state_all`` frame to colour by (for example
            ``'pctl50'`` or ``'90/10'``). It is also used as the legend title
            and as the prefix of the chart title.

    Returns:
        The Altair chart: ``geo_us_states`` shapes joined to
        ``idv_2019_state_all`` and drawn in the ``albersUsa`` projection.
    """
    # Join each shape's `id` to the row whose `geo_var_val` matches.
    # NOTE(review): the join only matches if `id` and `geo_var_val` hold the
    # same codes with the same type -- confirm.
    state_values = alt.LookupData(
        data=idv_2019_state_all, key='geo_var_val', fields=[pctl_val]
    )
    fill = alt.Color(
        f'{pctl_val}:Q', title=pctl_val, scale=alt.Scale(scheme='inferno')
    )

    shapes = alt.Chart(geo_us_states).mark_geoshape()
    joined = shapes.transform_lookup(lookup='id', from_=state_values)
    return (
        joined.encode(fill)
        .project(type='albersUsa')
        .properties(title=f'{pctl_val} Individual Income by State in 2019')
    )


graph_income_pctl_by_state('90/10')

In [None]:
# All years of `idv` at state geography for the 'xall' group.
idv_state_all = idv[(idv['geo_var'] == 'state') & (idv['group_var'] == 'xall')]
idv_state_all.head()
# Cell output: the column labels of the selection.
idv_state_all.columns

In [None]:
# Heatmap of percentile values: the six percentile columns are folded into
# long form (percentile, value), drawn as year x percentile rectangles coloured
# by value, with one facet per `geo_abb`, three facets per row.
(
    alt.Chart(idv_state_all)
    .transform_fold(
        fold=['pctl10', 'pctl25', 'pctl50', 'pctl75', 'pctl90', 'pctl95'],
        as_=['percentile', 'value'],
    )
    .mark_rect()
    .encode(
        alt.X('year:O'),
        alt.Y('percentile:O'),
        alt.Color('value:Q', scale=alt.Scale(scheme='inferno')),
    )
    .facet('geo_abb:N', columns=3)
)

In [None]:
# Small multiples: one 40x80 panel per `geo_abb` (17 per row), each with one
# line per folded percentile column over `year`. The x-axis tick labels are
# hidden and replaced by a fixed axis title.
(
    alt.Chart(idv_state_all)
    .transform_fold(
        fold=['pctl10', 'pctl25', 'pctl50', 'pctl75', 'pctl90', 'pctl95'],
        as_=['percentile', 'value'],
    )
    .mark_line()
    .encode(
        alt.X('year:O', axis=alt.Axis(labels=False, title="2005 - 2019")),
        alt.Y('value:Q'),
        alt.Color('percentile:N', scale=alt.Scale(scheme='inferno')),
    )
    .properties(width=40, height=80)
    .facet('geo_abb:N', columns=17)
)

In [None]:
# Rows of `idv` with geo_var 'usst', the 'xall' group, the 'TC' income
# variable and the 'all_w2_pik' sample. The trailing .copy() makes this an
# independent frame, so the column assignments below write to it directly
# rather than to a slice of `idv` (chained assignment, which raises pandas'
# SettingWithCopyWarning).
idv_us = idv.query(
    "geo_var == 'usst' and group_var == 'xall' "
    "and inc_var == 'TC' and samp == 'all_w2_pik'"
).copy()

# Same spread measures as derived for idv_2019_state_all.
# Interquartile range: 75th minus 25th percentile.
idv_us['IQR'] = idv_us['pctl75'] - idv_us['pctl25']
# Percentile ratios. A zero denominator yields inf/NaN rather than an error.
idv_us['90/10'] = idv_us['pctl90'] / idv_us['pctl10']
idv_us['90/50'] = idv_us['pctl90'] / idv_us['pctl50']
idv_us['50/10'] = idv_us['pctl50'] / idv_us['pctl10']

# Cell output: first 20 rows, including the derived columns.
idv_us.head(20)

In [None]:
# Load prop_share_all_data.csv into a DataFrame. The path is relative, so the
# notebook must be run from the directory that contains Data/IDDA/.
prop = pd.read_csv('Data/IDDA/prop_share_all_data.csv')

# Cell output: first 20 rows for a quick look at the table.
prop.head(20)

In [None]:
# Rows of `prop` for the 'xrea' grouping at percentile 98.0 and the 'TC'
# income variable.
prop_all = prop[
    (prop['group_var'] == 'xrea')
    & (prop['percentile'] == 98.0)
    & (prop['inc_var'] == 'TC')
]


# Cell output: first 20 rows of the selection.
prop_all.head(20)

In [None]:
# Normalised stacked bars of summed `proportion` per `geo_abb`, coloured by
# `group_var_val`, with one 55x500 facet per `year` (15 facets per row). The
# x-axis labels and title are hidden.
(
    alt.Chart(prop_all)
    .mark_bar()
    .encode(
        alt.X(
            'sum(proportion):Q',
            stack='normalize',
            axis=alt.Axis(labels=False, title=None),
        ),
        alt.Y('geo_abb:N', title="State"),
        alt.Color(
            'group_var_val:N',
            title="Ethnic Group",
            scale=alt.Scale(scheme='dark2'),
        ),
        tooltip=[
            alt.Tooltip('geo_abb:N'),
            alt.Tooltip('group_var_val:N'),
            alt.Tooltip('proportion:Q'),
        ],
    )
    .properties(
        title="Proportion of 98th Percentile Income Earners by State and Ethnic Group",
        width=55,
        height=500,
    )
    .facet('year:O', columns=15)
)


In [None]:
# Restrict the `prop_all` selection to rows whose `year` is 2019.
prop_all_2019 = prop_all[prop_all['year'] == 2019]
# Cell output: first rows of the 2019 subset.
prop_all_2019.head()

In [None]:
# Single-year version of the stacked bar chart: normalised summed `proportion`
# per `geo_abb`, coloured by `group_var_val`, drawn at 1000x500.
(
    alt.Chart(prop_all_2019)
    .mark_bar()
    .encode(
        alt.X('sum(proportion):Q', stack='normalize'),
        alt.Y('geo_abb:N', title="State"),
        alt.Color(
            'group_var_val:N',
            title="Ethnic Group",
            scale=alt.Scale(scheme='dark2'),
        ),
        tooltip=[
            alt.Tooltip('geo_abb:N'),
            alt.Tooltip('group_var_val:N'),
            alt.Tooltip('proportion:Q'),
        ],
    )
    .properties(
        title="Proportion of 98th Percentile Income Earners by State and Ethnic Group in 2019",
        width=1000,
        height=500,
    )
)

In [None]:
# Load inc_share_all_data.csv into a DataFrame. The path is relative, so the
# notebook must be run from the directory that contains Data/IDDA/.
inc = pd.read_csv('Data/IDDA/inc_share_all_data.csv')
# Cell output: first rows for a quick look at the table.
inc.head()