In [1]:

# If the kernel crashes on import, make sure pywin32 and pypiwin32 are installed.
# See the instructions at https://github.com/jupyter/notebook/issues/4909
import win32api
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import dash_bootstrap_components as dbc
import microdf as mdf
import os
import us



In [2]:
# Load the IPUMS extract from the gzipped CSV.
person_raw = pd.read_csv("cps_00041.csv.gz")

# Work on a deep copy with lower-cased column names so person_raw stays as loaded.
person = person_raw.copy(deep=True)
person.columns = [column.lower() for column in person.columns]
# Scale the person weight to one third.
# NOTE(review): presumably because three survey years are pooled — confirm.
person["asecwt"] = person["asecwt"] / 3

# Create boolean flags for demographics
person["adult"] = person.age > 17
person["child"] = person.age < 18

person["black"] = person.race == 200
person["white_non_hispanic"] = (person.race == 100) & (person.hispan == 0)
# Each comparison needs its own parentheses: `&` binds tighter than `<`, so
# `(hispan > 1) & hispan < 700` is evaluated as `((hispan > 1) & hispan) < 700`,
# which flags every row as Hispanic.
person["hispanic"] = (person.hispan > 1) & (person.hispan < 700)
person["pwd"] = person.diffany == 2
person["non_citizen"] = person.citizen == 5
person["non_citizen_child"] = (person.citizen == 5) & (person.age < 18)
person["non_citizen_adult"] = (person.citizen == 5) & (person.age > 17)

# Remove NIUs: replace each column's NIU sentinel code(s) with 0.
# The result is assigned back to the column rather than calling
# replace(inplace=True) on `person[col]`: that chained in-place form acts on
# an intermediate Series, triggers a pandas FutureWarning, and does not update
# `person` once Copy-on-Write is enabled.
NIU_CODES = {
    "taxinc": [9999999],
    "adjginc": [99999999],
    "incss": [999999],
    "incssi": [999999],
    "incunemp": [99999, 999999],
    "ctccrd": [999999],
    "actccrd": [99999],
    "eitcred": [9999],
    "fica": [99999],
    "fedtaxac": [99999999],
    "stataxac": [9999999],
}
for column, codes in NIU_CODES.items():
    person[column] = person[column].replace(codes, 0)

# Change FIPS codes to state names.
# The codes are zero-padded to two digits first ("1" -> "01"); without the
# padding, single-digit codes are not recognised as FIPS by us.states.lookup.
fips_codes = person["statefip"].astype(str).str.zfill(2)
# Resolve each distinct code once instead of calling lookup for every row.
# NOTE(review): codes that `us` cannot resolve come back as None and are stored
# as the string "None" (same as before) — confirm whether e.g. DC resolves.
fips_to_name = {
    code: str(us.states.lookup(code)) for code in fips_codes.unique()
}
person["statefip"] = fips_codes.map(fips_to_name)

# Combine the two child tax credit columns (ctccrd and actccrd) into one
person["ctc"] = person["ctccrd"] + person["actccrd"]

# Count the people in each SPM unit (per year) and attach the count as "numper"
person["person"] = 1
spm = person.groupby(["spmfamunit", "year"]).agg(numper=("person", "sum"))
person = person.merge(spm, left_on=["spmfamunit", "year"], right_index=True)

# Weight state tax and AGI by the person weight
person["weighted_state_tax"] = person["asecwt"] * person["stataxac"]
person["weighted_agi"] = person["asecwt"] * person["adjginc"]

# Sum the weighted state tax and weighted AGI per state, then join the totals
# back onto every person row
state_groups_taxinc = person.groupby(["statefip"]).agg(
    state_tax_revenue=("weighted_state_tax", "sum"),
    state_taxable_income=("weighted_agi", "sum"),
)
person = person.merge(
    state_groups_taxinc, left_on=["statefip"], right_index=True
)

person.head().T

Unnamed: 0,0,1,2,3,4
year,2018,2018,2018,2018,2018
statefip,Maine,Maine,Maine,Maine,Maine
asecwt,438.673333,274.763333,274.763333,243.89,368.823333
age,73,48,55,12,6
race,100,100,100,100,200
citizen,1,1,1,1,1
hispan,0,0,0,0,0
diffany,2,1,1,0,0
incss,6000,0,0,0,0
incssi,0,0,0,0,0


In [3]:
# Build one row per SPM unit by summing person-level columns.

# Person-level columns that are summed within each group
PERSON_COLUMNS = [
    "adjginc", "fica", "fedtaxac", "ctc", "incssi", "incunemp", "eitcred",
    "child", "adult",
    "non_citizen", "non_citizen_child", "non_citizen_adult",
    "person", "stataxac",
]
# Columns that form the grouping key
SPMU_COLUMNS = [
    "spmheat", "spmsnap", "spmfamunit", "spmthresh", "spmtotres", "spmwt",
    "year", "statefip", "state_tax_revenue", "state_taxable_income",
]

spmu = (
    person.groupby(SPMU_COLUMNS, observed=False)[PERSON_COLUMNS]
    .sum()
    .reset_index()
    .rename(columns={"person": "numper"})
)

# Flip the sign of the tax columns
for tax_column in ("fica", "fedtaxac", "stataxac"):
    spmu[tax_column] = spmu[tax_column] * -1

# Scale the SPM-unit weight to one third
spmu["spmwt"] = spmu["spmwt"] / 3

spmu.head()

Unnamed: 0,spmheat,spmsnap,spmfamunit,spmthresh,spmtotres,spmwt,year,statefip,state_tax_revenue,state_taxable_income,...,incssi,incunemp,eitcred,child,adult,non_citizen,non_citizen_child,non_citizen_adult,numper,stataxac
0,0.0,0,1001,14700.0,86459.0,517.633333,2020,Maine,1526793000.0,40179000000.0,...,0,0,0,0,2,0,0,0,2,-4624
1,0.0,0,2001,11004.31484,47514.0,438.673333,2018,Maine,1526793000.0,40179000000.0,...,0,0,0,0,1,0,0,0,1,0
2,0.0,0,2001,14700.0,55275.0,330.163333,2020,Maine,1526793000.0,40179000000.0,...,0,0,0,0,2,0,0,0,2,0
3,0.0,0,3001,11920.0,28653.0,501.756667,2020,Maine,1526793000.0,40179000000.0,...,0,0,0,0,1,0,0,0,1,-1152
4,0.0,0,4001,26449.08415,105232.008,274.763333,2018,Maine,1526793000.0,40179000000.0,...,0,0,0,3,2,0,0,0,5,-4091


In [5]:
# Preview the first five SPM-unit rows, transposed so each column reads as a row
spmu.head(5).transpose()

Unnamed: 0,0,1,2,3,4
spmheat,0.0,0.0,0.0,0.0,0.0
spmsnap,0,0,0,0,0
spmfamunit,1001,2001,2001,3001,4001
spmthresh,14700.0,11004.31484,14700.0,11920.0,26449.08415
spmtotres,86459.0,47514.0,55275.0,28653.0,105232.008
spmwt,517.633333,438.673333,330.163333,501.756667,274.763333
year,2020,2018,2020,2020,2018
statefip,Maine,Maine,Maine,Maine,Maine
state_tax_revenue,1526793245.48,1526793245.48,1526793245.48,1526793245.48,1526793245.48
state_taxable_income,40179001414.286758,40179001414.286758,40179001414.286758,40179001414.286758,40179001414.286758


# Calculate the national (US-level) totals first

In [6]:
# Preview the first five person rows, transposed so each column reads as a row
person.head(5).transpose()

Unnamed: 0,0,1,2,3,4
year,2018,2018,2018,2018,2018
statefip,Maine,Maine,Maine,Maine,Maine
asecwt,438.673333,274.763333,274.763333,243.89,368.823333
age,73,48,55,12,6
race,100,100,100,100,200
citizen,1,1,1,1,1
hispan,0,0,0,0,0
diffany,2,1,1,0,0
incss,6000,0,0,0,0
incssi,0,0,0,0,0


In [10]:
def _weighted_total(flag):
    """Return the sum of person.asecwt multiplied by the boolean column `flag`."""
    return (person.asecwt * person[flag]).sum()


# Weighted totals: everyone first, then one total per demographic flag
population = person.asecwt.sum()
adult_pop = _weighted_total("adult")
child_pop = _weighted_total("child")
black_pop = _weighted_total("black")
white_non_hispanic_pop = _weighted_total("white_non_hispanic")
hispanic_pop = _weighted_total("hispanic")
pwd_pop = _weighted_total("pwd")
non_citizen_pop = _weighted_total("non_citizen")
non_citizen_adult_pop = _weighted_total("non_citizen_adult")
non_citizen_child_pop = _weighted_total("non_citizen_child")


