# Setup

In [33]:
# Install microdf
!pip install git+https://github.com/PSLmodels/microdf.git
# update plotly
!pip install plotly --upgrade

!pip install git+http://github.com/ubicenter/ubicenter.py

Collecting git+https://github.com/PSLmodels/microdf.git
  Cloning https://github.com/PSLmodels/microdf.git to /tmp/pip-req-build-2wy5o9ho
  Running command git clone -q https://github.com/PSLmodels/microdf.git /tmp/pip-req-build-2wy5o9ho
Building wheels for collected packages: microdf
  Building wheel for microdf (setup.py) ... [?25l[?25hdone
  Created wheel for microdf: filename=microdf-0.3.0-cp37-none-any.whl size=26003 sha256=e427516d83bc46fbb6c2f983bca614586ab67cd82adb62f09cf976c266d422cc
  Stored in directory: /tmp/pip-ephem-wheel-cache-i7j0cuk9/wheels/3d/53/af/92e56f83db191b0579d21e8385d61a92a502e66443b23c7e16
Successfully built microdf
Requirement already up-to-date: plotly in /usr/local/lib/python3.7/dist-packages (5.1.0)
Collecting git+http://github.com/ubicenter/ubicenter.py
  Cloning http://github.com/ubicenter/ubicenter.py to /tmp/pip-req-build-yjmad7ys
  Running command git clone -q http://github.com/ubicenter/ubicenter.py /tmp/pip-req-build-yjmad7ys
Building wheels for 

In [34]:
# Import libraries
from sklearn.ensemble import RandomForestClassifier
import pandas as pd
import numpy as np
import microdf as mdf
import plotly.express as px
from ubicenter import format_fig

In [3]:
# Read the two CPS extracts straight from GitHub.
#   person: ASEC 2018, 2019, 2020
#   voter:  voter supplement 2018, 2020
PERSON_URL = 'https://github.com/ngpsu22/Winners/raw/main/cps_00051.csv.gz'
VOTER_URL = 'https://github.com/ngpsu22/Winners/raw/main/cps_00052.csv.gz'
person = pd.read_csv(PERSON_URL)
voter = pd.read_csv(VOTER_URL)

In [4]:
# Normalise the column names of both extracts to lower case
for frame in (person, voter):
    frame.columns = frame.columns.str.lower()

# Create pred_vote

In [5]:
# Boolean flag: True where the `voted` code equals 2
voter["vote"] = voter["voted"].eq(2)

In [6]:
# Build the training frame as a shuffled copy of the voter supplement.
# A fixed random_state makes the shuffle reproducible across kernel restarts,
# so anything fitted on `train` sees the rows in the same order every run.
train = voter.sample(frac=1.0, random_state=0).copy()
# Boolean target: True where the `voted` code equals 2
train["vote"] = train.voted == 2

In [7]:
# Show the columns available in the training frame
train.columns

Index(['year', 'serial', 'month', 'hwtfinl', 'cpsid', 'statefip', 'county',
       'cbsasz', 'faminc', 'pernum', 'wtfinl', 'cpsidp', 'age', 'sex', 'race',
       'marst', 'citizen', 'hispan', 'empstat', 'educ', 'hourwage', 'voted',
       'vosuppwt', 'vote'],
      dtype='object')

In [8]:
# Train a random forest on the 13 predictor columns listed in XCOLS,
# weighting each respondent by vosuppwt.
XCOLS = ['county','statefip', 'age', 'sex', 'race', 'marst', 'citizen', 'hispan', 'empstat', 'educ', 'faminc', 'hourwage', 'cbsasz' ]
# random_state fixes the forest's internal randomness so the predicted scores
# (and every figure derived from them) are reproducible on a fresh kernel.
# sample_weight is passed by keyword so it cannot be confused with another
# positional argument of fit().
rf = RandomForestClassifier(random_state=0).fit(
    train[XCOLS], train.vote, sample_weight=train.vosuppwt
)

In [9]:
# Score every ASEC person with the fitted forest and keep column 1 of the
# class-probability matrix as the predicted vote score
preds = rf.predict_proba(person.loc[:, XCOLS])
vote_probability = preds[:, 1]
person["pred_vote"] = vote_probability

In [10]:
# Force the predicted vote score to 0 for anyone younger than 18.
# NOTE(review): non-citizens are presumed to already score 0 — verify.
under_18 = person["age"] < 18
person.loc[under_18, "pred_vote"] = 0

In [11]:
# Rank the predictors by the forest's feature importance, largest first
importances = pd.Series(rf.feature_importances_, index=XCOLS)
importances.sort_values(ascending=False)

age         0.231026
educ        0.158574
statefip    0.112076
faminc      0.082822
citizen     0.073790
county      0.073499
empstat     0.068115
marst       0.061631
cbsasz      0.054417
race        0.024558
hispan      0.021759
hourwage    0.019145
sex         0.018589
dtype: float64

# Calculate percent Winners

- Overall percent winners
- Eligible voter percent winners
- Predicted voter percent winners

In [12]:
# Setup fixes
person = person.rename(columns={'asecwt':'weight','statefip': 'state'})
# Replace the 99999999 placeholder in adjginc with 0. The result is assigned
# back to the column: calling replace(inplace=True) on `person.adjginc` mutates
# an intermediate Series and is not guaranteed to write through to the
# DataFrame (it does not under pandas copy-on-write), which would leave the
# placeholder in the income totals.
person['adjginc'] = person['adjginc'].replace({99999999: 0})
# Scale both weight columns down by 3 — presumably to average over the three
# pooled ASEC years; confirm.
# NOTE: re-running this cell divides the weights again; run it once per kernel.
person['weight'] = person['weight'] / 3
person['spmwt'] = person['spmwt'] / 3

In [13]:
# Sanity check: sum of person weights, displayed in millions
population = person['weight'].sum()
population / 1e6

324.26003528

In [14]:
# Indicator columns that are later summed into head counts:
# under-18 flag, 18-and-over flag, and a constant 1 per person
ages = person['age']
person['child'] = ages < 18
person['adult'] = ages >= 18
person['person'] = 1

In [15]:
# Change FIPS codes to two-letter state codes for mapping
FIPS_TO_STATE = {
    '1': 'AL', '2': 'AK', '4': 'AZ', '5': 'AR',
    '6': 'CA', '8': 'CO', '9': 'CT',
    '10': 'DE', '11': 'DC', '12': 'FL',
    '13': 'GA', '15': 'HI', '16': 'ID', '17': 'IL',
    '18': 'IN', '19': 'IA', '20': 'KS', '21': 'KY',
    '22': 'LA', '23': 'ME', '24': 'MD',
    '25': 'MA', '26': 'MI', '27': 'MN',
    '28': 'MS', '29': 'MO', '30': 'MT',
    '31': 'NE', '32': 'NV', '33': 'NH',
    '34': 'NJ', '35': 'NM', '36': 'NY',
    '37': 'NC', '38': 'ND', '39': 'OH',
    '40': 'OK', '41': 'OR', '42': 'PA',
    '44': 'RI', '45': 'SC', '46': 'SD',
    '47': 'TN', '48': 'TX', '49': 'UT', '50': 'VT',
    '51': 'VA', '53': 'WA', '54': 'WV',
    '55': 'WI', '56': 'WY',
}
# Assign the mapped column back instead of calling replace(inplace=True) on
# person['state']: the in-place call acts on an intermediate Series and is not
# guaranteed to update the DataFrame (it does not under pandas copy-on-write),
# which would leave numeric strings in the column. Codes missing from the
# mapping are kept unchanged, as before.
person['state'] = person['state'].astype(str).replace(FIPS_TO_STATE)

In [16]:
# Voting-eligible flag: age above 17 and citizen code below 5
over_17 = person['age'].gt(17)
citizen_code_below_5 = person['citizen'].lt(5)
person['eligible_voter'] = over_17 & citizen_code_below_5

In [17]:
# Total children, adults, AGI and eligible voters for each SPM unit and year,
# attached back onto every person in that unit
SPM_KEYS = ['spmfamunit', 'year']
group = (
    person
    .groupby(SPM_KEYS)[['child', 'adult', 'adjginc', 'eligible_voter']]
    .sum()
    .rename(columns={
        'child': 'total_children',
        'adult': 'total_adults',
        'adjginc': 'total_family_income',
        'eligible_voter': 'total_eligible_voter',
    })
)
person = person.merge(group, left_on=SPM_KEYS, right_index=True)
person['total_people'] = person['total_adults'] + person['total_children']

In [18]:
# Collapse the person records to one row per combination of the SPM unit
# columns, summing the person-level columns within each combination.
# NOTE(review): 'year' and 'age' are summed as well and 'year' is not a
# grouping key — confirm this is intended.
PERSON_COLUMNS = ['year', 'age', 'adjginc', 'person']
SPMU_COLUMNS = ['spmwt', 'spmtotres', 'spmfamunit', 'spmthresh', 'state']

spmu = (
    person
    .groupby(SPMU_COLUMNS)[PERSON_COLUMNS]
    .sum()
    .reset_index()
    .rename(columns={'person': 'numper'})
)

In [19]:
# Weighted total AGI across SPM units, displayed in trillions
weighted_agi = spmu['adjginc'] * spmu['spmwt']
total_agi = weighted_agi.sum()
total_agi / 1e12

10.90195144163018

In [20]:
# Revenue-neutral UBI funded by a 10% flat tax on AGI: all revenue is split
# equally across the weighted population.
# Note the amount of the flat tax does not matter for percent winners for a static model

flat_tax = 0.1
revenue = total_agi * flat_tax
ubi = revenue / population
ubi

3362.101478899888

In [21]:
# Net change per person: the unit's total UBI minus the unit's flat tax.
# A strictly positive net change marks the person as a winner.
person['new_tax'] = flat_tax * person['total_family_income']
person['total_ubi'] = person['total_people'] * ubi
person['change'] = person['total_ubi'] - person['new_tax']
person['winner'] = person['change'].gt(0)

In [22]:
# Weighted number of eligible voters, displayed in millions
eligible_population = person['weight'].mul(person['eligible_voter']).sum()
eligible_population / 1e6

229.53209475000008

In [23]:
# Weighted sum of the predicted vote scores, displayed in millions
predicted_population = person['weight'].mul(person['pred_vote']).sum()
predicted_population / 1e6

136.80556608455268

In [24]:
# Variant: split the same revenue among eligible voters only, then flag
# the people whose unit receives more UBI than it pays in flat tax
ubi_ev = revenue / eligible_population
person['total_ev_ubi'] = person['total_eligible_voter'] * ubi_ev
person['change_ev'] = person['total_ev_ubi'] - person['new_tax']
person['winner_ev'] = person['change_ev'].gt(0)

In [25]:
# Create function with six outputs for each state. For the universal UBI and
# then for the UBI paid only to eligible voters, it reports:
# 1) Overall percent winners
# 2) Eligible voters percent winners
# 3) Predicted voters percent winners

def _weighted_percent(flag, weights):
  """Percent of the total of `weights` carried by rows where `flag` is set, rounded to 1 decimal."""
  return (((weights * flag).sum() / weights.sum()) * 100).round(1)


def winners_state(state, data=None):
  """Summarise the share of winners for one state or for the whole sample.

  Args:
    state: value matched against the `state` column, or 'US' to use every row.
    data: person-level DataFrame with the columns state, weight,
      eligible_voter, pred_vote, winner and winner_ev. Defaults to the
      notebook's global `person` frame, so existing calls are unaffected.

  Returns:
    pd.Series with six percentages rounded to one decimal, in this order:
    overall, eligible-voter and predicted-voter winners under the universal
    UBI (`winner`), then the same three under the eligible-voter-only UBI
    (`winner_ev`). A value is NaN when its denominator is zero, e.g. when no
    row matches `state`.
  """
  if data is None:
    data = person

  # Nothing below mutates the selection, so no copy of the frame is needed.
  target_persons = data if state == 'US' else data[data.state == state]

  # The three populations used as denominators. Each is a float Series; the
  # winner flags are multiplied into these (float * bool) instead of
  # multiplying two Boolean columns together, which makes pandas emit an
  # "evaluating in Python space" warning.
  weight = target_persons.weight
  eligible_weight = weight * target_persons.eligible_voter
  predicted_weight = weight * target_persons.pred_vote

  return pd.Series([
      _weighted_percent(winner_flag, population_weight)
      for winner_flag in (target_persons.winner, target_persons.winner_ev)
      for population_weight in (weight, eligible_weight, predicted_weight)
  ])

In [26]:
def winners_state_row(row):
  """Run winners_state on the `state` field of a single row."""
  state_code = row.state
  return winners_state(state_code)

In [27]:
# One summary row for 'US' followed by one row per distinct state value
states = person['state'].unique().tolist()
summary = mdf.cartesian_product({'state': ['US', *states]})

In [28]:
# Fill the six winner-share columns by evaluating every summary row
WINNER_COLUMNS = [
    'overall',
    'eligible_voters',
    'predicted_voters',
    'overall_ev',
    'eligible_voters_ev',
    'predicted_voters_ev',
]
summary[WINNER_COLUMNS] = summary.apply(winners_state_row, axis=1)

  f"evaluating in Python space because the {repr(op_str)} "


In [29]:
# True where more than 50 percent of predicted voters are winners
summary['majority'] = summary['predicted_voters'].gt(50)

In [30]:
# Order the rows from the lowest to the highest predicted-voter winner share
summary = summary.sort_values('predicted_voters')

In [37]:
# US state choropleth of the predicted-voter winner share, using a diverging
# colour scale centred on 50 percent and no colour bar
choropleth_options = dict(
    locations='state',
    color="predicted_voters",
    color_continuous_scale="PRGn",
    color_continuous_midpoint=50,
    locationmode='USA-states',
    scope="usa",
    title='Share of predicted voters who benefit from flat tax funded UBI',
    height=600,
    labels={
        'predicted_voters': "Percent Winners",
        'state': 'State',
    },
)
fig = px.choropleth(summary, **choropleth_options)

fig.update_layout(coloraxis_showscale=False)

format_fig(fig)