The idea behind this notebook was to run a variety of EI on the data that we got from er total run. In particular, we are going to be making the following tables:
1. $2\times 2$ ER and EI -- (H,$SOR_O$)
2. $2\times 2$ ER and EI -- (Brazilian, $SOR_O$)
3. $2\times 2$ ER and EI -- (Guyanese, $SOR_O$)
4. $2\times 2$ ER and EI -- (Cabo Verdean, $SOR_O$)
5. $2\times 2$ ER and EI -- (Belizean, $SOR_O$)
6. $2 \times 4$ ER and EI -- (Brazilian, $SOR_O$, $W_0$, $B_0$)
7. $2\times 2$ ER and EI -- (Guyanese, H)
8. $2\times 2$ ER and EI -- (Cabo Verdean, $B_0$)
9. $2\times 2$ ER and EI -- (Belizean, H)
10. $2\times 2$ ER and EI -- (MENAW, $SOR_O$)
11. $2\times 2$ ER and EI -- (MENAH, $SOR_O$)
12. $2\times 2$ ER and EI -- (MENAS, $SOR_O$)
13. $2\times 2$ ER and EI -- (MENAW, $W_0$)
14. $2\times 2$ ER and EI -- (MENAH, $W_0$)
15. $2\times 2$ ER and EI -- (MENAS, $W_0$)

In [46]:
import json

import geopandas as gpd
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import requests
import us

from census import Census
from shapely.geometry import Point
import numpy as np
import pandas as pd

from pyei.two_by_two import TwoByTwoEI
from pyei.goodmans_er import GoodmansER
from pyei.goodmans_er import GoodmansERBayes
from pyei.r_by_c import RowByColumnEI


Read in the dataframes

In [65]:

states_fips = [
    "06", # California
    "25", # Massachusetts
    "26", # Michigan
    "36", # New York
    "48" # Texas
] #these are the states whose files we got in ertotalrun.ipynb

state_dfs = ['0']*len(states_fips)

for state in states_fips:
    df = pd.read_csv('ecologicalcsvs/' + state + 'ecological_acs_pl.csv')
    state_dfs[i] = pd.read_csv('ecologicalcsvs/' + state + 'ecological_acs_pl.csv')

for i in range(len(state_dfs)):
    state_dfs[i] = state_dfs[i].fillna(0, inplace = True) #the EI won't run otherwise


06
25
26
36
48


AttributeError: 'str' object has no attribute 'fillna'

In [62]:
print(state_dfs)

['0', '0', '0', '0',       Unnamed: 0  totpop_PL  hispanic_PL  white_PL  black_PL  sor_PL  state  \
0              0       4641          310      4311       315     121     48   
1              1       5782         1610      5763      2174    1539     48   
2              2       7115         2002      7109      2924    1922     48   
3              3       4479         1631      3935       807     835     48   
4              4       6036         1369      5506      1620     690     48   
...          ...        ...          ...       ...       ...     ...    ...   
6891        6891       2567         2442      1561        10     374     48   
6892        6892        855          731       556         0      79     48   
6893        6893       1483         1288       788        12     199     48   
6894        6894       1744         1581      1257        16     286     48   
6895        6895       5588         5344      3743        61     999     48   

      county   tract        GE

In [22]:
def two_by_two(df, race, ancestry_or_ethnicity, total_col):
    candidate_name_2by2 = race
    demographic_group_name_2by2 = ancestry_or_ethnicity
    precinct_names = df['tract'].astype(str)
    group_fraction_2by2 = np.array(df[ancestry_or_ethnicity]) # Change this AND candidate name below
    votes_fraction_2by2 = np.array(df[race]) #Change this AND group name below
    precinct_pops = np.array(df[total_col]).astype(int)
    ei_2by2 = TwoByTwoEI(model_name="king99_pareto_modification", pareto_scale=15, pareto_shape=2)
    ei_2by2.fit(group_fraction_2by2,
          votes_fraction_2by2,
          precinct_pops,
          demographic_group_name=demographic_group_name_2by2,
          candidate_name=candidate_name_2by2,
          precinct_names=precinct_names, # omit this line if you don't have or don't want to use precinct names
          chains=4
    )
    # Generate a simple report to summarize the results
    print(ei_2by2.summary())
    return ei_2by2

In [24]:
def plot_ei(ei_2by2):

    fig, ax = plt.subplots(2)

    ei_2by2.plot(axes=ax)

    ax[0].set_xlim(-0.1,1.01)
    ax[1].set_xlim(-0.1, 1.01)

In [26]:
def er(group_fraction_2by2, votes_fraction_2by2, precinct_pops):
    goodmans_er = GoodmansER(is_weighted_regression="True")

    goodmans_er.fit(group_fraction_2by2,
        votes_fraction_2by2,
        precinct_pops, # Must include populations if weighting by population
        demographic_group_name=demographic_group_name_2by2,
        candidate_name=candidate_name_2by2
    )
    print(goodmans_er.summary())
    goodmans_er.plot()

# 2x2

## Tracts, California

In [None]:


# (H, SOR_0)
ca_sor_hispanic = two_by_two(state_dfs[0], 'sor_PL_pct', 'hispanic_PL_pct', 'totpop_PL')
plot_ei(ca_sor_hispanic)
er(np.array(state_dfs[0]['hispanic_PL_pct']), np.array(state_dfs[0]['sor_PL_pct']), np.array(state_dfs[0]['totpop_PL']).astype(int))

# (Brazilian, SOR_0)
ca_sor_brazil = two_by_two(state_dfs[0], 'sor_ACS_pct', 'brazilian_ACS_pct', 'totpop_ACS')
plot_ei(ca_sor_brazil)
er(np.array(state_dfs[0]['brazilian_ACS_pct']), np.array(state_dfs[0]['sor_ACS_pct']), np.array(state_dfs[0]['totpop_ACS']).astype(int))

# (Guyanese, SOR_0)
pop = 'guyanese_ACS_pct'
race = 'sor_ACS_pct'
total_pop = 'totpop_ACS'
ca_sor_guy = two_by_two(state_dfs[0], race, pop, total_pop)
plot_ei(ca_sor_guy)
er(np.array(state_dfs[0][pop]), np.array(state_dfs[0][race]), np.array(state_dfs[0][total_pop]).astype(int))

# (Cabo Verdean, SOR_0)
pop = 'cabo_verdean_ACS_pct'
race = 'sor_ACS_pct'
total_pop = 'totpop_ACS'
ca_sor_cabo = two_by_two(state_dfs[0], race, pop, total_pop)
plot_ei(ca_sor_cabo)
er(np.array(state_dfs[0][pop]), np.array(state_dfs[0][race]), np.array(state_dfs[0][total_pop]).astype(int))

# (Belizean, SOR_0)
pop = 'belizean_ACS_pct'
race = 'sor_ACS_pct'
total_pop = 'totpop_ACS'
ca_sor_bel = two_by_two(state_dfs[0], race, pop, total_pop)
plot_ei(ca_sor_bel)
er(np.array(state_dfs[0][pop]), np.array(state_dfs[0][race]), np.array(state_dfs[0][total_pop]).astype(int))

# (Brazilian, H)
pop = 'brazilian_ACS_pct'
race = 'hispanic_PL_pct'
total_pop = 'totpop_PL'
ca_hispanic_brazil = two_by_two(state_dfs[0], race, pop, total_pop)
plot_ei(ca_hispanic_brazil)
er(np.array(state_dfs[0][pop]), np.array(state_dfs[0][race]), np.array(state_dfs[0][total_pop]).astype(int))

# (Guyanese, H)
pop = 'guyanese_ACS_pct'
race = 'hispanic_PL_pct'
total_pop = 'totpop_PL'
ca_hispanic_guy = two_by_two(state_dfs[0], race, pop, total_pop)
plot_ei(ca_hispanic_guy)
er(np.array(state_dfs[0][pop]), np.array(state_dfs[0][race]), np.array(state_dfs[0][total_pop]).astype(int))

# (Cabo Verde, B_0)
pop = 'cabo_verdean_ACS_pct'
race = 'black_PL_pct'
total_pop = 'totpop_PL'
ca_black_cabo = two_by_two(state_dfs[0], race, pop, total_pop)
plot_ei(ca_black_cabo)
er(np.array(state_dfs[0][pop]), np.array(state_dfs[0][race]), np.array(state_dfs[0][total_pop]).astype(int))

# (Belizean, H)
pop = 'belizean_ACS_pct'
race = 'hispanic_PL_pct'
total_pop = 'totpop_PL'
ca_hispanic_belize = two_by_two(state_dfs[0], race, pop, total_pop)
plot_ei(ca_hispanic_belize)
er(np.array(state_dfs[0][pop]), np.array(state_dfs[0][race]), np.array(state_dfs[0][total_pop]).astype(int))

# (Mena_WB, SOR_0)
pop = 'mena_world_bank_ACS_pct'
race = 'sor_ACS_pct'
total_pop = 'totpop_ACS'
ca_sor_world_bank = two_by_two(state_dfs[0], race, pop, total_pop)
plot_ei(ca_sor_world_bank)
er(np.array(state_dfs[0][pop]), np.array(state_dfs[0][race]), np.array(state_dfs[0][total_pop]).astype(int))

# (Mena_UNHC, SOR_0)
pop = 'mena_unhcr_ACS_pct'
race = 'sor_ACS_pct'
total_pop = 'totpop_ACS'
ca_sor_unhcr = two_by_two(state_dfs[0], race, pop, total_pop)
plot_ei(ca_sor_unhcr)
er(np.array(state_dfs[0][pop]), np.array(state_dfs[0][race]), np.array(state_dfs[0][total_pop]).astype(int))

# (Mena_UNSC, SOR_0)
pop = 'mena_unsd_ACS_pct'
race = 'sor_ACS_pct'
total_pop = 'totpop_ACS'
ca_sor_unsd = two_by_two(state_dfs[0], race, pop, total_pop)
plot_ei(ca_sor_unsd)
er(np.array(state_dfs[0][pop]), np.array(state_dfs[0][race]), np.array(state_dfs[0][total_pop]).astype(int))

# (Mena_WB, W_0)
pop = 'mena_world_bank_ACS_pct'
race = 'white_PL_pct'
total_pop = 'totpop_PL'
ca_w_world_bank = two_by_two(state_dfs[0], race, pop, total_pop)
plot_ei(ca_w_world_bank)
er(np.array(state_dfs[0][pop]), np.array(state_dfs[0][race]), np.array(state_dfs[0][total_pop]).astype(int))

# (Mena_UNHC, W_0)
pop = 'mena_unhcr_ACS_pct'
race = 'white_PL_pct'
total_pop = 'totpop_PL'
ca_w_unhcr = two_by_two(state_dfs[0], race, pop, total_pop)
plot_ei(ca_w_unhcr)
er(np.array(state_dfs[0][pop]), np.array(state_dfs[0][race]), np.array(state_dfs[0][total_pop]).astype(int))

# (Mena_UNSC, SOR_0)
pop = 'mena_unsd_ACS_pct'
race = 'white_PL_pct'
total_pop = 'totpop_PL'
ca_w_unsd = two_by_two(state_dfs[0], race, pop, total_pop)
plot_ei(ca_w_unsd)
er(np.array(state_dfs[0][pop]), np.array(state_dfs[0][race]), np.array(state_dfs[0][total_pop]).astype(int))

## Tracts, Mass

In [None]:


# (H, SOR_0)
ma_sor_hispanic = two_by_two(state_dfs[1], 'sor_PL_pct', 'hispanic_PL_pct', 'totpop_PL')
plot_ei(ma_sor_hispanic)
er(np.array(state_dfs[1]['hispanic_PL_pct']), np.array(state_dfs[1]['sor_PL_pct']), np.array(state_dfs[1]['totpop_PL']).astype(int))

# (Brazilian, SOR_0)
ma_sor_brazil = two_by_two(state_dfs[1], 'sor_ACS_pct', 'brazilian_ACS_pct', 'totpop_ACS')
plot_ei(ma_sor_brazil)
er(np.array(state_dfs[1]['brazilian_ACS_pct']), np.array(state_dfs[1]['sor_ACS_pct']), np.array(state_dfs[1]['totpop_ACS']).astype(int))

# (Guyanese, SOR_0)
pop = 'guyanese_ACS_pct'
race = 'sor_ACS_pct'
total_pop = 'totpop_ACS'
ma_sor_guy = two_by_two(state_dfs[1], race, pop, total_pop)
plot_ei(ma_sor_guy)
er(np.array(state_dfs[1][pop]), np.array(state_dfs[1][race]), np.array(state_dfs[1][total_pop]).astype(int))

# (Cabo Verdean, SOR_0)
pop = 'cabo_verdean_ACS_pct'
race = 'sor_ACS_pct'
total_pop = 'totpop_ACS'
ma_sor_cabo = two_by_two(state_dfs[1], race, pop, total_pop)
plot_ei(ma_sor_cabo)
er(np.array(state_dfs[1][pop]), np.array(state_dfs[1][race]), np.array(state_dfs[1][total_pop]).astype(int))

# (Belizean, SOR_0)
pop = 'belizean_ACS_pct'
race = 'sor_ACS_pct'
total_pop = 'totpop_ACS'
ma_sor_bel = two_by_two(state_dfs[1], race, pop, total_pop)
plot_ei(ma_sor_bel)
er(np.array(state_dfs[1][pop]), np.array(state_dfs[1][race]), np.array(state_dfs[1][total_pop]).astype(int))

# (Brazilian, H)
pop = 'brazilian_ACS_pct'
race = 'hispanic_PL_pct'
total_pop = 'totpop_PL'
ma_hispanic_brazil = two_by_two(state_dfs[1], race, pop, total_pop)
plot_ei(ma_hispanic_brazil)
er(np.array(state_dfs[1][pop]), np.array(state_dfs[1][race]), np.array(state_dfs[1][total_pop]).astype(int))

# (Guyanese, H)
pop = 'guyanese_ACS_pct'
race = 'hispanic_PL_pct'
total_pop = 'totpop_PL'
ma_hispanic_guy = two_by_two(state_dfs[1], race, pop, total_pop)
plot_ei(ma_hispanic_guy)
er(np.array(state_dfs[1][pop]), np.array(state_dfs[1][race]), np.array(state_dfs[1][total_pop]).astype(int))

# (Cabo Verde, B_0)
pop = 'cabo_verdean_ACS_pct'
race = 'black_PL_pct'
total_pop = 'totpop_PL'
ma_black_cabo = two_by_two(state_dfs[1], race, pop, total_pop)
plot_ei(ma_black_cabo)
er(np.array(state_dfs[1][pop]), np.array(state_dfs[1][race]), np.array(state_dfs[1][total_pop]).astype(int))

# (Belizean, H)
pop = 'belizean_ACS_pct'
race = 'hispanic_PL_pct'
total_pop = 'totpop_PL'
ma_hispanic_belize = two_by_two(state_dfs[1], race, pop, total_pop)
plot_ei(ma_hispanic_belize)
er(np.array(state_dfs[1][pop]), np.array(state_dfs[1][race]), np.array(state_dfs[1][total_pop]).astype(int))

# (Mena_WB, SOR_0)
pop = 'mena_world_bank_ACS_pct'
race = 'sor_ACS_pct'
total_pop = 'totpop_ACS'
ma_sor_world_bank = two_by_two(state_dfs[1], race, pop, total_pop)
plot_ei(ma_sor_world_bank)
er(np.array(state_dfs[1][pop]), np.array(state_dfs[1][race]), np.array(state_dfs[1][total_pop]).astype(int))

# (Mena_UNHC, SOR_0)
pop = 'mena_unhcr_ACS_pct'
race = 'sor_ACS_pct'
total_pop = 'totpop_ACS'
ma_sor_unhcr = two_by_two(state_dfs[1], race, pop, total_pop)
plot_ei(ma_sor_unhcr)
er(np.array(state_dfs[1][pop]), np.array(state_dfs[1][race]), np.array(state_dfs[1][total_pop]).astype(int))

# (Mena_UNSC, SOR_0)
pop = 'mena_unsd_ACS_pct'
race = 'sor_ACS_pct'
total_pop = 'totpop_ACS'
ma_sor_unsd = two_by_two(state_dfs[1], race, pop, total_pop)
plot_ei(ma_sor_unsd)
er(np.array(state_dfs[1][pop]), np.array(state_dfs[1][race]), np.array(state_dfs[1][total_pop]).astype(int))

# (Mena_WB, W_0)
pop = 'mena_world_bank_ACS_pct'
race = 'white_PL_pct'
total_pop = 'totpop_PL'
ma_w_world_bank = two_by_two(state_dfs[1], race, pop, total_pop)
plot_ei(ma_w_world_bank)
er(np.array(state_dfs[1][pop]), np.array(state_dfs[1][race]), np.array(state_dfs[1][total_pop]).astype(int))

# (Mena_UNHC, W_0)
pop = 'mena_unhcr_ACS_pct'
race = 'white_PL_pct'
total_pop = 'totpop_PL'
ma_w_unhcr = two_by_two(state_dfs[1], race, pop, total_pop)
plot_ei(ma_w_unhcr)
er(np.array(state_dfs[1][pop]), np.array(state_dfs[1][race]), np.array(state_dfs[1][total_pop]).astype(int))

# (Mena_UNSC, W_0)
pop = 'mena_unsd_ACS_pct'
race = 'white_PL_pct'
total_pop = 'totpop_PL'
ma_w_unsd = two_by_two(state_dfs[1], race, pop, total_pop)
plot_ei(ma_w_unsd)
er(np.array(state_dfs[1][pop]), np.array(state_dfs[1][race]), np.array(state_dfs[1][total_pop]).astype(int))

## Tracts, Michigan

In [None]:


# (H, SOR_0)
mi_sor_hispanic = two_by_two(state_dfs[2], 'sor_PL_pct', 'hispanic_PL_pct', 'totpop_PL')
plot_ei(mi_sor_hispanic)
er(np.array(state_dfs[2]['hispanic_PL_pct']), np.array(state_dfs[2]['sor_PL_pct']), np.array(state_dfs[2]['totpop_PL']).astype(int))

# (Brazilian, SOR_0)
mi_sor_brazil = two_by_two(state_dfs[2], 'sor_ACS_pct', 'brazilian_ACS_pct', 'totpop_ACS')
plot_ei(mi_sor_brazil)
er(np.array(state_dfs[2]['brazilian_ACS_pct']), np.array(state_dfs[2]['sor_ACS_pct']), np.array(state_dfs[2]['totpop_ACS']).astype(int))

# (Guyanese, SOR_0)
pop = 'guyanese_ACS_pct'
race = 'sor_ACS_pct'
total_pop = 'totpop_ACS'
mi_sor_guy = two_by_two(state_dfs[2], race, pop, total_pop)
plot_ei(mi_sor_guy)
er(np.array(state_dfs[2][pop]), np.array(state_dfs[2][race]), np.array(state_dfs[2][total_pop]).astype(int))

# (Cabo Verdean, SOR_0)
pop = 'cabo_verdean_ACS_pct'
race = 'sor_ACS_pct'
total_pop = 'totpop_ACS'
mi_sor_cabo = two_by_two(state_dfs[2], race, pop, total_pop)
plot_ei(mi_sor_cabo)
er(np.array(state_dfs[2][pop]), np.array(state_dfs[2][race]), np.array(state_dfs[2][total_pop]).astype(int))

# (Belizean, SOR_0)
pop = 'belizean_ACS_pct'
race = 'sor_ACS_pct'
total_pop = 'totpop_ACS'
mi_sor_bel = two_by_two(state_dfs[2], race, pop, total_pop)
plot_ei(mi_sor_bel)
er(np.array(state_dfs[2][pop]), np.array(state_dfs[2][race]), np.array(state_dfs[2][total_pop]).astype(int))

# (Brazilian, H)
pop = 'brazilian_ACS_pct'
race = 'hispanic_PL_pct'
total_pop = 'totpop_PL'
mi_hispanic_brazil = two_by_two(state_dfs[2], race, pop, total_pop)
plot_ei(mi_hispanic_brazil)
er(np.array(state_dfs[2][pop]), np.array(state_dfs[2][race]), np.array(state_dfs[2][total_pop]).astype(int))

# (Guyanese, H)
pop = 'guyanese_ACS_pct'
race = 'hispanic_PL_pct'
total_pop = 'totpop_PL'
mi_hispanic_guy = two_by_two(state_dfs[2], race, pop, total_pop)
plot_ei(mi_hispanic_guy)
er(np.array(state_dfs[2][pop]), np.array(state_dfs[2][race]), np.array(state_dfs[2][total_pop]).astype(int))

# (Cabo Verde, B_0)
pop = 'cabo_verdean_ACS_pct'
race = 'black_PL_pct'
total_pop = 'totpop_PL'
mi_black_cabo = two_by_two(state_dfs[2], race, pop, total_pop)
plot_ei(mi_black_cabo)
er(np.array(state_dfs[2][pop]), np.array(state_dfs[2][race]), np.array(state_dfs[2][total_pop]).astype(int))

# (Belizean, H)
pop = 'belizean_ACS_pct'
race = 'hispanic_PL_pct'
total_pop = 'totpop_PL'
mi_hispanic_belize = two_by_two(state_dfs[2], race, pop, total_pop)
plot_ei(mi_hispanic_belize)
er(np.array(state_dfs[2][pop]), np.array(state_dfs[2][race]), np.array(state_dfs[2][total_pop]).astype(int))

# (Mena_WB, SOR_0)
pop = 'mena_world_bank_ACS_pct'
race = 'sor_ACS_pct'
total_pop = 'totpop_ACS'
mi_sor_world_bank = two_by_two(state_dfs[2], race, pop, total_pop)
plot_ei(mi_sor_world_bank)
er(np.array(state_dfs[2][pop]), np.array(state_dfs[2][race]), np.array(state_dfs[2][total_pop]).astype(int))

# (Mena_UNHC, SOR_0)
pop = 'mena_unhcr_ACS_pct'
race = 'sor_ACS_pct'
total_pop = 'totpop_ACS'
mi_sor_unhcr = two_by_two(state_dfs[2], race, pop, total_pop)
plot_ei(mi_sor_unhcr)
er(np.array(state_dfs[2][pop]), np.array(state_dfs[2][race]), np.array(state_dfs[2][total_pop]).astype(int))

# (Mena_UNSC, SOR_0)
pop = 'mena_unsd_ACS_pct'
race = 'sor_ACS_pct'
total_pop = 'totpop_ACS'
mi_sor_unsd = two_by_two(state_dfs[2], race, pop, total_pop)
plot_ei(mi_sor_unsd)
er(np.array(state_dfs[2][pop]), np.array(state_dfs[2][race]), np.array(state_dfs[2][total_pop]).astype(int))

# (Mena_WB, W_0)
pop = 'mena_world_bank_ACS_pct'
race = 'white_PL_pct'
total_pop = 'totpop_PL'
mi_w_world_bank = two_by_two(state_dfs[2], race, pop, total_pop)
plot_ei(mi_w_world_bank)
er(np.array(state_dfs[2][pop]), np.array(state_dfs[2][race]), np.array(state_dfs[2][total_pop]).astype(int))

# (Mena_UNHC, W_0)
pop = 'mena_unhcr_ACS_pct'
race = 'white_PL_pct'
total_pop = 'totpop_PL'
mi_w_unhcr = two_by_two(state_dfs[2], race, pop, total_pop)
plot_ei(mi_w_unhcr)
er(np.array(state_dfs[2][pop]), np.array(state_dfs[2][race]), np.array(state_dfs[2][total_pop]).astype(int))

# (Mena_UNSC, W_0)
pop = 'mena_unsd_ACS_pct'
race = 'white_PL_pct'
total_pop = 'totpop_PL'
mi_w_unsd = two_by_two(state_dfs[2], race, pop, total_pop)
plot_ei(mi_w_unsd)
er(np.array(state_dfs[2][pop]), np.array(state_dfs[2][race]), np.array(state_dfs[2][total_pop]).astype(int))

## Tracts, New York

In [None]:


# (H, SOR_0)
ny_sor_hispanic = two_by_two(state_dfs[3], 'sor_PL_pct', 'hispanic_PL_pct', 'totpop_PL')
plot_ei(ny_sor_hispanic)
er(np.array(state_dfs[3]['hispanic_PL_pct']), np.array(state_dfs[3]['sor_PL_pct']), np.array(state_dfs[3]['totpop_PL']).astype(int))

# (Brazilian, SOR_0)
ny_sor_brazil = two_by_two(state_dfs[3], 'sor_ACS_pct', 'brazilian_ACS_pct', 'totpop_ACS')
plot_ei(ny_sor_brazil)
er(np.array(state_dfs[3]['brazilian_ACS_pct']), np.array(state_dfs[3]['sor_ACS_pct']), np.array(state_dfs[3]['totpop_ACS']).astype(int))

# (Guyanese, SOR_0)
pop = 'guyanese_ACS_pct'
race = 'sor_ACS_pct'
total_pop = 'totpop_ACS'
ny_sor_guy = two_by_two(state_dfs[3], race, pop, total_pop)
plot_ei(ny_sor_guy)
er(np.array(state_dfs[3][pop]), np.array(state_dfs[3][race]), np.array(state_dfs[3][total_pop]).astype(int))

# (Cabo Verdean, SOR_0)
pop = 'cabo_verdean_ACS_pct'
race = 'sor_ACS_pct'
total_pop = 'totpop_ACS'
ny_sor_cabo = two_by_two(state_dfs[3], race, pop, total_pop)
plot_ei(ny_sor_cabo)
er(np.array(state_dfs[3][pop]), np.array(state_dfs[3][race]), np.array(state_dfs[3][total_pop]).astype(int))

# (Belizean, SOR_0)
pop = 'belizean_ACS_pct'
race = 'sor_ACS_pct'
total_pop = 'totpop_ACS'
ny_sor_bel = two_by_two(state_dfs[3], race, pop, total_pop)
plot_ei(ny_sor_bel)
er(np.array(state_dfs[3][pop]), np.array(state_dfs[3][race]), np.array(state_dfs[3][total_pop]).astype(int))

# (Brazilian, H)
pop = 'brazilian_ACS_pct'
race = 'hispanic_PL_pct'
total_pop = 'totpop_PL'
ny_hispanic_brazil = two_by_two(state_dfs[3], race, pop, total_pop)
plot_ei(ny_hispanic_brazil)
er(np.array(state_dfs[3][pop]), np.array(state_dfs[3][race]), np.array(state_dfs[3][total_pop]).astype(int))

# (Guyanese, H)
pop = 'guyanese_ACS_pct'
race = 'hispanic_PL_pct'
total_pop = 'totpop_PL'
ny_hispanic_guy = two_by_two(state_dfs[3], race, pop, total_pop)
plot_ei(ny_hispanic_guy)
er(np.array(state_dfs[3][pop]), np.array(state_dfs[3][race]), np.array(state_dfs[3][total_pop]).astype(int))

# (Cabo Verde, B_0)
pop = 'cabo_verdean_ACS_pct'
race = 'black_PL_pct'
total_pop = 'totpop_PL'
ny_black_cabo = two_by_two(state_dfs[3], race, pop, total_pop)
plot_ei(ny_black_cabo)
er(np.array(state_dfs[3][pop]), np.array(state_dfs[3][race]), np.array(state_dfs[3][total_pop]).astype(int))

# (Belizean, H)
pop = 'belizean_ACS_pct'
race = 'hispanic_PL_pct'
total_pop = 'totpop_PL'
ny_hispanic_belize = two_by_two(state_dfs[3], race, pop, total_pop)
plot_ei(ny_hispanic_belize)
er(np.array(state_dfs[3][pop]), np.array(state_dfs[3][race]), np.array(state_dfs[3][total_pop]).astype(int))

# (Mena_WB, SOR_0)
pop = 'mena_world_bank_ACS_pct'
race = 'sor_ACS_pct'
total_pop = 'totpop_ACS'
ny_sor_world_bank = two_by_two(state_dfs[3], race, pop, total_pop)
plot_ei(ny_sor_world_bank)
er(np.array(state_dfs[3][pop]), np.array(state_dfs[3][race]), np.array(state_dfs[3][total_pop]).astype(int))

# (Mena_UNHC, SOR_0)
pop = 'mena_unhcr_ACS_pct'
race = 'sor_ACS_pct'
total_pop = 'totpop_ACS'
ny_sor_unhcr = two_by_two(state_dfs[3], race, pop, total_pop)
plot_ei(ny_sor_unhcr)
er(np.array(state_dfs[3][pop]), np.array(state_dfs[3][race]), np.array(state_dfs[3][total_pop]).astype(int))

# (Mena_UNSC, SOR_0)
pop = 'mena_unsd_ACS_pct'
race = 'sor_ACS_pct'
total_pop = 'totpop_ACS'
ny_sor_unsd = two_by_two(state_dfs[3], race, pop, total_pop)
plot_ei(ny_sor_unsd)
er(np.array(state_dfs[3][pop]), np.array(state_dfs[3][race]), np.array(state_dfs[3][total_pop]).astype(int))

# (Mena_WB, W_0)
pop = 'mena_world_bank_ACS_pct'
race = 'white_PL_pct'
total_pop = 'totpop_PL'
ny_w_world_bank = two_by_two(state_dfs[3], race, pop, total_pop)
plot_ei(ny_w_world_bank)
er(np.array(state_dfs[3][pop]), np.array(state_dfs[3][race]), np.array(state_dfs[3][total_pop]).astype(int))

# (Mena_UNHC, W_0)
pop = 'mena_unhcr_ACS_pct'
race = 'white_PL_pct'
total_pop = 'totpop_PL'
ny_w_unhcr = two_by_two(state_dfs[3], race, pop, total_pop)
plot_ei(ny_w_unhcr)
er(np.array(state_dfs[3][pop]), np.array(state_dfs[3][race]), np.array(state_dfs[3][total_pop]).astype(int))

# (Mena_UNSC, W_0)
pop = 'mena_unsd_ACS_pct'
race = 'white_PL_pct'
total_pop = 'totpop_PL'
ny_w_unsd = two_by_two(state_dfs[3], race, pop, total_pop)
plot_ei(ny_w_unsd)
er(np.array(state_dfs[3][pop]), np.array(state_dfs[3][race]), np.array(state_dfs[3][total_pop]).astype(int))

## Tracts, Texas

In [None]:


# (H, SOR_0)
tx_sor_hispanic = two_by_two(state_dfs[4], 'sor_PL_pct', 'hispanic_PL_pct', 'totpop_PL')
plot_ei(tx_sor_hispanic)
er(np.array(state_dfs[4]['hispanic_PL_pct']), np.array(state_dfs[4]['sor_PL_pct']), np.array(state_dfs[4]['totpop_PL']).astype(int))

# (Brazilian, SOR_0)
tx_sor_brazil = two_by_two(state_dfs[4], 'sor_ACS_pct', 'brazilian_ACS_pct', 'totpop_ACS')
plot_ei(tx_sor_brazil)
er(np.array(state_dfs[4]['brazilian_ACS_pct']), np.array(state_dfs[4]['sor_ACS_pct']), np.array(state_dfs[4]['totpop_ACS']).astype(int))

# (Guyanese, SOR_0)
pop = 'guyanese_ACS_pct'
race = 'sor_ACS_pct'
total_pop = 'totpop_ACS'
tx_sor_guy = two_by_two(state_dfs[4], race, pop, total_pop)
plot_ei(tx_sor_guy)
er(np.array(state_dfs[4][pop]), np.array(state_dfs[4][race]), np.array(state_dfs[4][total_pop]).astype(int))

# (Cabo Verdean, SOR_0)
pop = 'cabo_verdean_ACS_pct'
race = 'sor_ACS_pct'
total_pop = 'totpop_ACS'
tx_sor_cabo = two_by_two(state_dfs[4], race, pop, total_pop)
plot_ei(tx_sor_cabo)
er(np.array(state_dfs[4][pop]), np.array(state_dfs[4][race]), np.array(state_dfs[4][total_pop]).astype(int))

# (Belizean, SOR_0)
pop = 'belizean_ACS_pct'
race = 'sor_ACS_pct'
total_pop = 'totpop_ACS'
tx_sor_bel = two_by_two(state_dfs[4], race, pop, total_pop)
plot_ei(tx_sor_bel)
er(np.array(state_dfs[4][pop]), np.array(state_dfs[4][race]), np.array(state_dfs[4][total_pop]).astype(int))

# (Brazilian, H)
pop = 'brazilian_ACS_pct'
race = 'hispanic_PL_pct'
total_pop = 'totpop_PL'
tx_hispanic_brazil = two_by_two(state_dfs[4], race, pop, total_pop)
plot_ei(tx_hispanic_brazil)
er(np.array(state_dfs[4][pop]), np.array(state_dfs[4][race]), np.array(state_dfs[4][total_pop]).astype(int))

# (Guyanese, H)
pop = 'guyanese_ACS_pct'
race = 'hispanic_PL_pct'
total_pop = 'totpop_PL'
tx_hispanic_guy = two_by_two(state_dfs[4], race, pop, total_pop)
plot_ei(tx_hispanic_guy)
er(np.array(state_dfs[4][pop]), np.array(state_dfs[4][race]), np.array(state_dfs[4][total_pop]).astype(int))

# (Cabo Verde, B_0)
pop = 'cabo_verdean_ACS_pct'
race = 'black_PL_pct'
total_pop = 'totpop_PL'
tx_black_cabo = two_by_two(state_dfs[4], race, pop, total_pop)
plot_ei(tx_black_cabo)
er(np.array(state_dfs[4][pop]), np.array(state_dfs[4][race]), np.array(state_dfs[4][total_pop]).astype(int))

# (Belizean, H)
pop = 'belizean_ACS_pct'
race = 'hispanic_PL_pct'
total_pop = 'totpop_PL'
tx_hispanic_belize = two_by_two(state_dfs[4], race, pop, total_pop)
plot_ei(tx_hispanic_belize)
er(np.array(state_dfs[4][pop]), np.array(state_dfs[4][race]), np.array(state_dfs[4][total_pop]).astype(int))

# (Mena_WB, SOR_0)
pop = 'mena_world_bank_ACS_pct'
race = 'sor_ACS_pct'
total_pop = 'totpop_ACS'
tx_sor_world_bank = two_by_two(state_dfs[4], race, pop, total_pop)
plot_ei(tx_sor_world_bank)
er(np.array(state_dfs[4][pop]), np.array(state_dfs[4][race]), np.array(state_dfs[4][total_pop]).astype(int))

# (Mena_UNHC, SOR_0)
pop = 'mena_unhcr_ACS_pct'
race = 'sor_ACS_pct'
total_pop = 'totpop_ACS'
tx_sor_unhcr = two_by_two(state_dfs[4], race, pop, total_pop)
plot_ei(tx_sor_unhcr)
er(np.array(state_dfs[4][pop]), np.array(state_dfs[4][race]), np.array(state_dfs[4][total_pop]).astype(int))

# (Mena_UNSC, SOR_0)
pop = 'mena_unsd_ACS_pct'
race = 'sor_ACS_pct'
total_pop = 'totpop_ACS'
tx_sor_unsd = two_by_two(state_dfs[4], race, pop, total_pop)
plot_ei(tx_sor_unsd)
er(np.array(state_dfs[4][pop]), np.array(state_dfs[4][race]), np.array(state_dfs[4][total_pop]).astype(int))

# (Mena_WB, W_0)
pop = 'mena_world_bank_ACS_pct'
race = 'white_PL_pct'
total_pop = 'totpop_PL'
tx_w_world_bank = two_by_two(state_dfs[4], race, pop, total_pop)
plot_ei(tx_w_world_bank)
er(np.array(state_dfs[4][pop]), np.array(state_dfs[4][race]), np.array(state_dfs[4][total_pop]).astype(int))

# (Mena_UNHC, W_0)
pop = 'mena_unhcr_ACS_pct'
race = 'white_PL_pct'
total_pop = 'totpop_PL'
tx_w_unhcr = two_by_two(state_dfs[4], race, pop, total_pop)
plot_ei(tx_w_unhcr)
er(np.array(state_dfs[4][pop]), np.array(state_dfs[4][race]), np.array(state_dfs[4][total_pop]).astype(int))

# (Mena_UNSC, W_0)
pop = 'mena_unsd_ACS_pct'
race = 'white_PL_pct'
total_pop = 'totpop_PL'
tx_w_unsd = two_by_two(state_dfs[4], race, pop, total_pop)
plot_ei(tx_w_unsd)
er(np.array(state_dfs[4][pop]), np.array(state_dfs[4][race]), np.array(state_dfs[4][total_pop]).astype(int))

## Chloe's Work

In [79]:
sum(state_dfs[0]["hispanic_PL_pct"].isna())

32

In [85]:
state_dfs[0].fillna(0, inplace = True)

In [88]:
candidate_name_2by2 = "SORO"
demographic_group_name_2by2 = "Hispanic"
precinct_names = state_dfs[0]['tract'].astype(str)
group_fraction_2by2 = np.array(state_dfs[0]["hispanic_PL_pct"]) # Change this AND candidate name below
votes_fraction_2by2 = np.array(state_dfs[0]["sor_PL_pct"]) #Change this AND group name below
precinct_pops = np.array(state_dfs[0]["totpop_PL"]).astype(int)
ei_2by2 = TwoByTwoEI(model_name="king99_pareto_modification", pareto_scale=15, pareto_shape=2)
ei_2by2.fit(group_fraction_2by2,
      votes_fraction_2by2,
      precinct_pops,
      demographic_group_name=demographic_group_name_2by2,
      candidate_name=candidate_name_2by2,
      precinct_names=precinct_names, # omit this line if you don't have or don't want to use precinct names
      chains=4
)
# Generate a simple report to summarize the results
print(ei_2by2.summary())



  0%|          | 0/2500 [00:00<?, ?it/s]

  0%|          | 0/2500 [00:00<?, ?it/s]

  0%|          | 0/2500 [00:00<?, ?it/s]

  0%|          | 0/2500 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [90]:
print(ei_2by2.summary())

TypeError: unsupported format string passed to NoneType.__format__

In [71]:
state

array([0.02633311, 0.02048976, 0.03470203, ..., 0.03735705, 0.02698463,
       0.00796813])

In [None]:
candidate_name_2by2 = "SOR0"
demographic_group_name_2by2 = "Hispanic"
precinct_names = e_df['counties']
group_fraction_2by2 = np.array(e_df["h_pct"]) # Change this AND candidate name below
votes_fraction_2by2 = np.array(e_df["sor_pct"]) #Change this AND group name below
precinct_pops = np.array(e_df["total"]).astype(int)
ei_2by2 = TwoByTwoEI(model_name="king99_pareto_modification", pareto_scale=15, pareto_shape=2)
ei_2by2.fit(group_fraction_2by2,
      votes_fraction_2by2,
      precinct_pops,
      demographic_group_name=demographic_group_name_2by2,
      candidate_name=candidate_name_2by2,
      precinct_names=precinct_names, # omit this line if you don't have or don't want to use precinct names
      chains=4
)
# Generate a simple report to summarize the results
print(ei_2by2.summary())

goodmans_er = GoodmansER(is_weighted_regression="True")

goodmans_er.fit(group_fraction_2by2,
    votes_fraction_2by2,
    precinct_pops, # Must include populations if weighting by population
    demographic_group_name=demographic_group_name_2by2,
    candidate_name=candidate_name_2by2
)

print(goodmans_er.summary())
goodmans_er.plot()