In [1]:
from ddeserts.annotate import multiply_moes
from ddeserts.annotate import prop_ests
from ddeserts.annotate import prop_moes
from ddeserts.annotate import sum_moes
from ddeserts.census import load_age_sex_cit_data
from ddeserts.tsp import load_felon_disf_data
from ddeserts.tsp import PROP_PRISON_NON_CIT

In [2]:
# Population table; its 'adu_est' and 'cvap_est' columns are summed in the next cell.
pop_df = load_age_sex_cit_data()
# Felony-disenfranchisement table; merged with pop_df on geoname/geotype further down.
felon_df = load_felon_disf_data()

In [3]:
# Totals over every row of pop_df: adults (adu) and citizen voting-age
# population (cvap).
adu_est = pop_df['adu_est'].sum()
cvap_est = pop_df['cvap_est'].sum()

# Overall share of adults who are citizens, and its complement (non-citizens).
prop_adu_cit = cvap_est / adu_est
prop_adu_non_cit = 1 - prop_adu_cit
prop_adu_non_cit


0.07498392290080158

In [4]:
# citizen share of the prison population: complement of the imported
# non-citizen share
prop_prison_cit = 1 - PROP_PRISON_NON_CIT

# rate of (adult) citizens being in prison, relative to their proportion
# of the adult population
cit_prison_rate = prop_prison_cit / prop_adu_cit
# same, for non-citizens
non_cit_prison_rate = PROP_PRISON_NON_CIT / prop_adu_non_cit

# estimated rate at which adult citizens are in prison,
# relative to adult non-citizens
# (reused later to split the felony-disenfranchised share between
# citizens and non-citizens)
cit_prison_ratio = cit_prison_rate / non_cit_prison_rate
cit_prison_ratio

1.5732701884443223

In [5]:
# Inner join: only geographies present in both tables (same name and type) survive.
df = pop_df.merge(
    felon_df,
    how='inner',
    on=['geoname', 'geotype'],
)

In [6]:
# Citizen share of adults per geography, with its margin of error.
df['prop_adu_cit_est'] = prop_ests(df, 'cvap', 'adu')
df['prop_adu_cit_moe'] = prop_moes(df, 'cvap', 'adu')

# Naming: "nc" = non-citizen; "ncvap" = non-citizen adult
# (non-citizen voting-age population), i.e. adults minus adult citizens,
# floored at zero.
df['ncvap_est'] = df['adu_est'].sub(df['cvap_est']).clip(lower=0)
df['ncvap_moe'] = sum_moes(df, 'adu', 'cvap')
df['prop_adu_nc_est'] = prop_ests(df, 'ncvap', 'adu')
df['prop_adu_nc_moe'] = prop_moes(df, 'ncvap', 'adu')


In [7]:
# Felony-disenfranchised share of adults per geography (per the column names).
df['prop_adu_felon_disf_est'] = prop_ests(df, 'felon_disf', 'adu')

df['felon_disf_moe'] = 0  # no MoE provided, supposedly exact
# NOTE(review): with the numerator MoE forced to 0 above, this MoE presumably
# reflects only uncertainty in the adult estimate — confirm against prop_moes.
df['prop_adu_felon_disf_moe'] = prop_moes(df, 'felon_disf', 'adu')

In [8]:
def make_prop_ncvap_felon_disf_est(r, prison_ratio=None):
    """Estimate the felony-disenfranchised share of non-citizen adults.

    Let ``x`` be the felony-disenfranchisement rate among non-citizen adults
    and assume citizens are disenfranchised at ``prison_ratio * x``. The
    overall adult rate then decomposes as::

        p_adu_felon = p_adu_nc * x + p_adu_cit * prison_ratio * x

    and this function returns that equation solved for ``x``.

    Parameters
    ----------
    r : mapping
        Anything indexable by column name (e.g. a row passed by
        ``df.apply(..., axis=1)``) providing 'prop_adu_cit_est',
        'prop_adu_nc_est' and 'prop_adu_felon_disf_est'.
    prison_ratio : float, optional
        Rate for citizens relative to non-citizens. When omitted, the
        notebook-level ``cit_prison_ratio`` is looked up at call time, which
        is what ``df.apply`` relies on.

    Returns
    -------
    The estimated rate ``x`` for the given row.
    """
    if prison_ratio is None:
        # Fall back to the value computed earlier in the notebook.
        prison_ratio = cit_prison_ratio

    p_adu_cit = r['prop_adu_cit_est']
    p_adu_nc = r['prop_adu_nc_est']
    p_adu_felon = r['prop_adu_felon_disf_est']

    return p_adu_felon / (p_adu_nc + p_adu_cit * prison_ratio)

# Row by row, estimate the felony-disenfranchised share of non-citizen adults.
df['prop_ncvap_felon_disf_est'] = df.apply(make_prop_ncvap_felon_disf_est, axis='columns')
# That estimate is prop_adu_felon_disf_est divided by a number somewhat bigger
# than 1, so the adult-level MoE is reused as an upper bound.
df['prop_ncvap_felon_disf_moe'] = df['prop_adu_felon_disf_moe']

# Share of adults counted in both groups: non-citizen share times the
# felony-disenfranchised share among non-citizens.
df['prop_adu_double_disf_est'] = df['prop_adu_nc_est'].mul(df['prop_ncvap_felon_disf_est'])
df['prop_adu_double_disf_moe'] = multiply_moes(df, 'prop_adu_nc', 'prop_ncvap_felon_disf')

df[[
    'prop_adu_nc_est',
    'prop_adu_nc_moe',
    'prop_ncvap_felon_disf_est',
    'prop_ncvap_felon_disf_moe',
    'prop_adu_double_disf_moe',
]]


Unnamed: 0,prop_adu_nc_est,prop_adu_nc_moe,prop_ncvap_felon_disf_est,prop_ncvap_felon_disf_moe,prop_adu_double_disf_moe
0,0.025428,0.002502,0.051587,8.573907e-05,0.000129
1,0.03563,0.009778,0.007564,4.253134e-05,7.4e-05
2,0.077797,0.003478,0.029088,1.750388e-05,0.000101
3,0.037578,0.003592,0.022395,5.047442e-05,8e-05
4,0.145803,0.001909,0.00214,5.670932e-07,4e-06
5,0.056057,0.003478,0.002447,2.607732e-06,9e-06
6,0.078519,0.00534,0.001557,1.87391e-06,8e-06
7,0.049891,0.007816,0.00617,4.752298e-06,4.8e-05
8,0.100726,0.00242,0.042309,1.645329e-05,0.000102
9,0.062068,0.00284,0.018132,2.096324e-05,5.2e-05


In [9]:
# Inclusion-exclusion: non-citizen share plus felony-disenfranchised share,
# minus the overlap that both of them count.
df['prop_adu_disf_est'] = (
    df['prop_adu_nc_est']
    + df['prop_adu_felon_disf_est']
    - df['prop_adu_double_disf_est']
)
df['prop_adu_disf_moe'] = sum_moes(
    df,
    'prop_adu_nc',
    'prop_adu_felon_disf',
    'prop_adu_double_disf',
)

In [18]:
# Detailed view: every component with its MoE, lowest combined share first.
df[[
    'geoname',
    'prop_adu_nc_est',
    'prop_adu_nc_moe',
    'prop_adu_felon_disf_est',
    'prop_adu_felon_disf_moe',
    'prop_adu_disf_est',
    'prop_adu_disf_moe',
    'prop_adu_double_disf_est',
    'prop_adu_double_disf_moe',
]].sort_values('prop_adu_disf_est')

Unnamed: 0,geoname,prop_adu_nc_est,prop_adu_nc_moe,prop_adu_felon_disf_est,prop_adu_felon_disf_moe,prop_adu_disf_est,prop_adu_disf_moe,prop_adu_double_disf_est,prop_adu_double_disf_moe
44,Vermont,0.015992,0.00712,0.0,0.0,0.015992,0.00712,0.0,0.0
18,Maine,0.017666,0.004348,0.0,0.0,0.017666,0.004348,0.0,0.0
25,Montana,0.013163,0.005166,0.004752,1.316477e-05,0.017875,0.005166,4e-05,1.6e-05
47,West Virginia,0.009014,0.003463,0.009985,1.896573e-05,0.018942,0.003463,5.7e-05,2.2e-05
28,New Hampshire,0.026283,0.00551,0.002207,4.943081e-06,0.028453,0.00551,3.7e-05,8e-06
34,Ohio,0.025608,0.001596,0.005111,2.880009e-06,0.030635,0.001596,8.4e-05,5e-06
33,North Dakota,0.031585,0.008415,0.002594,1.164136e-05,0.034126,0.008415,5.3e-05,1.4e-05
40,South Dakota,0.018454,0.007561,0.019488,8.499282e-05,0.037712,0.007562,0.00023,9.4e-05
21,Michigan,0.03364,0.002198,0.00445,2.335173e-06,0.037994,0.002198,9.6e-05,6e-06
24,Missouri,0.022677,0.002346,0.017192,1.662008e-05,0.039619,0.002346,0.00025,2.6e-05


In [19]:
# Compact view: combined disenfranchised share and its MoE, lowest first.
df[[
    'geoname',
    'prop_adu_disf_est',
    'prop_adu_disf_moe',
]].sort_values('prop_adu_disf_est')

Unnamed: 0,geoname,prop_adu_disf_est,prop_adu_disf_moe
44,Vermont,0.015992,0.00712
18,Maine,0.017666,0.004348
25,Montana,0.017875,0.005166
47,West Virginia,0.018942,0.003463
28,New Hampshire,0.028453,0.00551
34,Ohio,0.030635,0.001596
33,North Dakota,0.034126,0.008415
40,South Dakota,0.037712,0.007562
21,Michigan,0.037994,0.002198
24,Missouri,0.039619,0.002346


In [11]:
# Head-count of non-citizen adults who are also felony-disenfranchised:
# non-citizen adult count times the estimated rate among them.
df['ncvap_felon_disf_est'] = df['ncvap_est'].mul(df['prop_ncvap_felon_disf_est'])
df['ncvap_felon_disf_moe'] = multiply_moes(
    df, 'ncvap', 'prop_ncvap_felon_disf'
)

In [17]:
# Total disenfranchised adults: non-citizens plus felony-disenfranchised,
# minus those counted in both groups.
# Round to the nearest whole person before casting; a bare astype('int')
# truncates toward zero and so always under-reports fractional results.
df['adu_disf_est'] = (
    df['ncvap_est'] + df['felon_disf_est'] - df['ncvap_felon_disf_est']
).round().astype('int')
df['adu_disf_moe'] = (
    sum_moes(df, 'ncvap', 'felon_disf', 'ncvap_felon_disf').round().astype('int')
)

df[['geoname', 'ncvap_est', 'ncvap_moe', 'felon_disf_est', 'felon_disf_moe', 'adu_disf_est', 'adu_disf_moe']]

Unnamed: 0,geoname,ncvap_est,ncvap_moe,felon_disf_est,felon_disf_moe,adu_disf_est,adu_disf_moe
0,Alabama,100778,9917,318681,0,414260,9930
1,Alaska,19876,5454,6552,0,26277,5454
2,Arizona,449013,20073,256636,0,692588,20081
3,Arkansas,88302,8439,81658,0,167982,8441
4,California,4451414,58278,97328,0,4539216,58278
5,Colorado,259479,16096,17455,0,276299,16096
6,Connecticut,227374,15463,6892,0,233911,15463
7,Delaware,40420,6332,7721,0,47891,6332
8,Florida,1808029,43428,1150944,0,2882477,43466
9,Georgia,521819,23873,234410,0,746767,23876
