In [1]:
from itertools import chain

import pandas as pd

from ddeserts.annotate import RACES
from ddeserts.annotate import add_all_stat_columns
from ddeserts.annotate import add_geo_columns
from ddeserts.annotate import with_columns_sorted
from ddeserts.load import load_cvap_data
from ddeserts.load import load_charter_cities

In [2]:
# load the data
def pre_filter(line):
    """Return True for lines that contain 'California' but not 'CDP'.

    Handed to load_cvap_data as its ``pre_filter`` argument.
    NOTE(review): presumably applied to each raw line of the input file
    before parsing -- confirm in ddeserts.load.
    """
    if 'California' not in line:
        return False
    return 'CDP' not in line


df = load_cvap_data('Place', pre_filter=pre_filter)
len(df)

485

In [3]:
# add geo columns and post-filter
# add_geo_columns is called for its effect on df (its return value is unused).
# NOTE(review): presumably it is what provides the 'state' and 'geotype'
# columns filtered on below -- confirm in ddeserts.annotate.
add_geo_columns(df)

# Keep only California cities and towns. The explicit .copy() makes the
# filtered frame independent of the unfiltered one, so that adding columns to
# it in later cells is not an assignment on a slice (which pandas may flag
# with SettingWithCopyWarning).
in_california = df['state'] == 'California'
is_city_or_town = df['geotype'].isin(['city', 'town'])
df = df[in_california & is_city_or_town].copy()
len(df)

482

In [4]:
# add charter city data
# Every row starts out labeled 'general law'; rows whose 'name' appears in
# the loaded charter-city collection are then relabeled 'charter'.
charter_cities = load_charter_cities()
is_charter = df['name'].isin(charter_cities)
df['geosubtype'] = 'general law'
df.loc[is_charter, 'geosubtype'] = 'charter'

In [5]:
# add stat columns and sort
# add_all_stat_columns is called only for its effect on df (its return value
# is discarded); the return value of with_columns_sorted replaces df.
# NOTE(review): presumably this is where the *_est / *_moe statistic columns
# used in later cells come from, and the sort is over column names --
# confirm both in ddeserts.annotate.
add_all_stat_columns(df)
df = with_columns_sorted(df)

In [6]:
# sort by common name of city (Ventura, not San Buenaventura)
# Rebind df to the sorted result rather than sorting with inplace=True: the
# resulting frame is the same, but no existing object is mutated behind the
# reader's back.
df = df.sort_values(by=['name'])

In [7]:
# Display the annotated, name-sorted table. The rendered output is abbreviated
# by pandas ("..." stands in for the hidden rows and columns).
df

Unnamed: 0_level_0,Unnamed: 1_level_0,geoid,geoname,geosubtype,geotype,name,state,adu_est,adu_moe,cit_est,cit_moe,...,prop_his_adu_dvap_est,prop_his_adu_dvap_moe,prop_ind_adu_dvap_est,prop_ind_adu_dvap_moe,prop_pac_adu_dvap_est,prop_pac_adu_dvap_moe,prop_tmr_adu_dvap_est,prop_tmr_adu_dvap_moe,prop_wht_adu_dvap_est,prop_wht_adu_dvap_moe
table,line,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Place,26573,1600000US0600296,"Adelanto city, California",charter,city,Adelanto,California,22605,761,31030,555,...,0.199111,0.074430,0.000000,1.000000,0.285714,0.714286,0.000000,0.644987,0.010417,0.198576
Place,26599,1600000US0600394,"Agoura Hills city, California",general law,city,Agoura Hills,California,16015,506,19225,362,...,0.163539,0.434848,,,,,0.025000,0.586514,0.029567,0.087397
Place,26664,1600000US0600562,"Alameda city, California",charter,city,Alameda,California,62765,652,73495,855,...,0.168443,0.114563,0.000000,0.573129,0.045455,0.782199,0.070866,0.250381,0.038821,0.051881
Place,26690,1600000US0600674,"Albany city, California",charter,city,Albany,California,14990,317,16765,503,...,0.271137,0.189370,0.000000,0.969746,0.000000,1.000000,0.059603,0.415508,0.054286,0.079818
Place,26729,1600000US0600884,"Alhambra city, California",charter,city,Alhambra,California,69775,654,70645,1162,...,0.136017,0.056382,0.028169,0.567457,0.111111,0.888889,0.044586,0.382115,0.037410,0.115071
Place,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Place,47399,1600000US0686930,"Yountville city, California",general law,city,Yountville,California,2755,92,2935,61,...,0.031915,0.625063,0.000000,1.000000,,,0.000000,0.870285,0.017241,0.186823
Place,47412,1600000US0686944,"Yreka city, California",general law,city,Yreka,California,5640,298,7545,97,...,0.056911,0.414314,0.000000,0.481880,0.000000,0.989949,0.000000,0.553856,0.001163,0.107812
Place,47425,1600000US0686972,"Yuba City city, California",general law,city,Yuba City,California,49710,442,59405,787,...,0.312090,0.056926,0.000000,0.277837,0.102564,0.494824,0.054945,0.313768,0.006709,0.040170
Place,47438,1600000US0687042,"Yucaipa city, California",general law,city,Yucaipa,California,41020,1119,51955,550,...,0.132381,0.104603,0.052632,0.947368,0.000000,1.000000,0.000000,0.819184,0.007308,0.089273


In [8]:
# Write the complete table (all columns, index included) to CSV.
all_columns_csv = 'output/ca-cities.csv'
df.to_csv(all_columns_csv)

In [9]:
# general disenfranchisement data
# Column order of the export:
#   1. identifying columns
#   2. an (est, moe) pair for each overall population measure
#   3. for each race code, four columns: DVAP est/moe followed by
#      prop_<race>_adu_dvap est/moe
id_cols = ['name', 'geosubtype']
pop_cols = [
    f'{pop}_{stat}'
    for pop in ('tot', 'adu', 'dvap', 'prop_adu_dvap')
    for stat in ('est', 'moe')
]
race_cols = [
    col
    for r in ('wht', 'his', 'asn', 'blk', 'ind', 'pac', 'tmr')
    for col in (
        f'{r}_dvap_est',
        f'{r}_dvap_moe',
        f'prop_{r}_adu_dvap_est',
        f'prop_{r}_adu_dvap_moe',
    )
]
cols = id_cols + pop_cols + race_cols

dg = df[cols]
dg.to_csv('output/ca-cities-dvap.csv')

In [10]:
# racial disparity data
# Column order of the export:
#   1. identifying columns
#   2. an (est, moe) pair for each overall population measure
#   3. for each race code, four columns: prop_adu est, prop_cvap est,
#      then racial_disp est/moe
id_cols = ['name', 'geosubtype']
pop_cols = [
    f'{pop}_{stat}'
    for pop in ('tot', 'adu', 'cvap')
    for stat in ('est', 'moe')
]
race_cols = [
    col
    for r in ('wht', 'his', 'asn', 'blk', 'ind', 'pac', 'tmr')
    for col in (
        f'prop_adu_{r}_est',
        f'prop_cvap_{r}_est',
        f'racial_disp_{r}_est',
        f'racial_disp_{r}_moe',
    )
]
cols = id_cols + pop_cols + race_cols

dg = df[cols]
dg.to_csv('output/ca-cities-racial-disp.csv')

In [11]:
# Charter cities with the lowest racial_disp_his_est values (first 30 rows,
# ascending).
his_cols = ['name', 'geosubtype', 'racial_disp_his_est', 'racial_disp_his_moe']
dg = df[his_cols]
is_charter = dg['geosubtype'] == 'charter'
dg[is_charter].sort_values(by='racial_disp_his_est').head(30)

Unnamed: 0_level_0,Unnamed: 1_level_0,name,geosubtype,racial_disp_his_est,racial_disp_his_moe
table,line,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Place,35751,King City,charter,-0.165947,0.148865
Place,42719,Salinas,charter,-0.121919,0.031558
Place,43252,San Rafael,charter,-0.121167,0.024259
Place,42914,Sand City,charter,-0.120696,0.214543
Place,43382,Santa Maria,charter,-0.117588,0.038189
Place,30512,Compton,charter,-0.103871,0.04394
Place,42069,Richmond,charter,-0.100994,0.027528
Place,35179,Inglewood,charter,-0.097968,0.02686
Place,35166,Industry,charter,-0.094581,0.520931
Place,46437,Watsonville,charter,-0.094025,0.053538


In [12]:
# Charter cities with the lowest racial_disp_asn_est values (first 30 rows,
# ascending).
asn_cols = ['name', 'geosubtype', 'racial_disp_asn_est', 'racial_disp_asn_moe']
dg = df[asn_cols]
is_charter = dg['geosubtype'] == 'charter'
dg[is_charter].sort_values(by='racial_disp_asn_est').head(30)

Unnamed: 0_level_0,Unnamed: 1_level_0,name,geosubtype,racial_disp_asn_est,racial_disp_asn_moe
table,line,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Place,44838,Sunnyvale,charter,-0.140617,0.025385
Place,43317,Santa Clara,charter,-0.125333,0.027658
Place,26690,Albany,charter,-0.084724,0.042739
Place,35257,Irvine,charter,-0.079677,0.023221
Place,39196,Mountain View,charter,-0.077464,0.025268
Place,43265,San Ramon,charter,-0.07681,0.033142
Place,45462,Torrance,charter,-0.065397,0.022257
Place,27171,Arcadia,charter,-0.063339,0.049204
Place,40483,Palo Alto,charter,-0.054037,0.023868
Place,31045,Cypress,charter,-0.052277,0.034843


In [13]:
# Charter cities with the highest racial_disp_wht_est values (first 30 rows,
# descending -- note the opposite sort direction from the his/asn cells).
wht_cols = ['name', 'geosubtype', 'racial_disp_wht_est', 'racial_disp_wht_moe']
dg = df[wht_cols]
is_charter = dg['geosubtype'] == 'charter'
dg[is_charter].sort_values(by='racial_disp_wht_est', ascending=False).head(30)

Unnamed: 0_level_0,Unnamed: 1_level_0,name,geosubtype,racial_disp_wht_est,racial_disp_wht_moe
table,line,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Place,35751,King City,charter,0.137028,0.098462
Place,43252,San Rafael,charter,0.122015,0.044577
Place,44838,Sunnyvale,charter,0.118964,0.022726
Place,39196,Mountain View,charter,0.105283,0.034021
Place,42914,Sand City,charter,0.099033,0.379626
Place,43317,Santa Clara,charter,0.096673,0.027113
Place,43382,Santa Maria,charter,0.093367,0.029198
Place,41965,Redwood City,charter,0.09333,0.037619
Place,42719,Salinas,charter,0.083305,0.021363
Place,43434,Santa Rosa,charter,0.082444,0.021391


In [14]:
# Inspect the 'his' proportion and count columns for the King City row.
king_city_cols = [
    'prop_adu_his_est', 'prop_adu_his_moe',
    'prop_dvap_his_est', 'prop_dvap_his_moe',
    'prop_adu_dvap_est', 'prop_adu_dvap_moe',
    'prop_his_adu_dvap_est', 'prop_his_adu_dvap_moe',
    'adu_est', 'cvap_est',
    'his_adu_est', 'his_adu_moe',
    'his_cvap_est', 'his_cvap_moe',
]
is_king_city = df['name'] == 'King City'
dg = df[is_king_city]
dg[king_city_cols]

Unnamed: 0_level_0,Unnamed: 1_level_0,prop_adu_his_est,prop_adu_his_moe,prop_dvap_his_est,prop_dvap_his_moe,prop_adu_dvap_est,prop_adu_dvap_moe,prop_his_adu_dvap_est,prop_his_adu_dvap_moe,adu_est,cvap_est,his_adu_est,his_adu_moe,his_cvap_est,his_cvap_moe
table,line,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Place,35751,0.849444,0.069953,0.98583,0.185025,0.548889,0.055279,0.637018,0.061523,9000,4060,7645,506,2775,433


In [15]:
# Show the two 'his' proportion estimates next to the racial_disp_his
# estimate and margin of error for the King City row.
disparity_cols = [
    'prop_adu_his_est',
    'prop_cvap_his_est',
    'racial_disp_his_est',
    'racial_disp_his_moe',
]
is_king_city = df['name'] == 'King City'
dg = df[is_king_city]
dg[disparity_cols]

Unnamed: 0_level_0,Unnamed: 1_level_0,prop_adu_his_est,prop_cvap_his_est,racial_disp_his_est,racial_disp_his_moe
table,line,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Place,35751,0.849444,0.683498,-0.165947,0.148865


In [16]:
# Show prop_adu_dvap_moe alongside each row's name.
moe_cols = ['name', 'prop_adu_dvap_moe']
df[moe_cols]

Unnamed: 0_level_0,Unnamed: 1_level_0,name,prop_adu_dvap_moe
table,line,Unnamed: 2_level_1,Unnamed: 3_level_1
Place,26573,Adelanto,0.042975
Place,26599,Agoura Hills,0.044954
Place,26664,Alameda,0.017765
Place,26690,Albany,0.037616
Place,26729,Alhambra,0.017209
Place,...,...,...
Place,47399,Yountville,0.046413
Place,47412,Yreka,0.075566
Place,47425,Yuba City,0.019877
Place,47438,Yucaipa,0.039395


In [17]:
# Count the rows whose tot_est is at least 50000.
tot_est_floor = 50000
meets_floor = df['tot_est'] >= tot_est_floor
len(df[meets_floor])

176