In [1]:
from itertools import chain

import pandas as pd

from ddeserts.annotate import RACES
from ddeserts.annotate import add_all_stat_columns
from ddeserts.annotate import add_geo_columns
from ddeserts.annotate import with_columns_sorted
from ddeserts.load import load_cvap_data
from ddeserts.load import load_charter_cities

In [2]:
# load the data
def pre_filter(line):
    """Return True for raw lines to keep before parsing.

    A line is kept when it contains the text 'California' and does not
    contain the text 'CDP'.
    """
    return 'California' in line and 'CDP' not in line


# pre_filter is handed to the loader as a keyword argument
# (presumably applied to each raw input line before parsing; confirm in ddeserts.load)
df = load_cvap_data('Place', pre_filter=pre_filter)
len(df)

485

In [3]:
# add geo columns and post-filter
# add_geo_columns() is called for its effect on df (its return value is unused);
# the 'state' and 'geotype' columns read below are expected to exist afterwards
add_geo_columns(df)

# keep only California rows whose geotype is 'city' or 'town'
is_california = df['state'] == 'California'
is_city_or_town = df['geotype'].isin(['city', 'town'])

# take an explicit copy: columns are assigned on this frame in later cells, and
# assigning to a boolean-mask slice is what triggers pandas' SettingWithCopyWarning
df = df[is_california & is_city_or_town].copy()
len(df)

482

In [4]:
# add charter city data
charter_cities = load_charter_cities()

# label each row in one pass: 'charter' when its name appears in
# charter_cities, 'general law' for every other row
is_charter = df['name'].isin(charter_cities)
df['geosubtype'] = is_charter.map({True: 'charter', False: 'general law'})

In [5]:
# add stat columns and sort
# add_all_stat_columns() is called for its effect on df (its return value is
# unused here); with_columns_sorted() returns the frame that df is rebound to.
# NOTE(review): presumably the sort is over column labels, going by the helper's
# name; confirm in ddeserts.annotate
add_all_stat_columns(df)
df = with_columns_sorted(df)

In [6]:
# sort by common name of city (Ventura, not San Buenaventura)
# rebind the sorted result instead of using inplace=True: df was returned by
# a helper and may share data with another frame, so sorting it in place can
# emit SettingWithCopyWarning; the resulting row order is the same either way
df = df.sort_values(by=['name'])

In [7]:
# display the annotated table, now ordered by 'name'; the rendered output is
# truncated by pandas (first and last rows, middle rows and columns elided)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,geoid,geoname,geosubtype,geotype,name,state,adu_est,adu_moe,cit_est,cit_moe,...,prop_his_adu_dvap_est,prop_his_adu_dvap_moe,prop_ind_adu_dvap_est,prop_ind_adu_dvap_moe,prop_pac_adu_dvap_est,prop_pac_adu_dvap_moe,prop_tmr_adu_dvap_est,prop_tmr_adu_dvap_moe,prop_wht_adu_dvap_est,prop_wht_adu_dvap_moe
table,line,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Place,26560,1600000US0600296,"Adelanto city, California",charter,city,Adelanto,California,24830,882,33815,572,...,0.201182,0.074798,0.0,1.000000,0.357143,0.642857,0.000000,0.875587,0.010753,0.208508
Place,26586,1600000US0600394,"Agoura Hills city, California",general law,city,Agoura Hills,California,15880,602,19480,323,...,0.071240,0.366285,,,,,0.017857,0.538092,0.024161,0.100157
Place,26651,1600000US0600562,"Alameda city, California",charter,city,Alameda,California,61780,691,72305,727,...,0.143922,0.122714,0.0,0.572756,0.032258,0.695795,0.061776,0.244204,0.036965,0.056467
Place,26677,1600000US0600674,"Albany city, California",charter,city,Albany,California,14790,329,17045,405,...,0.194888,0.203390,0.0,1.000000,0.142857,0.857143,0.023121,0.387839,0.051481,0.093901
Place,26716,1600000US0600884,"Alhambra city, California",charter,city,Alhambra,California,68630,717,70150,1081,...,0.124564,0.056432,0.0,0.896672,0.195652,0.804348,0.038674,0.415265,0.032394,0.116770
Place,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Place,47386,1600000US0686930,"Yountville city, California",general law,city,Yountville,California,3065,72,3275,75,...,0.047619,0.569416,0.0,1.000000,,,0.000000,0.987705,0.006726,0.187086
Place,47399,1600000US0686944,"Yreka city, California",general law,city,Yreka,California,5705,304,7750,56,...,0.059880,0.322946,0.0,0.557604,0.000000,0.700372,0.000000,0.599440,0.001195,0.111618
Place,47412,1600000US0686972,"Yuba City city, California",general law,city,Yuba City,California,51885,473,61875,793,...,0.303074,0.062663,0.0,0.325137,0.151515,0.599458,0.183288,0.274789,0.006322,0.037348
Place,47425,1600000US0687042,"Yucaipa city, California",general law,city,Yucaipa,California,40510,954,51940,474,...,0.131088,0.108338,0.0,1.000000,0.000000,1.000000,0.000000,0.648414,0.008787,0.081268


In [8]:
# write the full annotated table, index included, to a CSV file; the path is
# relative to the working directory
# NOTE(review): presumably output/ must already exist; confirm before a fresh run
df.to_csv('output/ca-cities.csv')

In [9]:
# general disenfranchisement data
# column order: the two identifying columns, then an est/moe pair for each
# overall measure, then four dvap columns per race code
cols = list(chain(
    ['name', 'geosubtype'],
    (
        f'{pop}_{stat}'
        for pop in ('tot', 'adu', 'dvap', 'prop_adu_dvap')
        for stat in ('est', 'moe')
    ),
    (
        col
        for r in ('wht', 'his', 'asn', 'blk', 'ind', 'pac', 'tmr')
        for col in (
            f'{r}_dvap_est',
            f'{r}_dvap_moe',
            f'prop_{r}_adu_dvap_est',
            f'prop_{r}_adu_dvap_moe',
        )
    ),
))

# select those columns and write them out
dg = df[cols]
dg.to_csv('output/ca-cities-dvap.csv')

In [10]:
# racial disparity data
# column order: the two identifying columns, then an est/moe pair for each
# overall measure, then four columns per race code (the two proportion
# columns are exported as estimates only, racial_disp as est and moe)
cols = list(chain(
    ['name', 'geosubtype'],
    (
        f'{pop}_{stat}'
        for pop in ('tot', 'adu', 'cvap')
        for stat in ('est', 'moe')
    ),
    (
        col
        for r in ('wht', 'his', 'asn', 'blk', 'ind', 'pac', 'tmr')
        for col in (
            f'prop_adu_{r}_est',
            f'prop_cvap_{r}_est',
            f'racial_disp_{r}_est',
            f'racial_disp_{r}_moe',
        )
    ),
))

# select those columns and write them out
dg = df[cols]
dg.to_csv('output/ca-cities-racial-disp.csv')

In [11]:
# charter cities only, ordered by ascending racial_disp_his_est, first 30 rows
dg = df[[
    'name',
    'geosubtype',
    'racial_disp_his_est',
    'racial_disp_his_moe',
]]
(
    dg.loc[dg['geosubtype'].eq('charter')]
    .sort_values(by='racial_disp_his_est')
    .head(30)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,name,geosubtype,racial_disp_his_est,racial_disp_his_moe
table,line,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Place,42706,Salinas,charter,-0.118589,0.026648
Place,43239,San Rafael,charter,-0.11301,0.023344
Place,43369,Santa Maria,charter,-0.110375,0.03651
Place,42901,Sand City,charter,-0.108136,0.225121
Place,35738,King City,charter,-0.100792,0.192112
Place,35166,Inglewood,charter,-0.097765,0.030867
Place,42056,Richmond,charter,-0.093663,0.02887
Place,30499,Compton,charter,-0.090163,0.037342
Place,46424,Watsonville,charter,-0.085065,0.061124
Place,27002,Anaheim,charter,-0.083366,0.01725


In [12]:
# charter cities only, ordered by ascending racial_disp_asn_est, first 30 rows
dg = df[[
    'name',
    'geosubtype',
    'racial_disp_asn_est',
    'racial_disp_asn_moe',
]]
(
    dg.loc[dg['geosubtype'].eq('charter')]
    .sort_values(by='racial_disp_asn_est')
    .head(30)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,name,geosubtype,racial_disp_asn_est,racial_disp_asn_moe
table,line,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Place,44825,Sunnyvale,charter,-0.14177,0.026585
Place,43304,Santa Clara,charter,-0.135994,0.021735
Place,26677,Albany,charter,-0.081785,0.04835
Place,35244,Irvine,charter,-0.076732,0.021082
Place,39183,Mountain View,charter,-0.076523,0.027209
Place,43252,San Ramon,charter,-0.06955,0.03221
Place,27158,Arcadia,charter,-0.061197,0.049032
Place,45449,Torrance,charter,-0.060456,0.021651
Place,31032,Cypress,charter,-0.050265,0.034343
Place,27938,Berkeley,charter,-0.049973,0.019378


In [13]:
# charter cities only, ordered by descending racial_disp_wht_est, first 30 rows
dg = df[[
    'name',
    'geosubtype',
    'racial_disp_wht_est',
    'racial_disp_wht_moe',
]]
(
    dg.loc[dg['geosubtype'].eq('charter')]
    .sort_values(by='racial_disp_wht_est', ascending=False)
    .head(30)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,name,geosubtype,racial_disp_wht_est,racial_disp_wht_moe
table,line,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Place,35738,King City,charter,0.120704,0.085067
Place,44825,Sunnyvale,charter,0.118399,0.022254
Place,43239,San Rafael,charter,0.114817,0.042717
Place,39183,Mountain View,charter,0.096644,0.031462
Place,43304,Santa Clara,charter,0.09578,0.024875
Place,42901,Sand City,charter,0.091525,0.315872
Place,43369,Santa Maria,charter,0.088179,0.028587
Place,43421,Santa Rosa,charter,0.083254,0.021193
Place,42706,Salinas,charter,0.08318,0.017692
Place,35153,Industry,charter,0.081019,0.340572


In [14]:
# pull the row(s) named 'King City' and show the 'his' proportion columns
# next to the underlying adult and cvap figures
dg = df.loc[df['name'].eq('King City')]
dg[[
    'prop_adu_his_est',
    'prop_adu_his_moe',
    'prop_dvap_his_est',
    'prop_dvap_his_moe',
    'prop_adu_dvap_est',
    'prop_adu_dvap_moe',
    'prop_his_adu_dvap_est',
    'prop_his_adu_dvap_moe',
    'adu_est',
    'cvap_est',
    'his_adu_est',
    'his_adu_moe',
    'his_cvap_est',
    'his_cvap_moe',
]]

Unnamed: 0_level_0,Unnamed: 1_level_0,prop_adu_his_est,prop_adu_his_moe,prop_dvap_his_est,prop_dvap_his_moe,prop_adu_dvap_est,prop_adu_dvap_moe,prop_his_adu_dvap_est,prop_his_adu_dvap_moe,adu_est,cvap_est,his_adu_est,his_adu_moe,his_cvap_est,his_cvap_moe
table,line,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Place,35738,0.830805,0.078194,0.911672,0.212565,0.554842,0.070018,0.608848,0.080067,8570,3815,7120,568,2785,525


In [15]:
# show the two 'his' proportion estimates beside the racial_disp columns for
# the row(s) named 'King City'
# NOTE(review): in the saved output racial_disp_his_est equals
# prop_cvap_his_est - prop_adu_his_est; presumably that is how the column is
# defined, confirm in ddeserts.annotate
dg = df.loc[df['name'].eq('King City')]
dg[[
    'prop_adu_his_est',
    'prop_cvap_his_est',
    'racial_disp_his_est',
    'racial_disp_his_moe',
]]

Unnamed: 0_level_0,Unnamed: 1_level_0,prop_adu_his_est,prop_cvap_his_est,racial_disp_his_est,racial_disp_his_moe
table,line,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Place,35738,0.830805,0.730013,-0.100792,0.192112


In [16]:
# every row, restricted to the name and the prop_adu_dvap margin-of-error column
df.loc[:, ['name', 'prop_adu_dvap_moe']]

Unnamed: 0_level_0,Unnamed: 1_level_0,name,prop_adu_dvap_moe
table,line,Unnamed: 2_level_1,Unnamed: 3_level_1
Place,26560,Adelanto,0.046244
Place,26586,Agoura Hills,0.051033
Place,26651,Alameda,0.017271
Place,26677,Albany,0.037088
Place,26716,Alhambra,0.018596
Place,...,...,...
Place,47386,Yountville,0.038285
Place,47399,Yreka,0.075246
Place,47412,Yuba City,0.019660
Place,47425,Yucaipa,0.034674


In [17]:
# number of rows whose tot_est is at least 50000 (counted by summing the
# boolean mask; int() keeps the displayed result a plain Python integer)
int(df['tot_est'].ge(50000).sum())

180