In [1]:
from pathlib import Path

import pandas as pd

from ddeserts.annotate import add_all_stat_columns
from ddeserts.annotate import add_geo_columns
from ddeserts.annotate import with_columns_sorted
from ddeserts.load import load_cvap_data
from ddeserts.load import load_charter_cities

In [2]:
# Load the 'Place' CVAP table, pre-filtering on the line text before loading.
def pre_filter(line):
    """Return True for lines that mention 'California' and do not contain 'CDP'.

    NOTE(review): presumably `line` is one raw text line of the CVAP source
    file, handed over by load_cvap_data -- confirm against that loader.
    """
    if 'CDP' in line:
        return False
    return 'California' in line


df = load_cvap_data('Place', pre_filter=pre_filter)
len(df)

485

In [3]:
# Add the geo columns, then keep only California cities and towns.
# add_geo_columns's return value is unused, so it presumably adds the 'state'
# and 'geotype' columns used below to df in place -- TODO confirm.
add_geo_columns(df)

# Take an explicit copy of the filtered rows. Later cells add columns to df in
# place; doing that on a boolean-mask slice of another frame is the pattern
# that triggers pandas' SettingWithCopyWarning and leaves it ambiguous which
# object is being modified. The copy makes df an independent frame.
df = df[(df['state'] == 'California') & df['geotype'].isin(['city', 'town'])].copy()
len(df)

482

In [4]:
# Flag every row whose 'name' appears in the loaded charter-city collection.
charter_cities = load_charter_cities()
is_charter = df['name'].isin(charter_cities)
df['hascharter'] = is_charter

In [5]:
# Add the stat columns and sort the columns.
# Order matters: add_all_stat_columns's return value is discarded, so it
# presumably adds its columns to df in place (TODO confirm), and only then is
# df rebound to the result of with_columns_sorted.
add_all_stat_columns(df)
df = with_columns_sorted(df)
# Bare last expression so the notebook renders the resulting frame.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,geoid,geoname,geotype,hascharter,name,state,adu_est,adu_moe,cit_est,cit_moe,...,prop_his_adu_dvap_est,prop_his_adu_dvap_moe,prop_ind_adu_dvap_est,prop_ind_adu_dvap_moe,prop_oth_adu_dvap_est,prop_oth_adu_dvap_moe,prop_pac_adu_dvap_est,prop_pac_adu_dvap_moe,prop_wht_adu_dvap_est,prop_wht_adu_dvap_moe
table,line,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Place,25130,16000US0600296,"Adelanto city, California",city,True,Adelanto,California,21995,653,29610,532,...,0.265078,0.042080,0.000000,0.000000,0.000000,1.316976,0.166667,0.755453,0.014876,0.028688
Place,25156,16000US0600394,"Agoura Hills city, California",city,False,Agoura Hills,California,16140,393,19320,468,...,0.243333,0.118992,0.000000,23.000000,0.030769,0.733521,0.000000,23.000000,0.034962,0.009789
Place,25221,16000US0600562,"Alameda city, California",city,True,Alameda,California,62565,524,72155,810,...,0.152388,0.041287,0.000000,0.000000,0.055666,0.221985,0.125000,0.225924,0.046754,0.010674
Place,25247,16000US0600674,"Albany city, California",city,True,Albany,California,14665,241,16290,629,...,0.281013,0.093256,0.000000,0.000000,0.045045,0.711480,0.000000,0.000000,0.063328,0.031071
Place,25286,16000US0600884,"Alhambra city, California",city,True,Alhambra,California,70315,496,69830,975,...,0.149142,0.019888,0.040816,0.133889,0.038217,1.091934,0.192308,0.498896,0.034757,0.020061
Place,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Place,44786,16000US0686930,"Yountville city, California",city,False,Yountville,California,2760,81,2940,41,...,0.041667,0.099185,0.000000,0.000000,0.000000,0.400780,0.000000,12.000000,0.016355,0.016581
Place,44799,16000US0686944,"Yreka city, California",city,False,Yreka,California,5660,180,7500,107,...,0.067308,0.100572,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.005721,0.004546
Place,44812,16000US0686972,"Yuba City city, California",city,False,Yuba City,California,49455,404,58480,771,...,0.344544,0.037790,0.000000,0.000000,0.064789,0.443142,0.108696,0.141410,0.008301,0.008530
Place,44825,16000US0687042,"Yucaipa city, California",city,False,Yucaipa,California,40175,646,50875,470,...,0.142425,0.042594,0.062500,0.282738,0.000000,0.461417,0.000000,0.000000,0.011317,0.006976


In [6]:
# Write the annotated table to output/ca-cities.csv.
# to_csv does not create missing directories (it raises OSError instead), so
# make sure the target directory exists first; this keeps a fresh checkout
# runnable top to bottom.
output_path = Path('output/ca-cities.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path)

In [7]:
# Charter cities only, ordered by racial_disp_his_est ascending (most negative
# first), limited to the first 30 rows.
dg = df[[
    'name',
    'hascharter',
    'racial_disp_score_est',
    'racial_disp_score_moe',
    'racial_disp_his_est',
    'racial_disp_his_moe',
]]
dg.loc[dg['hascharter']].sort_values(by='racial_disp_his_est').head(30)

Unnamed: 0_level_0,Unnamed: 1_level_0,name,hascharter,racial_disp_score_est,racial_disp_score_moe,racial_disp_his_est,racial_disp_his_moe
table,line,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Place,33788,King City,True,18.52508,9.712993,-0.185251,0.094056
Place,40860,San Rafael,True,14.031275,2.603788,-0.134411,0.021228
Place,40340,Salinas,True,13.415319,1.842972,-0.134153,0.018401
Place,40990,Santa Maria,True,13.411184,2.381096,-0.134112,0.023734
Place,40522,Sand City,True,11.536708,28.023979,-0.115367,0.208319
Place,39742,Richmond,True,11.34128,2.839152,-0.113413,0.021351
Place,28900,Compton,True,11.282848,2.230144,-0.11262,0.021225
Place,34984,Lindsay,True,10.212312,7.410926,-0.102123,0.07209
Place,43928,Watsonville,True,10.163313,3.198847,-0.101633,0.030838
Place,25546,Anaheim,True,9.878711,1.293892,-0.098787,0.012884


In [8]:
# Charter cities only, ordered by racial_disp_asn_est ascending (most negative
# first), limited to the first 30 rows.
dg = df[[
    'name',
    'hascharter',
    'racial_disp_score_est',
    'racial_disp_score_moe',
    'racial_disp_asn_est',
    'racial_disp_asn_moe',
]]
dg.loc[dg['hascharter']].sort_values(by='racial_disp_asn_est').head(30)

Unnamed: 0_level_0,Unnamed: 1_level_0,name,hascharter,racial_disp_score_est,racial_disp_score_moe,racial_disp_asn_est,racial_disp_asn_moe
table,line,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Place,42407,Sunnyvale,True,13.343859,2.457469,-0.127184,0.018876
Place,40925,Santa Clara,True,11.70732,2.012832,-0.115677,0.017798
Place,25247,Albany,True,9.845989,5.41262,-0.086147,0.037046
Place,33346,Irvine,True,8.173348,1.579033,-0.081733,0.015413
Place,40873,San Ramon,True,8.12854,2.275047,-0.081285,0.02074
Place,37077,Mountain View,True,12.344362,2.776812,-0.075435,0.020862
Place,43005,Torrance,True,6.287712,1.810476,-0.062877,0.013989
Place,25702,Arcadia,True,5.90608,3.472709,-0.059061,0.032495
Place,29407,Cypress,True,6.013816,3.778045,-0.053302,0.025908
Place,26443,Berkeley,True,5.553265,1.701762,-0.051543,0.013024


In [9]:
# Charter cities only, ordered by racial_disp_wht_est descending (largest
# value first), limited to the first 30 rows.
dg = df[[
    'name',
    'hascharter',
    'racial_disp_score_est',
    'racial_disp_score_moe',
    'racial_disp_wht_est',
    'racial_disp_wht_moe',
]]
dg.loc[dg['hascharter']].sort_values(by='racial_disp_wht_est', ascending=False).head(30)

Unnamed: 0_level_0,Unnamed: 1_level_0,name,hascharter,racial_disp_score_est,racial_disp_score_moe,racial_disp_wht_est,racial_disp_wht_moe
table,line,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Place,33788,King City,True,18.52508,9.712993,0.153601,0.06949
Place,40860,San Rafael,True,14.031275,2.603788,0.131725,0.022289
Place,42407,Sunnyvale,True,13.343859,2.457469,0.116974,0.019006
Place,40990,Santa Maria,True,13.411184,2.381096,0.110709,0.022493
Place,37077,Mountain View,True,12.344362,2.776812,0.110376,0.023463
Place,40340,Salinas,True,13.415319,1.842972,0.098253,0.015408
Place,39651,Redwood City,True,10.739218,2.54518,0.097394,0.023441
Place,40925,Santa Clara,True,11.70732,2.012832,0.09333,0.01934
Place,37233,Napa,True,9.765609,1.841505,0.093065,0.013965
Place,40522,Sand City,True,11.536708,28.023979,0.089831,0.230172


In [10]:
# Drill into the King City row: the Hispanic share columns for adults and for
# CVAP, alongside the underlying count estimates and margins of error.
dg = df.loc[df['name'] == 'King City']
dg[[
    'prop_adu_his_est', 'prop_adu_his_moe',
    'prop_cvap_his_est', 'prop_cvap_his_moe',
    'adu_est', 'cvap_est',
    'his_adu_est', 'his_adu_moe',
    'his_cvap_est', 'his_cvap_moe',
]]

Unnamed: 0_level_0,Unnamed: 1_level_0,prop_adu_his_est,prop_adu_his_moe,prop_cvap_his_est,prop_cvap_his_moe,adu_est,cvap_est,his_adu_est,his_adu_moe,his_cvap_est,his_cvap_moe
table,line,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Place,33788,0.852353,0.028391,0.667102,0.089668,9245,3830,7880,469,2555,461
