In [1]:
import pandas as pd

from ddeserts.annotate import add_all_stat_columns
from ddeserts.annotate import add_geo_columns
from ddeserts.annotate import with_columns_sorted
from ddeserts.load import load_cvap_data
from ddeserts.load import load_charter_cities

In [2]:
# Load the 'Place' CVAP data, pre-filtering the input with pre_filter.
def pre_filter(line):
    """Return True when `line` contains 'California' and does not contain 'CDP'.

    NOTE(review): `line` is presumably one raw text line of the input
    file -- confirm against ddeserts.load.load_cvap_data.
    """
    if 'California' not in line:
        return False
    return 'CDP' not in line


df = load_cvap_data('Place', pre_filter=pre_filter)
len(df)

485

In [3]:
# Add the geo columns, then keep only rows whose state is California
# and whose geotype is 'city' or 'town'.
# NOTE(review): add_geo_columns' return value is unused, so it presumably
# adds 'state' and 'geotype' to df in place -- confirm in ddeserts.annotate.
add_geo_columns(df)
in_california = df['state'] == 'California'
is_city_or_town = df['geotype'].isin(['city', 'town'])
df = df[in_california & is_city_or_town]
len(df)

482

In [4]:
# Flag each row whose 'name' appears in the loaded charter-city collection.
charter_cities = load_charter_cities()
is_charter = df['name'].isin(charter_cities)
df['hascharter'] = is_charter

In [5]:
# Add the stat columns, then sort the columns.
# NOTE(review): add_all_stat_columns' return value is discarded, so it
# presumably mutates df in place -- confirm in ddeserts.annotate.
add_all_stat_columns(df)
# The result of with_columns_sorted is rebound to df and used by later cells.
df = with_columns_sorted(df)
# Bare last expression so the notebook renders the resulting table.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,geoid,geoname,geotype,hascharter,name,state,adu_est,adu_moe,cit_est,cit_moe,...,p_tot_ind_est,p_tot_ind_moe,p_tot_oth_est,p_tot_oth_moe,p_tot_pac_est,p_tot_pac_moe,p_tot_wht_est,p_tot_wht_moe,p_wht_dis_est,p_wht_dis_moe
table,line,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Place,25130,16000US0600296,"Adelanto city, California",city,True,Adelanto,California,21995,653,29610,532,...,0.000297,0.000446,0.025698,0.047564,0.001485,0.003986,0.120915,0.025515,0.014876,0.284388
Place,25156,16000US0600394,"Agoura Hills city, California",city,False,Agoura Hills,California,16140,393,19320,468,...,0.000000,0.001123,0.035306,0.048639,0.000000,0.001123,0.747748,0.032781,0.034962,0.071360
Place,25221,16000US0600562,"Alameda city, California",city,True,Alameda,California,62565,524,72155,810,...,0.002611,0.001251,0.061895,0.026146,0.003439,0.001736,0.430846,0.015992,0.046754,0.067904
Place,25247,16000US0600674,"Albany city, California",city,True,Albany,California,14665,241,16290,629,...,0.006312,0.003347,0.060843,0.049723,0.004292,0.003091,0.444332,0.029397,0.063328,0.119673
Place,25286,16000US0600884,"Alhambra city, California",city,True,Alhambra,California,70315,496,69830,975,...,0.002894,0.001400,0.015004,0.022648,0.002481,0.002760,0.088605,0.006858,0.034757,0.141863
Place,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Place,44786,16000US0686930,"Yountville city, California",city,False,Yountville,California,2760,81,2940,41,...,0.003350,0.004744,0.031826,0.103451,0.000000,0.004047,0.737018,0.079170,0.016355,0.167563
Place,44799,16000US0686944,"Yreka city, California",city,False,Yreka,California,5660,180,7500,107,...,0.046958,0.012007,0.076720,0.060412,0.014550,0.009651,0.712302,0.047025,0.005721,0.081617
Place,44812,16000US0686972,"Yuba City city, California",city,False,Yuba City,California,49455,404,58480,771,...,0.006390,0.002082,0.049312,0.019401,0.004435,0.001147,0.425318,0.011480,0.008301,0.050949
Place,44825,16000US0687042,"Yucaipa city, California",city,False,Yucaipa,California,40175,646,50875,470,...,0.004025,0.003771,0.017504,0.037777,0.000936,0.001125,0.588880,0.026150,0.011317,0.088829


In [6]:
# Write the annotated table to CSV (path is relative to the working directory).
csv_path = 'output/ca-cities.csv'
df.to_csv(csv_path)

In [19]:
# First 30 rows with hascharter set, ordered by underrep_his_est descending.
dg = df[[
    'name', 'hascharter',
    'underrep_score_est', 'underrep_score_moe',
    'underrep_his_est', 'underrep_his_moe',
]]
(
    dg.loc[dg['hascharter']]
    .sort_values(by='underrep_his_est', ascending=False)
    .head(30)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,name,hascharter,underrep_score_est,underrep_score_moe,underrep_his_est,underrep_his_moe
table,line,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Place,33788,King City,True,18.52508,26.810939,0.185251,0.265943
Place,40860,San Rafael,True,14.031275,3.083649,0.134411,0.025662
Place,40340,Salinas,True,13.415319,4.045495,0.134153,0.040438
Place,40990,Santa Maria,True,13.411184,4.910961,0.134112,0.049025
Place,40522,Sand City,True,11.536708,63.06369,0.115367,0.413185
Place,39742,Richmond,True,11.34128,3.872577,0.113413,0.030502
Place,28900,Compton,True,11.282848,4.246812,0.11262,0.041824
Place,34984,Lindsay,True,10.212312,20.549318,0.102123,0.204259
Place,43928,Watsonville,True,10.163313,7.549338,0.101633,0.074798
Place,25546,Anaheim,True,9.878711,2.10209,0.098787,0.020744


In [20]:
# First 30 rows with hascharter set, ordered by underrep_asn_est descending.
dg = df[[
    'name', 'hascharter',
    'underrep_score_est', 'underrep_score_moe',
    'underrep_asn_est', 'underrep_asn_moe',
]]
(
    dg.loc[dg['hascharter']]
    .sort_values(by='underrep_asn_est', ascending=False)
    .head(30)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,name,hascharter,underrep_score_est,underrep_score_moe,underrep_asn_est,underrep_asn_moe
table,line,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Place,42407,Sunnyvale,True,13.343859,3.451546,0.127184,0.028594
Place,40925,Santa Clara,True,11.70732,3.006339,0.115677,0.027061
Place,25247,Albany,True,9.845989,6.920116,0.086147,0.050896
Place,33346,Irvine,True,8.173348,2.284577,0.081733,0.022533
Place,40873,San Ramon,True,8.12854,3.314207,0.081285,0.031232
Place,37077,Mountain View,True,12.344362,3.699756,0.075435,0.029559
Place,43005,Torrance,True,6.287712,2.483337,0.062877,0.020122
Place,25702,Arcadia,True,5.90608,6.004182,0.059061,0.057825
Place,29407,Cypress,True,6.013816,4.721332,0.053302,0.034225
Place,26443,Berkeley,True,5.553265,2.002171,0.051543,0.015643
