In [8]:
from math import ceil

import pandas as pd

from ddeserts import read_cvap_csv, moe_of_sum, moe_of_ratio

In [2]:
# Read the charter-city names: every non-blank line of the text file becomes
# one member of the set.
charter_cities_path = 'data/cacities/charter-cities.txt'

with open(charter_cities_path) as f:
    # Strip surrounding whitespace and drop lines that end up empty, so a
    # trailing newline or spacer line never adds '' to the set.
    charter_cities = {name for name in map(str.strip, f) if name}

In [3]:
# Load the place-level CVAP rows and build the working DataFrame.
places_path = 'data/census/CVAP_2015-2019_ACS_csv_files/Place.csv'


def pre_filter(line):
    """Return True when `line` contains both 'California' and 'Total'.

    NOTE(review): presumably read_cvap_csv applies this to raw text lines
    before parsing them -- confirm against ddeserts.
    """
    return 'California' in line and 'Total' in line


def row_filter(row):
    """Return True for rows whose state is 'California' and geotype is not 'cdp'.

    Deliberately not named `filter`, so the builtin of that name stays usable
    in every later cell of the notebook.
    """
    return row['state'] == 'California' and row['geotype'] != 'cdp'


ca_city_rows = list(read_cvap_csv(places_path, pre_filter=pre_filter, filter=row_filter))

df = pd.DataFrame.from_records(ca_city_rows, index=['table', 'line'])

# Cast all four *_moe columns to int with a single astype call.
moe_columns = ['tot_moe', 'adu_moe', 'cvap_moe', 'cit_moe']
df = df.astype({column: 'int' for column in moe_columns})


In [15]:
# Derive the charter flag and the dvap_* columns on df.
def _row_dvap_moe(row):
    """Round up what moe_of_sum returns for the row's adu_moe and cvap_moe."""
    return ceil(moe_of_sum(row['adu_moe'], row['cvap_moe']))


def _row_dvap_pct_moe(row):
    """Pass the row's dvap and adu estimates and margins to moe_of_ratio."""
    return moe_of_ratio(row['dvap_est'], row['dvap_moe'], row['adu_est'], row['adu_moe'])


# True where the place name appears in the charter-city set.
df['has_charter'] = df['name'].isin(charter_cities)

# dvap_est is the adult estimate minus the CVAP estimate.
df['dvap_est'] = df['adu_est'].sub(df['cvap_est'])
df['dvap_moe'] = df.apply(_row_dvap_moe, axis=1).astype('int')

# dvap_pct is dvap_est as a fraction of adu_est.
df['dvap_pct'] = df['dvap_est'].div(df['adu_est'])
df['dvap_pct_moe'] = df.apply(_row_dvap_pct_moe, axis=1).astype('float')


In [24]:
# Charter cities only, sorted ascending by dvap_pct; display the last 30 rows.
dvap_columns = ['name', 'adu_est', 'adu_moe', 'dvap_est', 'dvap_moe', 'dvap_pct', 'dvap_pct_moe']
charter_mask = df['has_charter']
dvap = df.loc[charter_mask, dvap_columns].sort_values('dvap_pct')
dvap.tail(30)

Unnamed: 0_level_0,Unnamed: 1_level_0,name,adu_est,adu_moe,dvap_est,dvap_moe,dvap_pct,dvap_pct_moe
table,line,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Place,27223,Buena Park,64245,807,12855,1368,0.200093,0.02411
Place,29732,Desert Hot Springs,21440,700,4350,1173,0.202892,0.063405
Place,31903,Glendale,164850,999,33510,2044,0.203276,0.013714
Place,40860,San Rafael,45835,474,9335,938,0.203665,0.022807
Place,30499,El Centro,31050,557,6325,936,0.203704,0.034417
Place,33268,Inglewood,83810,1049,17150,1667,0.20463,0.022736
Place,40665,San Jose,797090,1566,164515,4078,0.206395,0.005532
Place,25247,Albany,14665,241,3060,590,0.20866,0.04439
Place,40795,San Mateo,82505,817,17400,1420,0.210896,0.019492
Place,32618,Hayward,125155,887,27275,1738,0.21793,0.015541


In [19]:
# Charter cities only, sorted ascending by dvap_est; display the last 30 rows.
dvap = (
    df.loc[
        df['has_charter'],
        ['name', 'adu_est', 'adu_moe', 'dvap_est', 'dvap_moe', 'dvap_pct', 'dvap_pct_moe'],
    ]
    .sort_values('dvap_est')
)
dvap.tail(30)

Unnamed: 0_level_0,Unnamed: 1_level_0,name,adu_est,adu_moe,dvap_est,dvap_moe,dvap_pct,dvap_pct_moe
table,line,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Place,28900,Compton,68500,942,17275,1551,0.25219,0.026474
Place,40795,San Mateo,82505,817,17400,1420,0.210896,0.019492
Place,36583,Modesto,158385,1042,17825,1798,0.112542,0.012173
Place,37077,Mountain View,65625,660,19675,1403,0.29981,0.024642
Place,38442,Pasadena,115505,864,19685,1731,0.170426,0.016384
Place,41042,Santa Rosa,140870,836,20410,1779,0.144885,0.013569
Place,39742,Richmond,85930,921,20620,1923,0.239963,0.025221
Place,40990,Santa Maria,72750,872,26085,1627,0.358557,0.026985
Place,32618,Hayward,125155,887,27275,1738,0.21793,0.015541
Place,39105,Pomona,114115,947,27900,1724,0.24449,0.01728


Unnamed: 0_level_0,Unnamed: 1_level_0,name,adu_est,adu_moe,dvap_est,dvap_moe,dvap_pct,dvap_pct_moe
table,line,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Place,31786,Garden Grove,136210,983,24430,1858,0.179355,0.015044
Place,36908,Moreno Valley,148980,1352,24600,2324,0.165123,0.017254
Place,37961,Ontario,130330,1199,25745,2336,0.197537,0.019924
Place,40990,Santa Maria,72750,872,26085,1627,0.358557,0.026985
Place,32618,Hayward,125155,887,27275,1738,0.21793,0.015541
Place,39105,Pomona,114115,947,27900,1724,0.24449,0.01728
Place,31266,Fontana,150425,1480,28040,2380,0.186405,0.017831
Place,30642,El Monte,88970,957,28440,1917,0.319658,0.025257
Place,40457,San Bernardino,152070,1556,28590,2558,0.188006,0.018939
Place,40925,Santa Clara,102390,787,29365,1797,0.286796,0.019908


In [29]:
# Optional export, left disabled: uncomment the next line to write df to
# output/ca_cities.csv.
#df.to_csv('output/ca_cities.csv')