In [8]:
from math import ceil

import pandas as pd

from ddeserts import read_cvap_csv, moe_of_sum, moe_of_ratio

In [2]:
# Build the set of charter city names: one entry per line of the text file,
# with surrounding whitespace (including the newline) stripped.
charter_cities_path = 'data/cacities/charter-cities.txt'

with open(charter_cities_path) as f:
    charter_cities = set(map(str.strip, f))

In [3]:
# load the data
places_path = 'data/census/CVAP_2015-2019_ACS_csv_files/Place.csv'


def pre_filter(line):
    """Return True when `line` mentions both 'California' and 'Total'.

    Passed to read_cvap_csv as `pre_filter`; presumably applied to raw CSV
    lines before they are parsed -- confirm in ddeserts.
    """
    return 'California' in line and 'Total' in line


def row_filter(row):
    """Return True for rows whose state is California and geotype is not 'cdp'.

    Deliberately NOT named `filter`: a module-level `filter` would shadow the
    builtin for every later cell in the kernel session.
    """
    return row['state'] == 'California' and row['geotype'] != 'cdp'


ca_city_rows = list(read_cvap_csv(places_path, pre_filter=pre_filter, filter=row_filter))

# The margin-of-error columns are cast to integers in one pass; the frame is
# indexed by (table, line).
moe_columns = ['tot_moe', 'adu_moe', 'cvap_moe', 'cit_moe']
df = (
    pd.DataFrame.from_records(ca_city_rows, index=['table', 'line'])
    .astype({column: 'int' for column in moe_columns})
)


In [15]:
# add extra columns
def _dvap_moe(row):
    """Margin of error for one row's dvap_est: moe_of_sum of the adu and cvap
    margins of error, rounded up to a whole number."""
    return ceil(moe_of_sum(row['adu_moe'], row['cvap_moe']))


def _dvap_pct_moe(row):
    """Margin of error for one row's dvap_pct: moe_of_ratio called with
    dvap_est, dvap_moe, adu_est and adu_moe, in that order."""
    return moe_of_ratio(row['dvap_est'], row['dvap_moe'], row['adu_est'], row['adu_moe'])


# True where the place name appears in the charter city set
df['has_charter'] = df['name'].isin(charter_cities)

# difference between the adu and cvap estimates, with its margin of error
df['dvap_est'] = df['adu_est'].sub(df['cvap_est'])
df['dvap_moe'] = df.apply(_dvap_moe, axis=1).astype('int')

# that difference as a fraction of the adu estimate, with its margin of error
df['dvap_pct'] = df['dvap_est'].div(df['adu_est'])
df['dvap_pct_moe'] = df.apply(_dvap_pct_moe, axis=1).astype('float')


In [21]:
# Charter cities only, ordered by dvap_pct (ascending); show positions 20-39.
dvap_columns = ['name', 'adu_est', 'adu_moe', 'dvap_est', 'dvap_moe', 'dvap_pct', 'dvap_pct_moe']
dvap = df.loc[df['has_charter'], dvap_columns].sort_values('dvap_pct')
dvap.iloc[20:40]

Unnamed: 0_level_0,Unnamed: 1_level_0,name,adu_est,adu_moe,dvap_est,dvap_moe,dvap_pct,dvap_pct_moe
table,line,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Place,33060,Huntington Beach,161815,973,10710,1784,0.066187,0.011492
Place,36050,Marysville,9415,302,655,473,0.06957,0.054209
Place,39625,Redondo Beach,52620,654,3790,962,0.072026,0.019419
Place,39456,Rancho Mirage,16820,283,1220,484,0.072533,0.030509
Place,41770,Solvang,4665,160,350,291,0.075027,0.06726
Place,40483,Ventura,86525,731,6820,1367,0.078821,0.016605
Place,44370,Whittier,66350,574,5255,905,0.079201,0.01445
Place,34542,La Quinta,33055,614,2645,996,0.080018,0.032216
Place,34815,Lemoore,18985,468,1525,689,0.080327,0.039239
Place,26547,Big Bear Lake,4190,147,360,250,0.085919,0.064959


In [19]:
# Charter cities only, ordered by dvap_est (ascending); show the last 30 rows,
# i.e. the largest dvap_est values.
dvap_columns = ['name', 'adu_est', 'adu_moe', 'dvap_est', 'dvap_moe', 'dvap_pct', 'dvap_pct_moe']
dvap = df.loc[df['has_charter'], dvap_columns].sort_values('dvap_est')
dvap.tail(30)

Unnamed: 0_level_0,Unnamed: 1_level_0,name,adu_est,adu_moe,dvap_est,dvap_moe,dvap_pct,dvap_pct_moe
table,line,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Place,28900,Compton,68500,942,17275,1551,0.25219,0.026474
Place,40795,San Mateo,82505,817,17400,1420,0.210896,0.019492
Place,36583,Modesto,158385,1042,17825,1798,0.112542,0.012173
Place,37077,Mountain View,65625,660,19675,1403,0.29981,0.024642
Place,38442,Pasadena,115505,864,19685,1731,0.170426,0.016384
Place,41042,Santa Rosa,140870,836,20410,1779,0.144885,0.013569
Place,39742,Richmond,85930,921,20620,1923,0.239963,0.025221
Place,40990,Santa Maria,72750,872,26085,1627,0.358557,0.026985
Place,32618,Hayward,125155,887,27275,1738,0.21793,0.015541
Place,39105,Pomona,114115,947,27900,1724,0.24449,0.01728
