In [1]:
import pandas as pd

from ddeserts import read_cvap_csv

In [2]:
# Build the set of charter-city names from a text file: one entry per line,
# with surrounding whitespace (including the newline) removed.
charter_cities_path = 'data/cacities/charter-cities.txt'

with open(charter_cities_path) as f:
    charter_cities = set(map(str.strip, f))

In [82]:
# Load the California place rows from the Census CVAP "Place" file.
places_path = 'data/census/CVAP_2015-2019_ACS_csv_files/Place.csv'


def pre_filter(line):
    """Keep only lines that mention both 'California' and 'Total'.

    Presumably applied by read_cvap_csv to each raw text line before it is
    parsed -- confirm against the helper's implementation.
    """
    return 'California' in line and 'Total' in line


def row_filter(row):
    """Keep rows whose state is California and whose geotype is not 'cdp'.

    Deliberately NOT named ``filter``: binding that name at notebook level
    would shadow the builtin ``filter`` for every later cell.
    """
    return row['state'] == 'California' and row['geotype'] != 'cdp'


ca_city_rows = list(
    read_cvap_csv(places_path, pre_filter=pre_filter, filter=row_filter))

df = pd.DataFrame.from_records(ca_city_rows, index=['table', 'line'])

# Cast every margin-of-error column to integer in a single astype call.
moe_columns = ['tot_moe', 'adu_moe', 'cvap_moe', 'cit_moe']
df = df.astype({column: 'int' for column in moe_columns})


In [85]:
# Derived columns.
# Flag rows whose name appears in the charter-city set.
df['has_charter'] = df['name'].isin(charter_cities)

# dvap = adult estimate minus CVAP estimate (presumably adults who are not
# citizens -- confirm the column meanings against the CVAP documentation).
df['dvap_est'] = df['adu_est'].sub(df['cvap_est'])
# The CVAP margin of error is reused for dvap; per the original author's
# note, cvap_moe is always bigger.
df['dvap_moe'] = df['cvap_moe']
# Share of the adult estimate made up by dvap.
df['dvap_pct'] = df['dvap_est'].div(df['adu_est'])

def moe_of_div(a, a_moe, b, b_moe):
    """Return a conservative margin of error for the ratio ``a / b``.

    The ratio is evaluated at two corners of the intervals ``a +/- a_moe``
    and ``b +/- b_moe``: the smallest numerator over the largest denominator,
    and the largest numerator over the smallest denominator. The result is
    the larger of the two distances from ``a / b`` to those corner ratios.

    Args:
        a: numerator estimate.
        a_moe: margin of error of ``a`` (assumed non-negative).
        b: denominator estimate.
        b_moe: margin of error of ``b`` (assumed non-negative).

    Returns:
        ``nan`` when ``b`` is zero (the ratio itself is undefined);
        ``inf`` when ``b - b_moe <= 0`` (the denominator interval reaches
        zero, so the ratio has no finite upper bound);
        otherwise the larger of the lower and upper deviations.
    """
    if b == 0:
        # a / b is undefined, so there is no meaningful margin either.
        return float('nan')

    ratio = a / b

    lowest_b = b - b_moe
    if lowest_b <= 0:
        # Dividing by a zero or negative lower bound would either raise or
        # yield a negative "maximum" that silently understates the margin.
        return float('inf')

    lowest_ratio = (a - a_moe) / (b + b_moe)
    highest_ratio = (a + a_moe) / lowest_b

    return max(ratio - lowest_ratio, highest_ratio - ratio)

def make_dvap_pct_moe(r):
    """Margin of error of ``dvap_est / adu_est`` for a single row ``r``.

    ``r`` is any mapping-like row exposing the keys 'dvap_est', 'dvap_moe',
    'adu_est' and 'adu_moe'; the computation is delegated to ``moe_of_div``.
    """
    numerator, numerator_moe = r['dvap_est'], r['dvap_moe']
    denominator, denominator_moe = r['adu_est'], r['adu_moe']
    return moe_of_div(numerator, numerator_moe, denominator, denominator_moe)

# Row-wise apply (axis=1): each row is passed to make_dvap_pct_moe and the
# returned value becomes that row's 'dvap_pct_moe' entry.
df['dvap_pct_moe'] = df.apply(make_dvap_pct_moe, axis=1)

In [86]:
# Charter cities ordered by dvap share of adults (ascending); the last
# expression displays the 20 rows with the highest share.
dvap = (
    df.loc[
        df['has_charter'],
        ['name', 'adu_est', 'dvap_est', 'dvap_moe', 'dvap_pct', 'dvap_pct_moe'],
    ]
    .sort_values(by='dvap_pct')
)
dvap.tail(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,name,adu_est,dvap_est,dvap_moe,dvap_pct,dvap_pct_moe
table,line,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Place,33346,Irvine,212955,47620,2665,0.223615,0.014138
Place,35400,Los Angeles,3145520,728480,7692,0.231593,0.002715
Place,39651,Redwood City,67430,15885,1208,0.235578,0.020762
Place,39742,Richmond,85930,20620,1687,0.239963,0.022445
Place,39105,Pomona,114115,27900,1440,0.24449,0.01477
Place,25546,Anaheim,265820,66135,3028,0.248796,0.012963
Place,28900,Compton,68500,17275,1232,0.25219,0.021753
Place,44630,Woodlake,4855,1355,354,0.279094,0.092751
Place,29849,Dinuba,15740,4445,560,0.282402,0.044168
Place,40925,Santa Clara,102390,29365,1615,0.286796,0.018117


In [87]:
# Charter cities ordered by dvap estimate (ascending); the last expression
# displays the 30 rows with the largest estimates.
dvap = (
    df.loc[
        df['has_charter'],
        ['name', 'adu_est', 'dvap_est', 'dvap_moe', 'dvap_pct', 'dvap_pct_moe'],
    ]
    .sort_values(by='dvap_est')
)
dvap.tail(30)

Unnamed: 0_level_0,Unnamed: 1_level_0,name,adu_est,dvap_est,dvap_moe,dvap_pct,dvap_pct_moe
table,line,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Place,28900,Compton,68500,17275,1232,0.25219,0.021753
Place,40795,San Mateo,82505,17400,1161,0.210896,0.016322
Place,36583,Modesto,158385,17825,1465,0.112542,0.010056
Place,37077,Mountain View,65625,19675,1237,0.29981,0.022087
Place,38442,Pasadena,115505,19685,1499,0.170426,0.01436
Place,41042,Santa Rosa,140870,20410,1570,0.144885,0.012077
Place,39742,Richmond,85930,20620,1687,0.239963,0.022445
Place,40990,Santa Maria,72750,26085,1373,0.358557,0.023452
Place,32618,Hayward,125155,27275,1494,0.21793,0.013578
Place,39105,Pomona,114115,27900,1440,0.24449,0.01477
