# Prep Census Data

In [21]:
import geopandas as gpd
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm, trange
import json
# Root folder of the raw Census / ACS downloads.
# NOTE(review): this is an absolute, machine-specific path; adjust it to your environment.
storage = "/Volumes/easystore/Drones"


def load_csv_as_str(path, chunksize=100000, desc="Loading data"):
    """Read a CSV in chunks behind a progress bar and return one DataFrame.

    Every column is read with ``dtype=str`` so values are kept exactly as
    they appear in the file; numeric casts happen later where needed.

    Parameters
    ----------
    path : str
        Location of the CSV file.
    chunksize : int
        Number of rows per chunk handed to ``pd.read_csv``.
    desc : str
        Label shown on the tqdm progress bar.

    Returns
    -------
    pandas.DataFrame
        All chunks concatenated in file order.
    """
    # The context manager closes the underlying file handle once all chunks are read.
    with pd.read_csv(path, chunksize=chunksize, dtype=str) as reader:
        chunks = list(tqdm(reader, desc=desc))
    return pd.concat(chunks)


# Column metadata for the ACS table; it is consumed by the remapping step further down.
with open(f'{storage}/census/acs-block-groups-household-income/metadata.json') as f:
    acs_columns = json.load(f)

# ACS 2021 5-year household income table (B19001).
acs_poverty_bg = load_csv_as_str(
    f"{storage}/census/acs-block-groups-household-income/acs2021_5yr_B19001_15000US060730134093.csv"
)

# 2020 Decennial Census P9 table: column metadata and the data itself.
census_columns = load_csv_as_str(
    f"{storage}/census/race/DECENNIALDHC2020.P9-Column-Metadata.csv"
)
census_race = load_csv_as_str(
    f"{storage}/census/race/DECENNIALDHC2020.P9-Data.csv"
)

# Manually labelled list of western tracts (used to flag blocks later on).
western_tracts = load_csv_as_str("../../data/manually-label/WesternTracts.csv")


Loading data: 0it [00:00, ?it/s]

Loading data: 0it [00:00, ?it/s]

Loading data: 0it [00:00, ?it/s]

Loading data: 0it [00:00, ?it/s]

## ACS Data
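The household income cutoff for Free and Reduced-Price Meals in California schools is roughly 55,000 USD per year (see the CDE income eligibility scales: https://www.cde.ca.gov/ls/nu/rs/scales2324.asp). Income brackets flagged `FRLP` in the ACS metadata are counted as eligible below.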

In [22]:
def _clean_income_label(raw_name):
    """Return an ACS column label with every ':' and '$' removed."""
    return raw_name.replace(':', '').replace('$', '')


columns = acs_columns['tables']['B19001']['columns']
remap = {}          # raw ACS column id -> cleaned label
eligible = []       # cleaned labels of brackets flagged FRLP == "True"
below_median = []   # cleaned labels of brackets flagged median == "below"
drop = []           # cleaned "<label> Error" columns, removed in a later cell
for column, meta in columns.items():
    label = _clean_income_label(meta['name'])
    remap[column] = label
    remap[column + ", Error"] = label + " Error"
    drop.append(label + " Error")

    # The "Total:" entry is skipped when reading the FRLP / median flags.
    if meta['name'] != "Total:":
        if meta['FRLP'] == "True":
            eligible.append(label)
        if meta['median'] == "below":
            below_median.append(label)

acs_poverty_bg = acs_poverty_bg.rename(columns=remap)

# Keep only rows whose geoid is exactly 19 characters long.
# `.str.len()` tolerates missing geoids (they simply fail the == 19 filter),
# whereas calling len() on a missing value raises.
acs_poverty_bg['len'] = acs_poverty_bg['geoid'].str.len()
acs_poverty_bg = acs_poverty_bg[acs_poverty_bg['len'] == 19].copy()
# After filtering no value is missing, so the helper column can be a plain int.
acs_poverty_bg['len'] = acs_poverty_bg['len'].astype(int)
# Everything after the '15000US' prefix is the block-group GEOID.
acs_poverty_bg['block-group-geoid'] = acs_poverty_bg['geoid'].str.split('15000US').str[1]


In [23]:
# The ACS table was loaded with dtype=str, so cast to float before any arithmetic.
total_households = acs_poverty_bg['Total'].astype(float)
frlp_households = acs_poverty_bg[eligible].astype(float).sum(axis=1)
below_median_households = acs_poverty_bg[below_median].astype(float).sum(axis=1)

acs_poverty_bg['FRLP Households'] = frlp_households
acs_poverty_bg['Below Median Households'] = below_median_households
# Block groups with Total == 0 yield NaN shares (0 / 0).
acs_poverty_bg['% FRLP Eligible'] = frlp_households / total_households
acs_poverty_bg['% Below Median'] = below_median_households / total_households

# The margin-of-error columns are not needed downstream.
acs_poverty_bg = acs_poverty_bg.drop(columns=drop)
acs_poverty_bg.head()

Unnamed: 0,geoid,name,Total,"Less than 10,000","10,000 to 14,999","15,000 to 19,999","20,000 to 24,999","25,000 to 29,999","30,000 to 34,999","35,000 to 39,999",...,"100,000 to 124,999","125,000 to 149,999","150,000 to 199,999","200,000 or more",len,block-group-geoid,FRLP Households,Below Median Households,% FRLP Eligible,% Below Median
3,15000US060730032041,"BG 1, Tract 32.04, San Diego, CA",660,41,8,0,16,8,31,27,...,92,76,89,69,19,60730032041,239.0,334.0,0.362121,0.506061
4,15000US060730032071,"BG 1, Tract 32.07, San Diego, CA",759,0,0,0,0,0,0,0,...,90,123,39,228,19,60730032071,86.0,279.0,0.113307,0.367589
5,15000US060730100012,"BG 2, Tract 100.01, San Diego, CA",953,82,16,7,0,32,0,61,...,91,61,141,74,19,60730100012,359.0,586.0,0.376705,0.6149
6,15000US060730100161,"BG 1, Tract 100.16, San Diego, CA",0,0,0,0,0,0,0,0,...,0,0,0,0,19,60730100161,0.0,0.0,,
7,15000US060730100192,"BG 2, Tract 100.19, San Diego, CA",529,0,0,0,0,0,27,0,...,51,109,94,40,19,60730100192,94.0,235.0,0.177694,0.444234


In [40]:
# Persist the block-group income table (row index is not written).
acs_poverty_bg.to_csv(
    path_or_buf='../../data/outputs/outputs_acs-poverty-blockgroups.csv',
    index=False,
)

In [25]:
# Sanity check: manually add up the income brackets below 60,000 for one block
# group so the result can be compared with its 'FRLP Households' value.
check_brackets = [
    'Less than 10,000', '10,000 to 14,999',
    '15,000 to 19,999', '20,000 to 24,999', '25,000 to 29,999',
    '30,000 to 34,999', '35,000 to 39,999', '40,000 to 44,999',
    '45,000 to 49,999', '50,000 to 59,999',
]
check_rows = acs_poverty_bg.loc[
    acs_poverty_bg['geoid'] == '15000US060730032041', check_brackets
]
# Cast to int *before* summing: the bracket columns hold strings, and summing
# strings concatenates them, which is only harmless when exactly one row matches.
check_rows.astype(int).sum().sum()



239

## Get Chula Vista Block Groups

In [26]:
# Lookup table pairing each full ACS geoid with its block-group GEOID.
block_group_id_columns = ['geoid', 'block-group-geoid']
cv_block_groups = acs_poverty_bg[block_group_id_columns]
cv_block_groups.to_csv(
    path_or_buf='../../data/outputs/outputs_cv-block-groups.csv',
    index=False,
)


## Census Data

In [27]:
def _clean_race_label(raw_label):
    """Turn a '!!'-separated census column description into a short label.

    The replacements are applied in this exact order: join the '!!' parts with
    spaces, drop ':', shorten ' Hispanic or Latino' to ' Latino', remove
    'Not Latino Population of one race', remove 'alone', collapse double
    spaces once, and strip surrounding whitespace.
    """
    return (
        ' '.join(raw_label.split('!!'))
        .replace(':', "")
        .replace(' Hispanic or Latino', ' Latino')
        .replace('Not Latino Population of one race', '')
        .replace('alone', '')
        .replace('  ', ' ')
        .strip()
    )


# The first data row holds the human-readable description of every column.
columns = census_race.iloc[0].to_dict()
remap = {}   # raw column id -> cleaned label
drop = []    # column ids ending in "A", removed in the next cell
for column, label in columns.items():
    if not isinstance(column, str) or not column:
        # A column id that cannot be inspected is reported, not remapped.
        print(f"Not Remapping: {column}")
    elif column[-1] == "A":
        drop.append(column)
    elif isinstance(label, str):
        remap[column] = _clean_race_label(label)
    else:
        # No textual description in the first row (e.g. a missing value):
        # report it explicitly instead of swallowing every exception.
        print(f"Not Remapping: {column}")
        

Not Remapping: Unnamed: 75


In [28]:
# Labels to keep after renaming. 'Total' is listed twice, exactly as before.
# NOTE(review): the next cell assigns 14 names to the result of this 12-label
# selection, which suggests the 'Total' label matches more than one column — verify.
race_label_selection = [
    'Geography', 'Geographic Area Name', 'Total', 'Total Latino',
    'Total Not Latino', 'Total', 'Total White',
    'Total Black or African American',
    'Total American Indian and Alaska Native', 'Total Asian',
    'Total Native Hawaiian and Other Pacific Islander',
    'Total Some Other Race',
]

renamed_race = census_race.rename(columns=remap)
renamed_race = renamed_race.drop(columns=drop)
# Skip the first three rows. Row 0 is the description row the remap was built
# from; NOTE(review): confirm what rows 1-2 contain.
census_race_remapped = renamed_race[race_label_selection].iloc[3:]

In [29]:
# Give every selected column a unique name; the two '*_useless*' entries mark
# the repeated 'Total' columns that are discarded afterwards.
unique_race_labels = [
    'Geography',
    'Geographic Area Name',
    'Total',
    'Total Alone',
    'Total Latino',
    'Total Not Latino',
    'Total_useless',
    'Total_useless_1',
    'Total White',
    'Total Black or African American',
    'Total American Indian and Alaska Native',
    'Total Asian',
    'Total Native Hawaiian and Other Pacific Islander',
    'Total Some Other Race',
]
census_race_remapped.columns = unique_race_labels

In [30]:
# Keep the identifier and race columns; the 'Total_useless*' helpers are left out.
race_columns_to_keep = [
    'Geography',
    'Geographic Area Name',
    'Total',
    'Total Alone',
    'Total Latino',
    'Total Not Latino',
    'Total White',
    'Total Black or African American',
    'Total American Indian and Alaska Native',
    'Total Asian',
    'Total Native Hawaiian and Other Pacific Islander',
    'Total Some Other Race',
]
census_race_remapped = census_race_remapped[race_columns_to_keep]

In [31]:
# A block-group GEOID is 12 characters long:
# state (2) + county (3) + tract (6) + block group (1).
BLOCK_GROUP_GEOID_LEN = 12

# Work on an explicit copy so the new columns are not written onto a slice.
census_race_remapped = census_race_remapped.copy()
# The GEOID is whatever follows the last 'US' in the Geography identifier.
census_race_remapped['GEOID20'] = census_race_remapped['Geography'].str.rsplit('US', n=1).str[-1]
census_race_remapped['len'] = census_race_remapped['GEOID20'].str.len()

# Keep rows whose GEOID starts with one of the selected block-group GEOIDs.
# A prefix + isin test replaces the former '|'.join(...) regex, which matched
# the pattern anywhere in the string and matched *every* row when the list of
# block groups was empty (an empty regex matches everything).
mask = (
    census_race_remapped['GEOID20']
    .str[:BLOCK_GROUP_GEOID_LEN]
    .isin(cv_block_groups["block-group-geoid"])
)
census_race_remapped = census_race_remapped[mask].copy()

# census_race[census_race['GEOID20'].str.contains("060730032041")]

In [32]:
# Persist the filtered race table (row index is not written).
census_race_remapped.to_csv(
    path_or_buf='../../data/outputs/outputs_census-race.csv',
    index=False,
)


In [33]:
# County-wide geometry table; column dtypes are left to pandas' inference here.
sd_county = pd.read_csv(
    filepath_or_buffer='../../data/mapping/sd_county-geometry.csv',
)

In [34]:
# A block-group GEOID is 12 characters long:
# state (2) + county (3) + tract (6) + block group (1).
BLOCK_GROUP_GEOID_LEN = 12

# Restore the leading "0" on GEOID20 (presumably lost because read_csv inferred
# an integer column — confirm). Existing leading zeros are stripped first, so
# re-running this cell does not keep stacking extra "0" prefixes.
sd_county['GEOID20'] = "0" + sd_county['GEOID20'].astype(str).str.lstrip('0')

# Keep blocks whose GEOID starts with one of the selected block-group GEOIDs.
# A prefix + isin test replaces the former '|'.join(...) regex, which matched
# the pattern anywhere in the string and matched *every* row when the list of
# block groups was empty (an empty regex matches everything).
mask = (
    sd_county['GEOID20']
    .str[:BLOCK_GROUP_GEOID_LEN]
    .isin(cv_block_groups["block-group-geoid"])
)
cv_blocks = sd_county[mask].copy()
# GEOID20 is already normalised above, so it does not need rebuilding here.
cv_blocks['len'] = cv_blocks['GEOID20'].str.len()
# NOTE(review): every block's population is offset by one (a block with
# POP20 == 0 ends up as 1) — presumably to avoid zeros downstream; confirm intent.
cv_blocks['POP20'] = cv_blocks['POP20'].astype(int) + 1


In [35]:
# Start with every block flagged as not western; the next cell sets the True values.
cv_blocks.loc[:, 'west_cv'] = False

In [36]:
# Flag blocks whose tract code (compared as text) appears in the western-tract list.
western_tract_codes = western_tracts['Tract'].astype(str)
in_western_tract = cv_blocks['TRACTCE20'].astype(str).isin(western_tract_codes)
cv_blocks.loc[in_western_tract, 'west_cv'] = True

In [37]:
# Show only the blocks flagged as western.
is_western = cv_blocks['west_cv'].eq(True)
cv_blocks[is_western]

Unnamed: 0,STATEFP20,COUNTYFP20,TRACTCE20,BLOCKCE20,GEOID20,NAME20,MTFCC20,UR20,UACE20,UATYPE20,FUNCSTAT20,ALAND20,AWATER20,INTPTLAT20,INTPTLON20,HOUSING20,POP20,geometry,len,west_cv
536,6,73,12600,1005,060730126001005,Block 1005,G5040,U,78661.0,U,S,20268,0,32.632693,-117.097276,0,1,"POLYGON ((-117.09848 32.636082, -117.098118 32...",15,True
537,6,73,12600,1000,060730126001000,Block 1000,G5040,U,78661.0,U,S,41267,0,32.632765,-117.096874,0,1,"POLYGON ((-117.098419 32.636099, -117.098382 3...",15,True
538,6,73,12600,1001,060730126001001,Block 1001,G5040,U,78661.0,U,S,19259,0,32.627561,-117.094825,0,1,"POLYGON ((-117.095692 32.629235, -117.095466 3...",15,True
539,6,73,12600,1002,060730126001002,Block 1002,G5040,U,78661.0,U,S,104107,0,32.627972,-117.093272,310,656,"POLYGON ((-117.095251 32.629345, -117.095177 3...",15,True
540,6,73,12600,1003,060730126001003,Block 1003,G5040,U,78661.0,U,S,31836,0,32.628447,-117.091570,39,94,"POLYGON ((-117.092611 32.630074, -117.092519 3...",15,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27952,6,73,13102,4009,060730131024009,Block 4009,G5040,U,78661.0,U,S,27049,0,32.613179,-117.060620,34,108,"POLYGON ((-117.061631 32.613855, -117.06136 32...",15,True
27953,6,73,13102,4001,060730131024001,Block 4001,G5040,U,78661.0,U,S,60002,0,32.614759,-117.057278,33,134,"POLYGON ((-117.058925 32.616522, -117.05847 32...",15,True
27954,6,73,13000,3012,060730130003012,Block 3012,G5040,U,78661.0,U,S,14376,0,32.618411,-117.082929,16,59,"POLYGON ((-117.083624 32.619156, -117.083478 3...",15,True
27955,6,73,12900,3006,060730129003006,Block 3006,G5040,U,78661.0,U,S,23002,0,32.630523,-117.065482,23,90,"POLYGON ((-117.066701 32.630665, -117.066304 3...",15,True


In [38]:
# Persist the block table, including the west_cv flag (row index is not written).
cv_blocks.to_csv(
    path_or_buf='../../data/outputs/outputs_cv-blocks-geometry.csv',
    index=False,
)

In [39]:
# Display the final block table (pandas truncates the rendered rows).
cv_blocks

Unnamed: 0,STATEFP20,COUNTYFP20,TRACTCE20,BLOCKCE20,GEOID20,NAME20,MTFCC20,UR20,UACE20,UATYPE20,FUNCSTAT20,ALAND20,AWATER20,INTPTLAT20,INTPTLON20,HOUSING20,POP20,geometry,len,west_cv
536,6,73,12600,1005,060730126001005,Block 1005,G5040,U,78661.0,U,S,20268,0,32.632693,-117.097276,0,1,"POLYGON ((-117.09848 32.636082, -117.098118 32...",15,True
537,6,73,12600,1000,060730126001000,Block 1000,G5040,U,78661.0,U,S,41267,0,32.632765,-117.096874,0,1,"POLYGON ((-117.098419 32.636099, -117.098382 3...",15,True
538,6,73,12600,1001,060730126001001,Block 1001,G5040,U,78661.0,U,S,19259,0,32.627561,-117.094825,0,1,"POLYGON ((-117.095692 32.629235, -117.095466 3...",15,True
539,6,73,12600,1002,060730126001002,Block 1002,G5040,U,78661.0,U,S,104107,0,32.627972,-117.093272,310,656,"POLYGON ((-117.095251 32.629345, -117.095177 3...",15,True
540,6,73,12600,1003,060730126001003,Block 1003,G5040,U,78661.0,U,S,31836,0,32.628447,-117.091570,39,94,"POLYGON ((-117.092611 32.630074, -117.092519 3...",15,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28251,6,73,10016,1000,060730100161000,Block 1000,G5040,R,,,S,1051164,0,32.593759,-116.951446,0,1,"POLYGON ((-116.966156 32.589925, -116.965916 3...",15,False
28252,6,73,10016,1002,060730100161002,Block 1002,G5040,R,,,S,2009800,0,32.585042,-116.990265,0,1,"POLYGON ((-117.005217 32.592376, -117.005127 3...",15,False
28286,6,73,13322,3005,060730133223005,Block 3005,G5040,U,78661.0,U,S,6217,0,32.628850,-116.992421,4,22,"POLYGON ((-116.993114 32.628806, -116.993053 3...",15,False
28287,6,73,13322,3000,060730133223000,Block 3000,G5040,U,78661.0,U,S,310639,0,32.634625,-116.991596,162,575,"POLYGON ((-116.996325 32.633525, -116.996153 3...",15,False
