# Prep Census Data

In [1]:
import geopandas as gpd
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm, trange
import json
# Root folder holding the raw census downloads.
# NOTE(review): this is an absolute path to a local external volume; the
# notebook will not run on another machine without editing it.
storage = "/Volumes/easystore/Drones"


def load_csv_in_chunks(path, chunksize=100000, desc="Loading data"):
    """Read a CSV file chunk by chunk with a progress bar and return one DataFrame.

    Every column is read as ``str`` (presumably so identifier columns such as
    GEOIDs keep their exact text -- confirm against the raw files).

    Parameters
    ----------
    path : str
        Location of the CSV file.
    chunksize : int, default 100000
        Number of rows read per chunk.
    desc : str, default "Loading data"
        Label shown on the tqdm progress bar.

    Returns
    -------
    pandas.DataFrame
        All chunks concatenated in file order, keeping the reader's row index.
    """
    reader = pd.read_csv(path, chunksize=chunksize, dtype=str)
    return pd.concat(list(tqdm(reader, desc=desc)))


# Metadata describing the columns of the ACS household-income download.
with open(f'{storage}/census/acs-block-groups-household-income/metadata.json') as f:
    acs_columns = json.load(f)

# ACS 2021 5-year table B19001 extract.
acs_poverty_bg = load_csv_in_chunks(
    f"{storage}/census/acs-block-groups-household-income/acs2021_5yr_B19001_15000US060730134093.csv"
)

# Decennial 2020 DHC table P3: column metadata and the data itself.
census_columns = load_csv_in_chunks(
    f"{storage}/census/race/DECENNIALDHC2020.P3-Column-Metadata.csv"
)
census_race = load_csv_in_chunks(
    f"{storage}/census/race/DECENNIALDHC2020.P3-Data.csv"
)



Loading data: 0it [00:00, ?it/s]

Loading data: 0it [00:00, ?it/s]

Loading data: 0it [00:00, ?it/s]

## ACS Data
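The household income cutoff used here for Free and Reduced-Price Meal eligibility in California schools is roughly $55k per year (2023–24 income eligibility scales): https://www.cde.ca.gov/ls/nu/rs/scales2324.asp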

In [2]:
# Build the rename map for table B19001 and collect the income-bracket
# columns that the metadata flags as FRLP-eligible / below the median.
columns = acs_columns['tables']['B19001']['columns']
remap = {}
eligible = []
below_median = []
drop = []
for column, meta in columns.items():
    raw_name = meta['name']
    # Readable label: the metadata name without ':' and '$'.
    clean_name = raw_name.replace(':', '').replace('$', '')
    error_name = clean_name + " Error"

    remap[column] = clean_name
    remap[column + ", Error"] = error_name
    # The "... Error" columns are removed once the derived columns exist.
    drop.append(error_name)

    # The "Total:" entry is not an income bracket, so its flags are not read.
    if raw_name == "Total:":
        continue
    if meta['FRLP'] == "True":
        eligible.append(clean_name)
    if meta['median'] == "below":
        below_median.append(clean_name)

acs_poverty_bg = acs_poverty_bg.rename(columns=remap)

# Keep only rows whose geoid is 19 characters long, i.e. the 7-character
# '15000US' prefix followed by a 12-character identifier.
acs_poverty_bg['len'] = acs_poverty_bg['geoid'].map(len)
acs_poverty_bg = acs_poverty_bg.loc[acs_poverty_bg['len'] == 19].copy()
acs_poverty_bg['block-group-geoid'] = acs_poverty_bg['geoid'].map(
    lambda geoid: geoid.split('15000US')[1]
)


In [3]:
# The counts were loaded as strings, so convert them before doing arithmetic.
total_households = acs_poverty_bg['Total'].astype(float)
frlp_households = acs_poverty_bg[eligible].astype(float).sum(axis=1)
below_median_households = acs_poverty_bg[below_median].astype(float).sum(axis=1)

# Add the counts and their share of all households, then remove the
# "... Error" columns. Rows with a Total of 0 get NaN shares (0 / 0).
acs_poverty_bg = acs_poverty_bg.assign(
    **{
        'FRLP Households': frlp_households,
        'Below Median Households': below_median_households,
        '% FRLP Eligible': frlp_households / total_households,
        '% Below Median': below_median_households / total_households,
    }
).drop(columns=drop)
acs_poverty_bg.head()

Unnamed: 0,geoid,name,Total,"Less than 10,000","10,000 to 14,999","15,000 to 19,999","20,000 to 24,999","25,000 to 29,999","30,000 to 34,999","35,000 to 39,999",...,"100,000 to 124,999","125,000 to 149,999","150,000 to 199,999","200,000 or more",len,block-group-geoid,FRLP Households,Below Median Households,% FRLP Eligible,% Below Median
3,15000US060730032041,"BG 1, Tract 32.04, San Diego, CA",660,41,8,0,16,8,31,27,...,92,76,89,69,19,60730032041,239.0,334.0,0.362121,0.506061
4,15000US060730032071,"BG 1, Tract 32.07, San Diego, CA",759,0,0,0,0,0,0,0,...,90,123,39,228,19,60730032071,86.0,279.0,0.113307,0.367589
5,15000US060730100012,"BG 2, Tract 100.01, San Diego, CA",953,82,16,7,0,32,0,61,...,91,61,141,74,19,60730100012,359.0,586.0,0.376705,0.6149
6,15000US060730100161,"BG 1, Tract 100.16, San Diego, CA",0,0,0,0,0,0,0,0,...,0,0,0,0,19,60730100161,0.0,0.0,,
7,15000US060730100192,"BG 2, Tract 100.19, San Diego, CA",529,0,0,0,0,0,27,0,...,51,109,94,40,19,60730100192,94.0,235.0,0.177694,0.444234


In [4]:
# Persist the prepared ACS table (without the DataFrame index).
acs_output_path = '../../data/outputs/outputs_acs-poverty-blockgroups.csv'
acs_poverty_bg.to_csv(acs_output_path, index=False)

## Get Chula Vista Block Groups

In [5]:
# Lookup table pairing each full geoid with its identifier after the
# '15000US' prefix; also written to disk for reuse.
cv_block_groups = acs_poverty_bg.loc[:, ['geoid', 'block-group-geoid']]
cv_block_groups_path = '../../data/outputs/outputs_cv-block-groups.csv'
cv_block_groups.to_csv(cv_block_groups_path, index=False)


## Census Data

In [6]:
# The first row of the P3 data is used as the source of readable labels
# (presumably the Census "label" header row, with levels separated by '!!'
# -- confirm against the raw file).
columns = census_race.iloc[0].to_dict()
remap = {}
drop = []
for column, label in columns.items():
    if isinstance(column, str) and column.endswith("A"):
        # Columns whose code ends in "A" are discarded
        # (presumably annotation columns -- TODO confirm).
        drop.append(column)
    elif isinstance(label, str):
        # Keep only the last '!!'-separated level, without ':' and ' alone'.
        remap[column] = label.split('!!')[-1].replace(':', "").replace(' alone', '')
    else:
        # Non-string label (e.g. NaN in an empty trailing column): leave the
        # column name untouched and report it instead of hiding the problem
        # behind a bare except.
        print(f"Not Remapping: {column}")
        

Not Remapping: Unnamed: 18


In [7]:
# Apply the readable labels and discard the columns collected in `drop`.
census_race = census_race.rename(columns=remap).drop(columns=drop)


In [8]:
# The identifier is whatever follows the last 'US' in the Geography code.
census_race['GEOID20'] = census_race['Geography'].map(
    lambda geography: geography.split('US')[-1]
)
census_race['len'] = census_race['GEOID20'].str.len()

# Keep rows whose GEOID20 starts with one of the selected block-group ids.
# A prefix test replaces the previous '|'-joined regex passed to str.contains:
# that pattern becomes '' for an empty id list (which matches every row) and
# could match an id anywhere inside the string rather than at the start.
block_group_prefixes = tuple(cv_block_groups["block-group-geoid"])
mask = census_race['GEOID20'].map(
    lambda geoid: geoid.startswith(block_group_prefixes)
).astype(bool)
census_race = census_race[mask].copy()

In [9]:
# Persist the filtered race table (without the DataFrame index).
census_race_output_path = '../../data/outputs/outputs_census-race.csv'
census_race.to_csv(census_race_output_path, index=False)


In [10]:
# Inspect the column labels of the filtered race table as a sanity check.
census_race.columns

Index(['Geography', 'Geographic Area Name', 'Total', 'White',
       'Black or African American', 'American Indian and Alaska Native',
       'Asian', 'Native Hawaiian and Other Pacific Islander',
       'Some Other Race', 'Two or More Races', 'Unnamed: 18', 'GEOID20',
       'len'],
      dtype='object')

In [11]:
# Load the San Diego County geometry table with pandas' default dtype inference.
sd_county_geometry_path = '../../data/mapping/sd_county-geometry.csv'
sd_county = pd.read_csv(sd_county_geometry_path)

In [12]:
# Prefix GEOID20 with "0" so it lines up with the zero-led block-group ids
# (presumably read_csv parsed the column as an integer and lost the leading
# zero -- confirm against the CSV).
sd_county['GEOID20'] = "0" + sd_county['GEOID20'].astype(str)

# Keep rows whose GEOID20 starts with one of the selected block-group ids.
# A prefix test replaces the previous '|'-joined regex passed to str.contains,
# which becomes '' for an empty id list and then matches every row.
block_group_prefixes = tuple(cv_block_groups["block-group-geoid"])
mask = sd_county['GEOID20'].map(
    lambda geoid: geoid.startswith(block_group_prefixes)
).astype(bool)
sd_county = sd_county[mask].copy()

# The former str -> int -> str round trip that re-added the "0" was a no-op
# after the prefixing above and has been removed.
sd_county['len'] = sd_county['GEOID20'].str.len()

# NOTE(review): every population count is shifted up by one; presumably this
# avoids zero-population rows downstream -- confirm the intent.
sd_county['POP20'] = sd_county['POP20'].astype(int) + 1
sd_county.to_csv('../../data/outputs/outputs_cv-blocks-geometry.csv', index=False)