# 2010 and 2020 Pop & Housing by CBG for mapping

In [7]:
import pandas as pd
import numpy as np
import os
import glob

In [8]:
from geo import stco,stco_fips,sub_7,sub_lbl
from r_codes import geo_col,col1,col2,col3
# Census API key, read from the Census_API environment variable (None if unset)
myAPI = os.getenv('Census_API')

### 2010 data

In [10]:
# Download the 2010 SF1 population (P001001) and housing (H001001) columns for
# every block group in each state/county pair listed in `stco`.
cols = 'P001001,H001001'
base_url = 'https://api.census.gov/data/2010/dec/sf1'
# Only send a key when one is configured; otherwise the URL would carry the
# literal text "key=None".
key_param = f'&key={myAPI}' if myAPI else ''
frames = []
for st,co in stco.items():
    for c in co:
        data_url = f'{base_url}?get={cols}&for=block%20group:*&in=state:{st}%20county:{c}{key_param}'
        df = pd.read_json(data_url)
        # the first row of the response holds the column names
        df.columns = df.iloc[0]
        # copy so the new column is written to an independent frame, not a slice
        df = df[1:].copy()
        # block-group id = state + county + tract + block group codes, concatenated
        df['id'] = df.state+df.county+df.tract+df['block group']
        frames.append(df)
# concatenate once at the end instead of re-copying the growing table per county
dff = pd.concat(frames) if frames else pd.DataFrame()

In [11]:
# the component geography codes are already folded into `id`
dff = dff.drop(columns=['state','county','tract','block group'])
# ids run to 12 digits, which does not fit a 32-bit integer; request int64
# explicitly because plain `int` is 32-bit on some platforms (Windows, NumPy < 2)
dff['id'] = dff['id'].astype('int64')

Unnamed: 0,P001001,H001001,id
1,726,284,90010101011
2,745,307,90010101012
3,1485,623,90010101013
4,1520,564,90010101014
5,1326,478,90010101021


In [22]:
# to_csv does not create missing folders, so make sure the destination exists
os.makedirs('output/blockgroup',exist_ok=True)
dff.to_csv('output/blockgroup/pophou10.csv',index=False)

### 2020 Census Block Group Table

In [13]:
# Header lists for each input file, keyed by the character make_state reads at
# position -8 of the file path. NOTE(review): 'o' is presumably the last letter
# of "geo" in the geography file name and '1'/'2'/'3' the segment number -
# confirm against the actual file names.
col_head = {
    'o': geo_col,
    '1': col1,
    '2': col2,
    '3': col3,
}
# key columns shared by all files; used to join them together
col_join = [
    'LOGRECNO',
    'STUSAB',
    'FILEID',
    'CHARITER',
]
# the only columns kept after the join
col_data = [
    'STATE',
    'COUNTY',
    'GEOCODE',
    'SUMLEV',
    'P0010001',
    'H0010001',
]

In [14]:
# one folder per state under ../data/red_20; glob returns matches in arbitrary
# order, so sort for a reproducible row order and skip anything that is not a
# directory (stray files would otherwise be treated as state folders)
folders = sorted(p for p in glob.glob('../data/red_20/*') if os.path.isdir(p))

In [15]:
def _read_pl_file(path):
    """Read one pipe-delimited, header-less file and label its columns.

    The header list is looked up in ``col_head`` using the character at
    position -8 of ``path``.
    """
    table = pd.read_table(path,sep='|',header=None,low_memory=False)
    table.columns = col_head[path[-8]]
    return table

def make_state(state):
    """Build the block-group table for one state.

    Reads every ``*.pl`` file in ``../data/red_20/{state}2020.pl/``, inner-joins
    them on ``col_join``, keeps only the ``col_data`` columns, and returns the
    rows whose SUMLEV is 150 (census block groups).

    Parameters
    ----------
    state : str
        Prefix of the state's folder name (the part before ``2020.pl``).

    Returns
    -------
    pandas.DataFrame
        One row per block group with the ``col_data`` columns.

    Raises
    ------
    FileNotFoundError
        If no ``.pl`` files are found for ``state``.
    """
    pattern = f'../data/red_20/{state}2020.pl/*.pl'
    # glob order is arbitrary; sort so the join order is reproducible
    files = sorted(glob.glob(pattern))
    if not files:
        raise FileNotFoundError(f'no .pl files found matching {pattern}')

    #start from the first file, then join every other file onto it
    df = _read_pl_file(files[0])
    for file in files[1:]:
        df = pd.merge(df,_read_pl_file(file),on=col_join,how="inner")

    #reduce table size
    df = df[col_data] #just the data columns we need
    df = df[df.SUMLEV==150].copy() #just census block groups
    return df

def make_table(folders):
    """Stack the per-state block-group tables for every folder in ``folders``.

    Parameters
    ----------
    folders : iterable of str
        Paths to state folders whose names begin with the two-character state
        code that ``make_state`` expects.

    Returns
    -------
    pandas.DataFrame
        All state tables concatenated row-wise; an empty DataFrame when
        ``folders`` is empty.
    """
    tables = []
    for folder in folders:
        # take the state code from the folder's own name rather than a fixed
        # character offset, so it does not depend on the parent path length
        state = os.path.basename(os.path.normpath(folder))[:2]
        tables.append(make_state(state))
    if not tables:
        return pd.DataFrame()
    # concatenate once instead of re-copying the growing table per state
    return pd.concat(tables)

In [16]:
#master regional table of 2020 census block groups (SUMLEV 150)
df = make_table(folders)
# rebuild the zero-padded state (2-digit) and county (3-digit) codes; COUNTY is
# cast to int first so a float-typed value such as 1.0 pads to '001'
df['STATE']= df['STATE'].astype(str).str.pad(width=2,side='left',fillchar='0')
df['COUNTY']= df['COUNTY'].astype(int).astype(str).str.pad(width=3,side='left',fillchar='0')
df['stco'] = df.STATE + df.COUNTY
# keep only block groups whose state+county code is listed in stco_fips
df = df[df['stco'].isin(stco_fips)]
df = df.drop(columns=['STATE','SUMLEV','stco','COUNTY']).rename(columns={'GEOCODE':'id'})
# ids run to 12 digits, which does not fit a 32-bit integer; request int64
# explicitly because plain `int` is 32-bit on some platforms (Windows, NumPy < 2)
df['id']=df.id.astype('int64')

Unnamed: 0,GEOCODE,P0010001,H0010001
1065,90010101011,1436,607
1066,90010101012,206,58
1067,90010101013,0,0
1068,90010101014,1615,621
1069,90010101015,1144,430


In [21]:
# to_csv does not create missing folders, so make sure the destination exists
os.makedirs('output/blockgroup',exist_ok=True)
df.to_csv('output/blockgroup/pophou20.csv',index=False)