# 2000, 2010 and 2020 Race/Hispanic Origin by Census Tract for mapping

In [1]:
import pandas as pd
import numpy as np
import os
import glob

In [2]:
from geo import stco,stco_fips,sub_7,sub_lbl
from r_codes import geo_col,col1,col2,col3
# Census API key, read from the Census_API environment variable (None when unset)
myAPI = os.getenv('Census_API')

### 2000 & 2010 Census Tract Tables

In [3]:
# API variable lists and their recodes; the table codes differ between the
# 2000 and the 2010 census, so each year gets its own pair.
# Several source codes map to 'P_Other' and are summed together later on.
col_00 = ','.join([
    'P004002', 'P004005', 'P004006', 'P004007',
    'P004008', 'P004009', 'P004010', 'P004011',
])
col_00_rename = {
    'P004002': 'P_Hisp',
    'P004005': 'P_White',
    'P004006': 'P_Black',
    'P004008': 'P_Asian',
    **dict.fromkeys(['P004007', 'P004009', 'P004010', 'P004011'], 'P_Other'),
}

col_10 = ','.join([
    'P005003', 'P005004', 'P005005', 'P005006',
    'P005007', 'P005008', 'P005009', 'P005010',
])
col_10_rename = {
    'P005003': 'P_White',
    'P005004': 'P_Black',
    'P005006': 'P_Asian',
    'P005010': 'P_Hisp',
    **dict.fromkeys(['P005005', 'P005007', 'P005008', 'P005009'], 'P_Other'),
}

In [4]:
# per-year lookups: the API variable list to request and the recode to apply
years = ['2000', '2010']
year_data = dict(zip(years, (col_00, col_10)))
year_recode = dict(zip(years, (col_00_rename, col_10_rename)))

In [5]:
## census data api pull & table build
def api_pull(url):
    """Read a JSON table from *url* and promote its first row to the header.

    The first row supplies the column labels and is removed from the data;
    the remaining rows keep their original index labels.
    """
    raw = pd.read_json(url)
    header = raw.iloc[0]
    table = raw.iloc[1:]
    table.columns = header
    return table

## column cleanup function
def col_clean(df,year,move):
    """Recode one year's data columns and collapse duplicates.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the label columns named in *move* plus data columns
        whose values can be cast to int.
    year : str
        Key into ``year_recode`` selecting the rename mapping for the data
        columns.
    move : list of str
        Label columns; they are left untouched and placed first in the result.

    Returns
    -------
    pandas.DataFrame
        The *move* columns followed by the recoded data columns (sorted by
        name). Columns that share a new name (e.g. the several "other race"
        codes) are summed into one integer column.
    """
    data_cols = [col for col in df.columns if col not in move]
    labels = df[move]
    if not data_cols:
        return labels

    # cast and rename on a fresh frame so the caller's table is never written to
    data = df[data_cols].astype(int).rename(columns=year_recode.get(year))

    # Sum columns sharing a name. Transposing and grouping on the index
    # replaces groupby(..., axis=1), which pandas deprecated in 2.1; limiting
    # it to the data columns keeps them integer instead of object dtype.
    data = data.T.groupby(level=0).sum().T

    return pd.concat([labels, data], axis=1)

## get tract data for each requested census year (SF1 endpoint: 2000, 2010)
def get_tract(years):
    """Download and stack tract-level tables for every year in *years*.

    For each year and each state/county pair in ``stco`` the SF1 endpoint of
    the Census API is queried for all tracts. Each response is tagged with
    the year, given a combined ``id`` (state + county + tract, with the tract
    right-padded with '0' to six characters) and recoded by ``col_clean``.

    Parameters
    ----------
    years : iterable of str
        Census years; each is looked up in ``year_data`` for the variable
        list and passed to ``col_clean`` for the recode.

    Returns
    -------
    pandas.DataFrame
        One row per (id, yr) with the data columns summed. ``yr`` is a string
        and ``id`` an int, so any leading zero in the id is dropped. When
        nothing was pulled, an empty frame with columns ``id`` and ``yr``.
    """
    move = ['id','yr']
    frames = []
    for year in years:
        cols = year_data.get(year)
        base_url = f'https://api.census.gov/data/{year}/dec/sf1'
        for st,co in stco.items():
            for c in co:
                data_url = f'{base_url}?get={cols}&for=tract:*&in=state:{st}%20county:{c}&key={myAPI}'
                df = api_pull(data_url)
                df['yr'] = year
                df['tract'] = df.tract.str.pad(width=6,side='right',fillchar='0')
                df['id']=df.state+df.county+df.tract
                df = df.drop(columns=['state','county','tract'])
                frames.append(col_clean(df,year,move))

    # nothing requested or pulled: return an empty table instead of failing on 'yr'
    if not frames:
        return pd.DataFrame(columns=move)

    # concatenate once; growing a frame inside the loop re-copies every row
    # each pass and starts from an empty frame, which pandas deprecates
    dff = pd.concat(frames)
    dff['yr'] = dff['yr'].astype(int).astype(str) #clean up year column
    dff = dff.groupby(['id','yr']).sum().reset_index()
    dff['id'] = dff['id'].astype(int)
    return dff

In [6]:
tract = get_tract(years)
# split the stacked table into one frame per census year; 'yr' is dropped
# because each frame then holds a single year
tract_00, tract_10 = (
    tract.loc[tract['yr'].eq(label)].drop(columns='yr')
    for label in ('2000', '2010')
)

In [7]:
# write each year's tract table to its own CSV (the row index is written too)
for _frame, _path in ((tract_00, 'output/tract/race00.csv'),
                      (tract_10, 'output/tract/race10.csv')):
    _frame.to_csv(_path)

### 2020 Census Tract Table

In [8]:
# header lists for the 2020 files, keyed by the character make_state reads
# from each file name
col_head = {
    'o': geo_col,
    '1': col1,
    '2': col2,
    '3': col3,
}
# key columns shared by every file; used to merge them
col_join = ['LOGRECNO', 'STUSAB', 'FILEID', 'CHARITER']
# columns kept after the merge
col_data = [
    'STATE', 'COUNTY', 'GEOCODE', 'SUMLEV',
    'P0020002', 'P0020005', 'P0020006', 'P0020007',
    'P0020008', 'P0020009', 'P0020010', 'P0020011',
]
# new column names; several source codes collapse into 'P_Other'
col_recode = {
    'GEOCODE': 'id',
    'P0020002': 'P_Hisp',
    'P0020005': 'P_White',
    'P0020006': 'P_Black',
    'P0020008': 'P_Asian',
    **dict.fromkeys(['P0020007', 'P0020009', 'P0020010', 'P0020011'], 'P_Other'),
}

In [9]:
# list every entry under the 2020 redistricting data directory
folders = glob.glob(pathname='../data/red_20/*')

In [10]:
def make_state(state):
    """Build the tract-level 2020 table for one state.

    Reads every ``*.pl`` file in ``../data/red_20/{state}2020.pl``, labels
    each with the header list selected from ``col_head``, inner-joins them
    on ``col_join`` and keeps the ``col_data`` columns for tract rows.

    Parameters
    ----------
    state : str
        Prefix of the state's folder name (the part before ``2020.pl``).

    Returns
    -------
    pandas.DataFrame
        The ``col_data`` columns for rows whose SUMLEV equals 140.

    Raises
    ------
    FileNotFoundError
        If the state's folder holds no ``.pl`` files.
    """
    folder = f'../data/red_20/{state}2020.pl'
    # glob order is arbitrary; sort so the merge order is the same on every run
    files = sorted(glob.glob(f'../data/red_20/{state}2020.pl/*.pl'))
    if not files:
        raise FileNotFoundError(f'no .pl files found for state {state!r} in {folder}')

    def _read_pl(path):
        # read one pipe-delimited file and attach its header list
        table = pd.read_table(f'{path}',sep='|',header=None,low_memory=False)
        # the character eight from the end of the path selects the header
        # list (keys of col_head: 'o', '1', '2', '3')
        table.columns = col_head[path[-8]]
        return table

    #start from the first file, then join all other files onto it
    df = _read_pl(files[0])
    for file in files[1:]:
        df = pd.merge(df,_read_pl(file),on=col_join,how="inner")

    #reduce table size
    df = df[col_data] #just the data columns we need
    df = df[df.SUMLEV==140].copy() #just tracts
    return df

def make_table(folders):
    """Stack the per-state tables for every folder in *folders*.

    Parameters
    ----------
    folders : iterable of str
        Paths of the state folders; the first two characters of each folder
        name are passed to ``make_state``.

    Returns
    -------
    pandas.DataFrame
        The state tables concatenated in the order given, or an empty frame
        when *folders* is empty.
    """
    frames = []
    for folder in folders:
        # take the prefix from the folder name itself rather than from a
        # fixed character offset, so a different parent path still works
        state = os.path.basename(os.path.normpath(folder))[:2]
        frames.append(make_state(state))
    if not frames:
        return pd.DataFrame()
    # one concat at the end instead of re-copying the table on every pass
    return pd.concat(frames)

In [11]:
#master 2020 tract table: all state folders stacked, columns recoded,
#then limited to the counties listed in stco_fips
df = make_table(folders)
df = df.rename(columns=col_recode)
# sum the columns that now share a name (the several 'P_Other' codes);
# transposing and grouping on the index replaces groupby(..., axis=1), which
# pandas deprecated in 2.1 -- the columns come back sorted by name as before
df = df.T.groupby(level=0).sum().T
# rebuild zero-padded state (2 chars) and county (3 chars) codes to match stco_fips
df['STATE']= df['STATE'].astype(str).str.pad(width=2,side='left',fillchar='0')
df['COUNTY']= df['COUNTY'].astype(int).astype(str).str.pad(width=3,side='left',fillchar='0')
df['stco'] = df.STATE + df.COUNTY
df = df[df['stco'].isin(stco_fips)]

In [12]:
# drop the helper/geography columns, then rotate the last remaining column
# to the front and store the id as an integer
df = df.drop(columns=['STATE','SUMLEV','stco','COUNTY'])
move = list(df.columns)
move = move[-1:] + move[:-1]
df = df.loc[:, move]
df['id'] = df['id'].astype(int)

In [13]:
# write the 2020 tract table (the row index is written too)
df.to_csv(path_or_buf='output/tract/race20.csv')