In [1]:
import pandas as pd
import json
import numpy as np

In [35]:
from censusAPI import myAPI
from geo import stco

### 2000 & 2010 Decennial Census - Population (total & by race) and Housing by County - Full U.S.

In [3]:
#variables and rename (different codes in 2000 vs. 2010 census)
col_00 = f'NAME,P001001,P004002,P004003,P004005,P004006,P004007,P004008,P004009,P004010,P004011,H001001,H003002,H003003'
col_00_rename={'P001001':'P_Tot','P004002':'P_Hisp','P004003':'P_NonHisp',\
              'P004005':'P_White','P004006':'P_Black','P004008':'P_Asian',\
               'P004007':'P_Other','P004009':'P_Other','P004010':'P_Other','P004011':'P_Other',\
              'H001001':'H_Tot','H003002':'H_Occ','H003003':'H_Vac'}

col_10 = f'NAME,P001001,P005002,P005003,P005004,P005005,P005006,P005007,P005008,P005009,P005010,H001001,H003002,H003003'
col_10_rename={'P001001':'P_Tot','P005002':'P_NonHisp','P005003':'P_White',\
              'P005004':'P_Black','P005006':'P_Asian','P005010':'P_Hisp',\
               'P005005':'P_Other','P005007':'P_Other','P005008':'P_Other','P005009':'P_Other',\
              'H001001':'H_Tot','H003002':'H_Occ','H003003':'H_Vac'}

In [4]:
years = ['2000','2010']
year_data = {'2000':col_00,'2010':col_10}
year_recode = {'2000':col_00_rename,'2010':col_10_rename}

In [18]:
# For NYC subborough calculations
boro = ['005','047','061','081','085']
geo_nyc_00 = pd.read_csv('../data/geo/nyc_subbor_00.csv')
geo_nyc_10 = pd.read_csv('../data/geo/nyc_subbor_10.csv')
geo_nyc_00['yr'],geo_nyc_10['yr']='2000','2010'
geo_nyc = pd.concat([geo_nyc_00,geo_nyc_10])

In [31]:
# For subplace calculations
geo_subpl_00 = pd.read_csv('../data/geo/subpl00.csv')
geo_subpl_10 = pd.read_csv('../data/geo/subpl10.csv')
geo_subpl_00['yr'],geo_subpl_10['yr']='2000','2010'
geo_subpl=pd.concat([geo_subpl_00,geo_subpl_10])

## data getters

In [44]:
def _fetch_sf1(year, geography):
    """Download one Decennial SF1 table and promote its first row to the header.

    Parameters
    ----------
    year : str
        Census year; must be a key of the module-level ``year_data`` and
        ``year_recode`` dictionaries.
    geography : str
        The ``for=...`` / ``in=...`` part of the query string, already
        URL-encoded (e.g. ``'for=county:*&in=state:*'``).

    Returns
    -------
    pandas.DataFrame
        An independent copy of the data rows, with the requested variables and
        whatever geography columns the API appended. No dtype inference is
        applied, so values are left as the API returned them.

    Raises
    ------
    ValueError
        If no column specification is configured for ``year``.
    """
    if year not in year_data or year not in year_recode:
        raise ValueError(f'no SF1 column specification configured for year {year!r}')
    url = (f'https://api.census.gov/data/{year}/dec/sf1'
           f'?get={year_data[year]}&{geography}&key={myAPI}')
    # dtype=False: never let pandas reinterpret codes such as state/county,
    # whose leading zeros are needed when they are concatenated into ids.
    raw = pd.read_json(url, dtype=False)
    # The response is a list of rows whose first row holds the column names.
    # Copying the slice makes later column assignments safe (no
    # SettingWithCopyWarning).
    table = raw.iloc[1:].copy()
    table.columns = raw.iloc[0].tolist()
    return table


def _recode(table, year, id_cols):
    """Cast variables to integers, rename them and sum columns sharing a label.

    Parameters
    ----------
    table : pandas.DataFrame
        Frame whose columns are ``id_cols`` plus variable columns holding
        integer-like values.
    year : str
        Key into ``year_recode`` selecting the code -> label mapping.
    id_cols : list of str
        Columns to carry through untouched and place first.

    Returns
    -------
    pandas.DataFrame
        ``id_cols`` followed by the recoded variables in alphabetical order.
    """
    ids = table[id_cols]
    values = (table.drop(columns=id_cols)
                   .astype('int64')
                   .rename(columns=year_recode[year]))
    # Several source codes map onto one label (e.g. the P_Other group); add
    # them up. Grouping the transposed frame replaces groupby(..., axis=1),
    # which newer pandas releases deprecate.
    values = values.T.groupby(level=0).sum().T
    return pd.concat([ids, values], axis=1)


def get_co(years):
    """Population and housing counts for every county, one row per county/year.

    Parameters
    ----------
    years : iterable of str
        Census years to download; each must be configured in ``year_data``.

    Returns
    -------
    pandas.DataFrame
        Columns ``stco`` (state + county code), ``NAME``, ``yr`` (str) and the
        recoded variables. The row index is not reset, so labels repeat across
        years. An empty frame is returned when ``years`` is empty.
    """
    frames = []
    for year in years:
        table = _fetch_sf1(year, 'for=county:*&in=state:*')
        table['stco'] = table['state'] + table['county']
        table['yr'] = year
        table = table.drop(columns=['state', 'county'])
        frames.append(_recode(table, year, ['stco', 'NAME', 'yr']))
    if not frames:
        return pd.DataFrame()
    dff = pd.concat(frames)  # single concat instead of growing a frame per loop
    dff['yr'] = dff['yr'].astype(str)
    return dff


def get_us(years):
    """Population and housing counts for the nation, one row per year.

    Parameters
    ----------
    years : iterable of str
        Census years to download; each must be configured in ``year_data``.

    Returns
    -------
    pandas.DataFrame
        Columns ``NAME``, ``yr`` (str) and the recoded variables. Any geography
        column the API appends (e.g. ``us``) is not dropped and is cast to
        integer together with the variables, as before. An empty frame is
        returned when ``years`` is empty.
    """
    frames = []
    for year in years:
        table = _fetch_sf1(year, 'for=us:*')
        table['yr'] = year
        frames.append(_recode(table, year, ['NAME', 'yr']))
    if not frames:
        return pd.DataFrame()
    dff = pd.concat(frames)
    dff['yr'] = dff['yr'].astype(str)
    return dff


def get_nycsub(years):
    """Tract counts for the ``boro`` counties, summed per ``geo_nyc`` area.

    Tracts are downloaded county by county (state 36), joined to the
    module-level ``geo_nyc`` lookup on ``ct_id`` and ``yr``, and summed per
    ``stco`` / ``id`` / ``name`` / ``yr``.

    Parameters
    ----------
    years : iterable of str
        Census years to download; each must be configured in ``year_data``.

    Returns
    -------
    pandas.DataFrame
        One row per (``stco``, ``id``, ``name``, ``yr``) group. Tracts without
        a match in ``geo_nyc`` have missing group keys and are left out by the
        group-by. An empty frame is returned when nothing was downloaded.
    """
    frames = []
    for year in years:
        for b in boro:
            table = _fetch_sf1(year, f'for=tract:*&in=state:36%20county:{b}')
            table['yr'] = year
            # Right-pad tract codes to six characters before building the id.
            tract = table['tract'].str.pad(width=6, side='right', fillchar='0')
            # int64 explicitly: these ids do not fit a 32-bit integer.
            table['ct_id'] = (table['state'] + table['county'] + tract).astype('int64')
            table = table.drop(columns=['state', 'county', 'tract', 'NAME'])
            frames.append(_recode(table, year, ['ct_id', 'yr']))
    if not frames:
        return pd.DataFrame()
    dff = pd.concat(frames)
    dff['yr'] = dff['yr'].astype(int).astype(str)  # clean up year column
    dff = pd.merge(dff, geo_nyc, on=['ct_id', 'yr'], how='left')
    dff['stco'] = dff['ct_id'].astype(str).str[:5]
    dff = dff.drop(columns=['nta_id', 'nta_nm', 'puma', 'ct_id', 'boro'])
    dff = dff.groupby(['stco', 'id', 'name', 'yr']).sum().reset_index()
    return dff


def get_subpl(years):
    """County-subdivision and place counts, summed per ``geo_subpl`` area.

    For every year, county subdivisions are downloaded for each state/county
    pair in the module-level ``stco`` mapping, followed by the place table for
    state 36. Both go through the same integer cast / rename / collapse step,
    are joined to ``geo_subpl`` on ``id`` and ``yr``, and are summed per
    ``stco`` / ``geoid`` / ``name`` / ``yr``.

    Compared with the previous implementation, the place rows are now recoded
    like the subdivision rows (their values used to be lost because the
    columns were never cast or renamed), and the place rows of every requested
    year are kept (only the last year's used to survive).

    Parameters
    ----------
    years : iterable of str
        Census years to download; each must be configured in ``year_data``.

    Returns
    -------
    pandas.DataFrame
        One row per (``stco``, ``geoid``, ``name``, ``yr``) group with ``stco``
        and ``geoid`` as integers. Rows without a match in ``geo_subpl`` have
        missing group keys and are left out by the group-by. An empty frame is
        returned when ``years`` is empty.
    """
    frames = []
    for year in years:
        # subdivision table
        for st, co in stco.items():
            for c in co:
                table = _fetch_sf1(
                    year, f'for=county%20subdivision:*&in=state:{st}&in=county:{c}')
                table['yr'] = year
                table['id'] = (table['state'] + table['county']
                               + table['county subdivision']).astype('int64')
                table = table.drop(
                    columns=['state', 'county', 'county subdivision', 'NAME'])
                frames.append(_recode(table, year, ['id', 'yr']))
        # place table (fetched on its own, so it no longer depends on variables
        # left over from the subdivision loop)
        places = _fetch_sf1(year, 'for=place:*&in=state:36')
        places['yr'] = year
        places['id'] = (places['state'] + places['place']).astype('int64')
        places = places.drop(columns=['state', 'place', 'NAME'])
        frames.append(_recode(places, year, ['id', 'yr']))
    if not frames:
        return pd.DataFrame()

    subpl = pd.concat(frames)
    subpl['yr'] = subpl['yr'].astype(int).astype(str)  # clean up year column
    subpl = pd.merge(subpl, geo_subpl, on=['id', 'yr'], how='left')
    subpl = subpl.drop(columns=['id', 'nm'])
    subpl = subpl.groupby(['stco', 'geoid', 'name', 'yr']).sum().reset_index()
    subpl['stco'] = subpl['stco'].astype(int)
    subpl['geoid'] = subpl['geoid'].astype(int)
    return subpl

In [45]:
# Build the county-subdivision / place roll-up for every year in `years`
# (this issues live Census API requests).
test = get_subpl(years)

In [46]:
# Preview the first rows of the result.
test.head()

Unnamed: 0,stco,geoid,name,yr,H_Occ,H_Tot,H_Vac,P_Asian,P_Black,P_Hisp,P_NonHisp,P_Other,P_Tot,P_White
0,9001,900100000,"County subdivisions not defined, Fairfield Cou...",2000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,9001,900100000,"County subdivisions not defined, Fairfield Cou...",2010,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,9001,900104720,"Bethel town, Fairfield County, Connecticut",2000,6505.0,6653.0,148.0,631.0,219.0,669.0,17398.0,311.0,18067.0,16237.0
3,9001,900104720,"Bethel town, Fairfield County, Connecticut",2010,6938.0,7310.0,372.0,820.0,315.0,1419.0,17165.0,432.0,18584.0,15598.0
4,9001,900108070,"Bridgeport town, Fairfield County, Connecticut",2000,50307.0,54367.0,4060.0,4459.0,40974.0,44478.0,95051.0,6460.0,139529.0,43158.0


In [47]:
# Save the result as an Excel workbook in the current working directory
# (the row index is written as well, since to_excel keeps it by default).
test.to_excel('subpl.xlsx')