# Decennial Census 2000 Housing Units in Structure
### SF3 
- Variable list: https://api.census.gov/data/2000/dec/sf3/variables.html
- Dataset overview: https://www.census.gov/data/developers/data-sets/decennial-census.2000.html

In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
from geo import *
# Census API key, read from the `Census_API` environment variable (None when it is not set).
myAPI = os.getenv('Census_API')

### variables to pull and rename

In [3]:
#variables and rename (different codes in 2000 vs. 2010 census)
# Maps each requested variable code to its output column name. Several codes
# share one output name; same-named columns are summed further down.
col_recode = {
    'H030001': 'UTot',
    'H030002': 'U1',
    'H030003': 'U1',
    'H030004': 'U2',
    'H030005': 'U34',
    'H030006': 'U5',
    'H030007': 'U5',
    'H030008': 'U5',
    'H030009': 'U5',
    'H030010': 'UOth',
    'H030011': 'UOth',
}

# Comma-separated variable list for the API `get=` parameter (dict key order).
col = ','.join(col_recode)

year = '2000'

### geo tables for join with NYC and subplace

## data getters

In [4]:
## census data api pull & table build
def api_pull(url):
    """Read a JSON table from `url` and promote its first row to the header.

    The payload is parsed with ``pd.read_json``; row 0 supplies the column
    labels and is then excluded from the returned rows, so the returned
    frame's index starts at 1.
    """
    table = pd.read_json(url)
    header_row = table.iloc[0]
    table.columns = header_row
    return table.iloc[1:]

## county data
def get_co():
    """Pull the variables in `col` for every county in every state.

    Reads the module-level `year`, `col` and `myAPI` at call time and fetches
    the table through `api_pull`.

    Returns
    -------
    DataFrame
        The requested variable columns plus a 'GEO_ID' column holding the
        concatenated state and county codes; the separate 'state' and
        'county' columns are dropped.
    """
    base_url = f'https://api.census.gov/data/{year}/dec/sf3'
    data_url = f'{base_url}?get={col}&for=county:*&in=state:*'
    # Only send a key when one is configured. `myAPI` is None when the
    # environment variable is unset, and interpolating it would put the
    # literal text "key=None" in the request.
    if myAPI:
        data_url += f'&key={myAPI}'
    df = api_pull(data_url)
    # assign() builds a new frame rather than writing into the slice that
    # api_pull returns.
    df = df.assign(GEO_ID=df['state'] + df['county'])
    return df.drop(columns=['state', 'county'])

## national data
def get_us():
    """Pull the variables in `col` for the nation as a whole.

    Reads the module-level `year`, `col` and `myAPI` at call time and returns
    the table produced by `api_pull` unchanged.
    """
    base_url = f'https://api.census.gov/data/{year}/dec/sf3'
    data_url = f'{base_url}?get={col}&for=us:*'
    # Only send a key when one is configured. `myAPI` is None when the
    # environment variable is unset, and interpolating it would put the
    # literal text "key=None" in the request.
    if myAPI:
        data_url += f'&key={myAPI}'
    return api_pull(data_url)

## build master table

In [5]:
#full US counties
co_us = get_co()
co_us = co_us.rename(columns=col_recode)
# Move GEO_ID to the front. The popped Series is bound to `geo_id`, not `col`:
# get_co()/get_us() interpolate the module-level `col` string into the request
# URL, so rebinding `col` here would break any later call to them.
geo_id = co_us.pop('GEO_ID')
co_us.insert(0, geo_id.name, geo_id)

In [6]:
# Cast the count columns to int and collapse columns that share a recoded name
# (several source codes map to one output name) by summing them.
# Done without a `for col in ...` loop so the module-level `col` string that
# get_co()/get_us() read is left untouched.
geo_ids = co_us['GEO_ID']
counts = co_us.drop(columns='GEO_ID').astype(int)
# groupby(..., axis=1) is deprecated in recent pandas; grouping the transposed
# frame by its (duplicate) labels and transposing back is the equivalent.
counts = counts.T.groupby(level=0).sum().T

co_us = counts.copy()
# Normalise GEO_ID to a zero-padded 5-character string.
co_us.insert(0, 'GEO_ID', geo_ids.astype(int).astype(str).str.zfill(5))
# Keep only the counties whose GEO_ID appears in stco_fips.
co = co_us[co_us['GEO_ID'].isin(stco_fips)].copy()

In [7]:
# Preview the first rows of the national county table after recoding and summing.
co_us.head()

Unnamed: 0,GEO_ID,U1,U2,U34,U5,UOth,UTot
1,1001,12221,133,168,504,4636,17662
2,1003,46435,1006,1834,11195,13815,74285
3,1005,7363,603,336,376,3783,12461
4,1007,4984,50,139,333,2839,8345
5,1009,14176,319,246,326,6091,21158


In [8]:
# Preview the first ten rows of `co`, the counties whose GEO_ID is in stco_fips.
co.head(10)

Unnamed: 0,GEO_ID,U1,U2,U34,U5,UOth,UTot
306,9001,221046,30732,28305,58105,1278,339466
308,9005,60596,6528,4595,6950,598,79267
310,9009,199904,33390,39746,65675,2017,340732
1773,34003,201353,49468,19343,68533,1123,339820
1778,34013,115031,43847,48235,93642,256,301011
1780,34017,37620,59945,38821,103836,396,240618
1781,34019,38634,1858,1487,2880,173,45032
1782,34021,94204,7848,5437,25381,410,133280
1783,34023,176969,20100,15959,58173,2436,273637
1784,34025,180814,9684,9347,37744,3295,240884


In [9]:
# Tag each county with its sub-region label via the sub_7 lookup, then total
# every count column per sub-region. The label column is renamed to GEO_ID so
# the result has the same key column as the county table.
co['sub'] = co['GEO_ID'].map(sub_7)
subreg = (
    co.drop(columns='GEO_ID')
      .groupby('sub')
      .sum()
      .reset_index()
      .rename(columns={'sub': 'GEO_ID'})
)

In [10]:
# Display the full sub-region totals table.
subreg

Unnamed: 0,GEO_ID,U1,U2,U34,U5,UOth,UTot
0,CT,481546,70650,72646,130730,3893,759465
1,INJ,939841,254768,167170,437113,5589,1804481
2,LHV,270738,38326,38756,129859,1769,479448
3,LI,815456,51915,20194,86930,5979,980474
4,MHV,244511,25784,23158,38787,19003,351243
5,NYC,536054,409565,306204,1945829,3260,3200912
6,ONJ,602726,33326,26276,92219,11045,765592


In [11]:
# Stack the county rows on top of the sub-region totals and export to Excel
# without writing the index.
cousubreg = pd.concat([co, subreg], axis=0)
cousubreg.to_excel(
    '../../output/Housing/BuildSize_DEC_cousubreg.xlsx',
    index=False,
)