In [1]:
import pandas as pd
import json
import requests
import math
import numpy as np

In [2]:
from censusAPI import myAPI
import utilcalcs as calc
import geo_agg
from muni_geo_id import PUMA_2018,cousub_2018,placeLI_2018,\
                        cousub_2010,placeLI_2010,stco,state

In [3]:
import os
# Base directory used to build the data/ and output/ paths in later cells:
# the current working directory with its last 13 characters removed.
# NOTE(review): the hard-coded 13 presumably strips the name of the folder the
# notebook is run from; it silently yields a wrong path when the notebook is
# started from any other directory — confirm and consider a named constant.
path = os.getcwd()
path = path[:-13]

## User-defined parameters

In [4]:
# County codes (within state 36) that get_nyc_tract loops over when requesting
# tract-level data. Presumably the county FIPS codes of the five NYC boroughs
# — verify against the Census county list.
nyc = ['005','047','061','081','085']

In [5]:
# Dataset, variables and year requested from the Census API.
# NOTE(review): presumably B01002_001 is median age and DP02_0001 is total
# households (E = estimate, M = margin of error) — confirm against the ACS
# variable list for `year1`.
source = 'acs/acs5'

# Columns requested from the base endpoint of `source`.
col_b = 'GEO_ID,NAME,B01002_001E,B01002_001M'
# Columns requested from the /profile endpoint of `source`.
# Currently not requested: DP02_0151E,DP02_0151M,DP02_0152E,DP02_0152M
col_d = 'GEO_ID,DP02_0001E,DP02_0001M'

year1 = '2019'
#year0 = '2010'

# Columns kept by clean_data, in order: identifiers first, then numeric fields.
# Derived from the two request strings so the lists cannot drift apart; the
# leading GEO_ID of col_d is skipped because it is already supplied by col_b.
var_data = col_b.split(',') + col_d.split(',')[1:]

### Data download functions

In [6]:
def _fetch_census_table(url, timeout=60):
    """Request one Census API URL and return its JSON table as a DataFrame.

    The response body is parsed once: its first row supplies the column names
    and the remaining rows the data, so a result with a header but no data
    rows yields an empty frame that still carries the columns.

    Parameters
    ----------
    url : str
        Fully built request URL (including the API key).
    timeout : int or float, optional
        Seconds to wait for the server before giving up.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status, so that a failed request
        is reported as such instead of surfacing as a JSON parsing error.
    """
    resp = requests.get(url, timeout=timeout)
    resp.raise_for_status()
    rows = json.loads(resp.content)
    return pd.DataFrame(rows[1:], columns=rows[0])


def get_cousub(year,col_b,col_d):
    """Download county-subdivision rows for every state/county pair in ``stco``.

    For each county two requests are made against the module-level ``source``
    dataset: the base endpoint for ``col_b`` and the ``/profile`` endpoint for
    ``col_d``.

    Parameters
    ----------
    year : str
        Vintage inserted into the request URL.
    col_b : str
        Comma-separated column list for the base endpoint.
    col_d : str
        Comma-separated column list for the ``/profile`` endpoint.

    Returns
    -------
    pandas.DataFrame
        All base rows left-joined to all profile rows on ``GEO_ID``. No type
        conversion is applied here. Geography columns returned by both
        endpoints receive pandas' ``_x`` / ``_y`` suffixes.
    """
    base_url = f'https://api.census.gov/data/{year}/{source}'
    frames1 = []
    frames2 = []
    for st,co in stco.items():
        for i in co:
            # Same geography predicate for both endpoints of this county.
            geo_query = f'for=county%20subdivision:*&in=state:{st}%20county:{i}&key={myAPI}'
            frames1.append(_fetch_census_table(f'{base_url}?get={col_b}&{geo_query}'))
            frames2.append(_fetch_census_table(f'{base_url}/profile?get={col_d}&{geo_query}'))
    df_sub = pd.merge(pd.concat(frames1),pd.concat(frames2),how='left',on='GEO_ID')
    return df_sub

def get_place(year,col_b,col_d,geo_code):
    """Download place rows for state 36 and keep only the requested GEO_IDs.

    Parameters
    ----------
    year : str
        Vintage inserted into the request URL.
    col_b : str
        Comma-separated column list for the base endpoint.
    col_d : str
        Comma-separated column list for the ``/profile`` endpoint.
    geo_code : list-like
        ``GEO_ID`` values to keep; every other place is dropped.

    Returns
    -------
    pandas.DataFrame
        Base rows left-joined to profile rows on ``GEO_ID``, filtered to
        ``geo_code``. No type conversion is applied here.
    """
    url1 = f'https://api.census.gov/data/{year}/{source}?get={col_b}&for=place:*&in=state:36&key={myAPI}'
    url2 = f'https://api.census.gov/data/{year}/{source}/profile?get={col_d}&for=place:*&in=state:36&key={myAPI}'
    df1 = _fetch_census_table(url1)
    df2 = _fetch_census_table(url2)

    df_pl = pd.merge(df1,df2,how='left',on='GEO_ID')
    df_pl = df_pl[df_pl['GEO_ID'].isin(geo_code)]
    return df_pl

def get_nyc_tract(year,col_b,col_d):
    """Download tract rows for every county code listed in ``nyc`` (state 36).

    Parameters
    ----------
    year : str
        Vintage inserted into the request URL.
    col_b : str
        Comma-separated column list for the base endpoint.
    col_d : str
        Comma-separated column list for the ``/profile`` endpoint.

    Returns
    -------
    pandas.DataFrame
        All base rows left-joined to all profile rows on ``GEO_ID``. No type
        conversion is applied here.
    """
    frames1 = []
    frames2 = []
    for i in nyc:
        url1 = f'https://api.census.gov/data/{year}/{source}?get={col_b}&for=tract:*&in=state:36&in=county:{i}&key={myAPI}'
        url2 = f'https://api.census.gov/data/{year}/{source}/profile?get={col_d}&for=tract:*&in=state:36&in=county:{i}&key={myAPI}'
        frames1.append(_fetch_census_table(url1))
        frames2.append(_fetch_census_table(url2))
    df_tract = pd.merge(pd.concat(frames1),pd.concat(frames2),how='left',on='GEO_ID')
    return df_tract

def clean_data(df,var):
    """Return a cleaned copy of the ``var`` columns of ``df``.

    The first two names in ``var`` are left as they are; every later name is
    cast to float. Afterwards each occurrence of one of the placeholder codes
    listed below (presumably Census "no data" markers — verify) is replaced
    with NaN. ``df`` itself is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing at least the columns named in ``var``.
    var : list of str
        Columns to keep, in output order.

    Returns
    -------
    pandas.DataFrame
        New frame holding only the ``var`` columns.
    """
    placeholder_codes = [
        999999999, 555555555, 333333333, 222222222, 666666666, 888888888,
        -999999999, -555555555, -333333333, -222222222, -666666666, -888888888,
    ]
    float_dtypes = {name: float for name in var[2:]}
    subset = df[var].astype(float_dtypes)
    return subset.replace(placeholder_codes, np.nan)


### Variables for table calculations

## Median Age and Households Year 1

#### Subdivisions in NY-NJ-CT - Places in LI

In [7]:
# Download the Year 1 county-subdivision table. get_cousub issues two API
# requests for every county listed in stco, so this cell needs network access.
dfY1_sub = get_cousub(year1,col_b,col_d)

In [8]:
# Download the Year 1 place rows for state 36 and keep only the GEO_IDs listed
# in placeLI_2018.
# NOTE(review): a list named for 2018 is applied to year1 = '2019' data —
# confirm the place GEO_IDs are unchanged between the two vintages.
dfY1_pl = get_place(year1,col_b,col_d,placeLI_2018)

In [9]:
# Stack the subdivision rows on top of the place rows. sort=True orders the
# columns alphabetically; columns that exist in only one of the two frames are
# filled with NaN for the rows of the other (see the place_x/place_y columns
# in the preview below).
dfY1 = pd.concat([dfY1_sub,dfY1_pl],sort=True)
dfY1.head()

Unnamed: 0,B01002_001E,B01002_001M,DP02_0001E,DP02_0001M,GEO_ID,NAME,county subdivision_x,county subdivision_y,county_x,county_y,place_x,place_y,state_x,state_y
0,43.2,0.8,22271,365,0600000US0900133620,"Greenwich town, Fairfield County, Connecticut",33620,33620,1,1,,,9,9
1,46.6,1.6,3452,139,0600000US0900163480,"Redding town, Fairfield County, Connecticut",63480,63480,1,1,,,9,9
2,43.9,1.5,7116,200,0600000US0900150580,"New Canaan town, Fairfield County, Connecticut",50580,50580,1,1,,,9,9
3,46.4,1.7,4971,166,0600000US0900150860,"New Fairfield town, Fairfield County, Connecticut",50860,50860,1,1,,,9,9
4,46.0,1.1,9885,312,0600000US0900152980,"Newtown town, Fairfield County, Connecticut",52980,52980,1,1,,,9,9


In [10]:
# Keep only the var_data columns, cast the numeric ones to float and replace
# placeholder codes with NaN (see clean_data). Overwrites dfY1.
dfY1 = clean_data(dfY1,var_data)

In [11]:
# Preview the cleaned subdivision/place table.
dfY1.head()

Unnamed: 0,GEO_ID,NAME,B01002_001E,B01002_001M,DP02_0001E,DP02_0001M
0,0600000US0900133620,"Greenwich town, Fairfield County, Connecticut",43.2,0.8,22271.0,365.0
1,0600000US0900163480,"Redding town, Fairfield County, Connecticut",46.6,1.6,3452.0,139.0
2,0600000US0900150580,"New Canaan town, Fairfield County, Connecticut",43.9,1.5,7116.0,200.0
3,0600000US0900150860,"New Fairfield town, Fairfield County, Connecticut",46.4,1.7,4971.0,166.0
4,0600000US0900152980,"Newtown town, Fairfield County, Connecticut",46.0,1.1,9885.0,312.0


#### Tracts for NYC Only - to calculate as NTAs

In [19]:
# Download tract-level rows for the counties listed in nyc, then apply the
# same column selection / float cast / placeholder clean-up as for dfY1.
dfY1_nyc = get_nyc_tract(year1,col_b,col_d)
dfY1_nyc = clean_data(dfY1_nyc,var_data)

In [20]:
# Numeric key for joining to the tract crosswalk: GEO_ID with its first nine
# characters removed (presumably the summary-level prefix ending in "US",
# leaving the state+county+tract digits — verify).
# int64 is requested explicitly: plain `int` maps to a 32-bit integer on some
# platforms (e.g. Windows with NumPy < 2), which is too small for ids of this
# length.
dfY1_nyc['join_id'] = dfY1_nyc['GEO_ID'].str[9:].astype('int64')

In [21]:
#import csv to recode tracts to NTAs
geo = pd.read_csv(f'{path}data/nyc_xgeo.csv') 

In [22]:
# Attach the crosswalk columns to each tract row (inner join on the numeric
# tract id, so tracts missing from the crosswalk are dropped), then discard the
# join keys and every crosswalk column listed below, which leaves NTA in place.
dfY1_nyc = (
    dfY1_nyc
    .merge(geo, how="inner", left_on="join_id", right_on="tract")
    .drop(columns=[
        'tract',
        'GEO_ID',
        'join_id',
        'PUMA',
        'NTA_name',
        'Subbor_name',
        'Subbor',
    ])
)

In [23]:
# Preview the tract rows with their NTA code attached.
dfY1_nyc.head()

Unnamed: 0,NAME,B01002_001E,B01002_001M,DP02_0001E,DP02_0001M,NTA
0,"Census Tract 200, Bronx County, New York",32.7,1.8,1469.0,82.0,BX37
1,"Census Tract 205.01, Bronx County, New York",34.3,4.1,2858.0,91.0,BX36
2,"Census Tract 205.02, Bronx County, New York",39.3,5.9,926.0,45.0,BX36
3,"Census Tract 209, Bronx County, New York",36.4,4.1,1732.0,69.0,BX63
4,"Census Tract 210.01, Bronx County, New York",39.0,3.6,3841.0,174.0,BX46


In [24]:
# The tract name is not carried into the NTA table, so drop it before aggregating.
dfY1_nyc = dfY1_nyc.drop(columns=['NAME'])
dfY1_nyc.head()

Unnamed: 0,B01002_001E,B01002_001M,DP02_0001E,DP02_0001M,NTA
0,32.7,1.8,1469.0,82.0,BX37
1,34.3,4.1,2858.0,91.0,BX36
2,39.3,5.9,926.0,45.0,BX36
3,36.4,4.1,1732.0,69.0,BX63
4,39.0,3.6,3841.0,174.0,BX46


In [25]:
# Move the NTA column to the front, keeping the other columns in their
# existing order. NTA is selected by name rather than by position so the cell
# does not depend on it happening to be the last column after the merge.
cols = ['NTA'] + [c for c in dfY1_nyc.columns if c != 'NTA']
dfY1_nyc = dfY1_nyc[cols]

In [26]:
# Aggregate the NYC tract rows by NTA with geo_agg.calc_muni_agg (presumably
# this also recomputes the margins of error in the *M columns — verify), then
# rename NTA to GEO_ID so the result lines up with the subdivision/place table.
# NOTE(review): in the preview below B01002_001E reaches values in the hundreds
# (e.g. 453.9) although the tract-level inputs are in the 30s, so the tract
# values appear to be summed. If B01002_001E is a median, a sum is not a valid
# aggregate — confirm how calc_muni_agg should treat this column.
dfY1_nyc = geo_agg.calc_muni_agg(dfY1_nyc,'NTA')
dfY1_nyc = dfY1_nyc.rename(columns={'NTA':'GEO_ID'})
dfY1_nyc.head()

Unnamed: 0,GEO_ID,DP02_0001E,DP02_0001M,B01002_001E,B01002_001M
0,BX37,9443.0,207.292064,453.9,34.668285
1,BX36,19531.0,238.763481,413.8,13.786225
2,BX63,13427.0,200.259831,230.3,11.474319
3,BX46,12697.0,285.506567,189.0,8.038657
4,BX52,16692.0,316.045883,568.5,21.534159


#### Combine SubPlace and NYC NTA Tables into Municipality Table for 2019

In [27]:
# Append the NTA rows to the subdivision/place rows. dfY1_nyc has no NAME
# column, so NAME is NaN for the NTA rows. The original indexes are kept, so
# index labels repeat across the two parts.
# Re-running this cell appends the NTA rows again because dfY1 is overwritten.
dfY1 = pd.concat([dfY1,dfY1_nyc],sort=True)
dfY1.head()

Unnamed: 0,B01002_001E,B01002_001M,DP02_0001E,DP02_0001M,GEO_ID,NAME
0,43.2,0.8,22271.0,365.0,0600000US0900133620,"Greenwich town, Fairfield County, Connecticut"
1,46.6,1.6,3452.0,139.0,0600000US0900163480,"Redding town, Fairfield County, Connecticut"
2,43.9,1.5,7116.0,200.0,0600000US0900150580,"New Canaan town, Fairfield County, Connecticut"
3,46.4,1.7,4971.0,166.0,0600000US0900150860,"New Fairfield town, Fairfield County, Connecticut"
4,46.0,1.1,9885.0,312.0,0600000US0900152980,"Newtown town, Fairfield County, Connecticut"


In [28]:
# Write the combined table to <path>output/Demo/MedianAge_muni.xlsx. index=False
# is not passed, so the (repeating) row index is written as the first column.
dfY1.to_excel(f'{path}output/Demo/MedianAge_muni.xlsx')