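# Housing units by building size (ACS 5-year Data Profile, DP04)

**Instructions (TODO: expand):** set `onDCPServer` in the first code cell, then run every cell in order from a fresh kernel.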

In [1]:
### SET THIS VALUE BEFORE RUNNING ###
#
onDCPServer = False
#
#
## proxy assignment: `p` is None when working off-network,
## otherwise it is imported from the local `proxy` module
if onDCPServer:
    from proxy import p
else:
    p = None

In [2]:
import pandas as pd
import json
import requests
import math
import numpy as np
import os

In [3]:
import data_getters as get
import utilcalcs as calc
import geo_agg
from geo import *

## User-defined parameters

In [4]:
# Variables and predicates shared by the start and end years
# Topic: housing units by structure size
source = 'acs5/profile'
col = 'group(DP04)'
years = ['2010', '2019']

# DP04 columns we want:
#   0006 = total units
#   0007 = 1 unit detached     0008 = 1 unit attached
#   0009 = 2 units             0010 = 3 or 4 units
#   0011 = 5 to 9 units        0012 = 10 to 19 units     0013 = 20+ units
#   0014 = mobile home         0015 = RV, boat, other
# Suffix E marks an estimate column and M the matching margin-of-error column.
U1E = ['DP04_0007E', 'DP04_0008E']
U1M = ['DP04_0007M', 'DP04_0008M']
U5E = ['DP04_0011E', 'DP04_0012E', 'DP04_0013E']
U5M = ['DP04_0011M', 'DP04_0012M', 'DP04_0013M']
UOthE = ['DP04_0014E', 'DP04_0015E']
UOthM = ['DP04_0014M', 'DP04_0015M']

# Full list of columns kept from each pull, in output order
var_data = [
    'GEO_ID', 'DP04_0006E', 'DP04_0006M',
    *U1E, *U1M,
    'DP04_0009E', 'DP04_0009M', 'DP04_0010E', 'DP04_0010M',
    *U5E, *U5M,
    *UOthE, *UOthM,
]

### Geography crosswalk files

In [5]:
# Crosswalk for NYC subborough calculations
geo_nyc = pd.read_csv('../data/geo/nyc_subbor_10.csv')

# Adjusted sub-place list so calculations can be compared over time;
# ids are converted to strings for the year-0 place filter
geo_subpl = pd.read_csv('../data/geo/subpl10.csv')
subpl10 = list(map(str, geo_subpl['id']))

# Place list applied to the year-1 place pull; GEOIDs converted to strings
liplace19 = pd.read_csv('../data/geo/liplaces19.csv')
liplaces19 = list(map(str, liplace19['GEOID']))

#### calc/group building size columns function

In [6]:
# function to clean census data once pulled
# calc/rename housing unit by building size columns
def group_bsize(df, year):
    """Clean a raw DP04 pull and collapse it into building-size groups.

    Steps:
      1. Reduce ``df`` to the ``var_data`` columns via ``get.clean_data``.
      2. Add grouped columns: ``U1`` (from U1E/U1M), ``U5`` (from U5E/U5M)
         and ``UOth`` (from UOthE/UOthM). Estimates are row sums; margins
         of error come from ``calc.get_moe`` applied to each row's MOE
         columns.
      3. Drop the component columns and rename the remaining DP04 columns
         to ``UT`` (0006), ``U2`` (0009) and ``U34`` (0010).
      4. Insert the two-digit year before the final character of every
         column name except ``GEO_ID`` (e.g. ``UT_E`` -> ``UT_10E``).

    Parameters
    ----------
    df : pandas.DataFrame
        Raw pull; assumed to contain ``GEO_ID`` plus the ``var_data``
        columns (TODO confirm against ``get.clean_data``).
    year : str or int
        Data year; only its last two digits are used in the column names.

    Returns
    -------
    pandas.DataFrame
        ``GEO_ID`` plus year-suffixed estimate/MOE columns.
    """
    yy = str(year)[-2:]
    df = get.clean_data(df, var_data)

    # (output prefix, estimate columns, margin-of-error columns)
    groups = (
        ('U1', U1E, U1M),
        ('U5', U5E, U5M),
        ('UOth', UOthE, UOthM),
    )
    for prefix, est_cols, moe_cols in groups:
        df[f'{prefix}_E'] = df.loc[:, est_cols].sum(axis=1)
        # bind moe_cols as a default so the lambda never sees a later loop value
        df[f'{prefix}_M'] = df.apply(
            lambda row, cols=moe_cols: calc.get_moe(row[cols]), axis=1
        )

    component_cols = U1E + U1M + U5E + U5M + UOthE + UOthM
    df = df.drop(columns=component_cols).rename(columns={
        'DP04_0006E': 'UT_E', 'DP04_0006M': 'UT_M',
        'DP04_0009E': 'U2_E', 'DP04_0009M': 'U2_M',
        'DP04_0010E': 'U34_E', 'DP04_0010M': 'U34_M',
    })

    # Build the whole rename mapping once instead of renaming in place
    # column by column; the key column is excluded by name, not position.
    year_suffixed = {
        name: f'{name[:-1]}{yy}{name[-1:]}'
        for name in df.columns
        if name != 'GEO_ID'
    }
    return df.rename(columns=year_suffixed)


# TODO: add a function to calculate the change between the two years
#def calc_change(df):
#    pass

### County + Subregion - Year 0 & Year 1

In [7]:
# Pull DP04 at county level for each year and clean/group the columns
c_y0 = group_bsize(get.get_county(p, source, years[0], col), years[0])
c_y1 = group_bsize(get.get_county(p, source, years[1], col), years[1])

# One row per county with both years side by side
cou = pd.merge(left=c_y0, right=c_y1, on='GEO_ID')
# Tag each county via the `sub` lookup
# (presumably provided by `from geo import *` -- TODO confirm)
cou['sub'] = cou['GEO_ID'].map(sub)

In [8]:
# Aggregate counties up to their `sub` grouping:
# drop the county key, move `sub` to the first column, aggregate on it,
# then expose the group label under the GEO_ID column name.
subreg = (
    cou.drop(columns='GEO_ID')
       .pipe(lambda d: d[['sub'] + [c for c in d.columns if c != 'sub']])
       .pipe(geo_agg.calc_muni_agg, 'sub')
       .rename(columns={'sub': 'GEO_ID'})
)

In [9]:
# Stack the county rows on top of the subregion aggregates
cousubreg = pd.concat(objs=[cou, subreg], axis=0)

In [10]:
# Write the combined county + subregion table to Excel (row index omitted)
cousubreg.to_excel(
    '../../output/Housing/BuildSize_ACS_cousubreg.xlsx',
    index=False,
)

### Subplace - Year 0

In [11]:
# Places: pull all NY places, then keep only the LI places listed in subpl10
pl_y0 = get.get_place(p, source, years[0], col)
pl_y0 = pl_y0.loc[pl_y0['GEO_ID'].isin(subpl10)]
# MCDs: all MCDs in the region
mcd_y0 = get.get_mcd(p, source, years[0], col)
# Stack places and MCDs, then clean/group the building-size columns
df_y0 = group_bsize(pd.concat([pl_y0, mcd_y0]), years[0])

### Subplace - Year 1

In [12]:
# Places: pull all NY places, then keep only the LI places listed in liplaces19
pl_y1 = get.get_place(p, source, years[1], col)
pl_y1 = pl_y1.loc[pl_y1['GEO_ID'].isin(liplaces19)]
# MCDs: all MCDs in the region
mcd_y1 = get.get_mcd(p, source, years[1], col)
# Stack places and MCDs, then clean/group the building-size columns
df_y1 = group_bsize(pd.concat([pl_y1, mcd_y1]), years[1])

### Census Tract - Year 0

In [13]:
# Pull DP04 at census-tract level for the start year, then clean/group it
ct_y0 = group_bsize(get.get_tract(p, source, years[0], col), years[0])

Unnamed: 0,GEO_ID,DP04_0001E,DP04_0001M,DP04_0001PE,DP04_0001PM,DP04_0002E,DP04_0002M,DP04_0002PE,DP04_0002PM,DP04_0003E,...,DP04_0139PMA,DP04_0139PEA,DP04_0140EA,DP04_0140MA,DP04_0140PEA,DP04_0140PMA,DP04_0141EA,DP04_0141MA,DP04_0141PMA,DP04_0141PEA
0,9009154900,1823,104,1823,-888888888,1790,107,98.2,1.9,33,...,,,,,,,,,(X),(X)
1,9009155000,2330,119,2330,-888888888,2090,169,89.7,6.2,240,...,,,,,,,,,(X),(X)
2,9009155100,1868,109,1868,-888888888,1733,137,92.8,5.4,135,...,,,,,,,,,(X),(X)
3,9009157100,1214,39,1214,-888888888,1123,79,92.5,6.4,91,...,,,,,,,,,(X),(X)
4,9009157200,1328,61,1328,-888888888,1263,85,95.1,4.4,65,...,,,,,,,,,(X),(X)


### Census Tract - Year 1

In [16]:
# Pull DP04 at census-tract level for the end year, then clean/group it
ct_y1 = group_bsize(get.get_tract(p, source, years[1], col), years[1])

In [19]:
# Write the end-year tract table to Excel (row index omitted)
ct_y1.to_excel(
    f'../../output/Housing/BuildSize_ACS_tract_{years[1]}.xlsx',
    index=False,
)