This script creates Area Based Composite Measures using tract information from 2023 ACS data

Created 04-19-2025 <br> last updated 05-03-2025 by Erin Best

In [2]:
# Import Libraries
from functools import reduce

import numpy as np
import pandas as pd
import requests
from sklearn.preprocessing import StandardScaler


#### Using APIs <br> pull in ACS Data

In [3]:
## Overall Table
# Empty placeholder for the combined output. The merge cell further down rebinds
# `acsTable` to the GEO_ID-joined result, so nothing is appended to this frame.
acsTable = pd.DataFrame()

In [None]:
##### Median Income
# Pull every variable in ACS subject-table group S1903 for the tract-level
# geographies (1400000US...) nested under 0400000US08, via the Census Data API.
# NOTE(review): the notebook header says 2023 ACS data, but this endpoint is the
# 2021 5-year release -- confirm which vintage is intended.
url = "https://api.census.gov/data/2021/acs/acs5/subject"
params = {
    "get": "group(S1903)",
    "ucgid": "pseudo(0400000US08$1400000)",
}
# A timeout keeps a stalled connection from hanging the notebook indefinitely.
response = requests.get(url, params=params, timeout=60)
if response.status_code != 200:
    # Stop here: merely printing would leave `s1903` undefined and surface later
    # as an unrelated-looking NameError in the cleaning cell.
    raise RuntimeError(f"Failed to retrieve data: {response.status_code}")
data = response.json()
# The first row of the payload holds the column names; the rest are records.
s1903 = pd.DataFrame(data[1:], columns=data[0])
# s1903.head()

In [None]:
##### Educational Attainment
# Pull every variable in ACS subject-table group S1501 for the tract-level
# geographies (1400000US...) nested under 0400000US08, via the Census Data API.
url = "https://api.census.gov/data/2021/acs/acs5/subject"
params = {
    "get": "group(S1501)",
    "ucgid": "pseudo(0400000US08$1400000)",
}
# A timeout keeps a stalled connection from hanging the notebook indefinitely.
response = requests.get(url, params=params, timeout=60)
if response.status_code != 200:
    # Stop here: merely printing would leave `s1501` undefined and surface later
    # as an unrelated-looking NameError in the cleaning cell.
    raise RuntimeError(f"Failed to retrieve data: {response.status_code}")
data = response.json()
# The first row of the payload holds the column names; the rest are records.
s1501 = pd.DataFrame(data[1:], columns=data[0])
# s1501.head()

In [None]:
##### Households
# Pull every variable in ACS data-profile group DP02 for the tract-level
# geographies (1400000US...) nested under 0400000US08, via the Census Data API.
url = "https://api.census.gov/data/2021/acs/acs5/profile"
params = {
    "get": "group(DP02)",
    "ucgid": "pseudo(0400000US08$1400000)",
}
# A timeout keeps a stalled connection from hanging the notebook indefinitely.
response = requests.get(url, params=params, timeout=60)
if response.status_code != 200:
    # Stop here: merely printing would leave `dp02` undefined and surface later
    # as an unrelated-looking NameError in the cleaning cells.
    raise RuntimeError(f"Failed to retrieve data: {response.status_code}")
data = response.json()
# The first row of the payload holds the column names; the rest are records.
dp02 = pd.DataFrame(data[1:], columns=data[0])
# dp02.head()

In [None]:
##### Poverty Level
# Pull every variable in ACS detailed-table group B17001 for the tract-level
# geographies (1400000US...) nested under 0400000US08, via the Census Data API.
url = "https://api.census.gov/data/2021/acs/acs5"
params = {
    "get": "group(B17001)",
    "ucgid": "pseudo(0400000US08$1400000)",
}
# A timeout keeps a stalled connection from hanging the notebook indefinitely.
response = requests.get(url, params=params, timeout=60)
if response.status_code != 200:
    # Stop here: merely printing would leave `b17001` undefined and surface later
    # as an unrelated-looking NameError in the cleaning cell.
    raise RuntimeError(f"Failed to retrieve data: {response.status_code}")
data = response.json()
# The first row of the payload holds the column names; the rest are records.
b17001 = pd.DataFrame(data[1:], columns=data[0])
# b17001.head()

#### Clean up datasets as needed

##### Median Household Income

In [None]:
##### Median Income
# Keep the tract identifier and the S1903_C03_001E estimate, strip any "+" and
# "," characters from the text values, then parse them as numbers. Values that
# still cannot be parsed become NaN (errors="coerce").
# NOTE(review): ACS may encode unavailable estimates as large negative sentinel
# numbers, which would pass through here as valid incomes -- confirm none are present.
s1903_table = s1903.loc[:, ["GEO_ID", "S1903_C03_001E"]].assign(
    S1903_C03_001E=lambda frame: pd.to_numeric(
        frame["S1903_C03_001E"].str.replace(r"[\+,]", "", regex=True),
        errors="coerce",
    )
)
# s1903_table.describe() # used to confirm max house value returns as 250000
# s1903_table.dtypes    # used to confirm column types

##### Educational Attainment

In [None]:
##### Educational Attainment
# Collapse the 18-24 and 25+ attainment counts of S1501 into combined groups.
# Each output column is the row-wise sum of the source columns listed for it;
# the source values are parsed strictly (a non-numeric value raises).
s1501_table = s1501.loc[:, ["GEO_ID"] + [f"S1501_C01_{n:03d}E" for n in range(1, 16)]]

education_groups = {
    # Total (18-24) + Total (25+)
    "001E_006E": ["S1501_C01_001E", "S1501_C01_006E"],
    # Less than high school (18-24) + Less than 9th (25+)
    "002E_007E": ["S1501_C01_002E", "S1501_C01_007E"],
    # 9th to 12th, no diploma (25+)
    "008E": ["S1501_C01_008E"],
    # High school graduate (18-24) + High school graduate (25+)
    "003E_009E": ["S1501_C01_003E", "S1501_C01_009E"],
    # Some college or associates degree (18-24) + Some college, no degree (25+)
    # + Associates degree (25+)
    "004E_010E_011E": ["S1501_C01_004E", "S1501_C01_010E", "S1501_C01_011E"],
    # Bachelors degree or higher (18-24) + Bachelors degree (25+)
    "005E_012E": ["S1501_C01_005E", "S1501_C01_012E"],
    # Graduate or professional degree (25+)
    "013E": ["S1501_C01_013E"],
}

for group_name, source_cols in education_groups.items():
    if len(source_cols) == 1:
        # Single-source groups are just the numeric version of that column.
        s1501_table[group_name] = s1501_table[source_cols[0]].apply(pd.to_numeric)
    else:
        s1501_table[group_name] = s1501_table[source_cols].apply(pd.to_numeric).sum(axis=1)

# Drop the raw S1501 columns; only the tract id and the combined groups remain.
s1501_table = s1501_table[["GEO_ID", *education_groups]]
# s1501_table.describe()
# s1501_table.dtypes    # used to confirm column types


##### Household

In [None]:
##### Household - Total, Married-couple, Co-habitating couple, Male Householder no spouse, Female householder no spouse
# Select the tract id and the five household-type counts, parsing each count
# column as a number (unparseable values become NaN).
dp02_table = dp02.loc[
    :, ["GEO_ID", "DP02_0001E", "DP02_0002E", "DP02_0004E", "DP02_0006E", "DP02_0010E"]
].assign(
    **{
        code: pd.to_numeric(dp02[code], errors="coerce")
        for code in ("DP02_0001E", "DP02_0002E", "DP02_0004E", "DP02_0006E", "DP02_0010E")
    }
)
# dp02_table.describe()
# dp02_table.dtypes # used to confirm column types

In [None]:
##### Household2 - Male Total, Female Total, Never married, now married except separated, separated, widowed, divorced
# Select the tract id, the male/female totals and the ten marital-status counts.
marital_count_cols = [
    "DP02_0025E", "DP02_0031E",                                          # male total, female total
    "DP02_0026E", "DP02_0027E", "DP02_0028E", "DP02_0029E", "DP02_0030E",  # male by status
    "DP02_0032E", "DP02_0033E", "DP02_0034E", "DP02_0035E", "DP02_0036E",  # female by status
]
dp02_table2 = dp02.loc[:, ["GEO_ID", *marital_count_cols]]
## Convert columns to numeric
# The two totals are converted as well: previously they stayed as strings, which
# forced the percent cell to cast them separately before it could add them.
dp02_table2[marital_count_cols] = dp02_table2[marital_count_cols].apply(
    pd.to_numeric, errors="coerce"
)
# dp02_table2.describe()
# dp02_table2.dtypes # used to confirm column types

In [None]:
##### Household3 - Total population, Native born, foreign born
# Select the tract id and the three nativity counts, parsing each count column
# as a number (unparseable values become NaN).
dp02_table3 = dp02.loc[:, ["GEO_ID", "DP02_0088E", "DP02_0089E", "DP02_0094E"]].assign(
    **{
        code: pd.to_numeric(dp02[code], errors="coerce")
        for code in ("DP02_0088E", "DP02_0089E", "DP02_0094E")
    }
)
# dp02_table3.describe()
# dp02_table3.dtypes # used to confirm column types

In [None]:
##### Household4 - Civilian Pop over 18, Civilian veterans
# Select the tract id and the two veteran-status counts, parsing each count
# column as a number (unparseable values become NaN).
dp02_table4 = dp02.loc[:, ["GEO_ID", "DP02_0069E", "DP02_0070E"]].assign(
    **{
        code: pd.to_numeric(dp02[code], errors="coerce")
        for code in ("DP02_0069E", "DP02_0070E")
    }
)
# dp02_table4.describe()
# dp02_table4.dtypes # used to confirm column types

##### Poverty

In [None]:
##### Poverty - below poverty level, total
# Select the tract id, the B17001 total and the below-poverty count, parsing
# both counts as numbers (unparseable values become NaN).
b17001_table = b17001.loc[:, ["GEO_ID", "B17001_001E", "B17001_002E"]].assign(
    **{
        code: pd.to_numeric(b17001[code], errors="coerce")
        for code in ("B17001_001E", "B17001_002E")
    }
)
# b17001_table.describe()
# b17001_table.dtypes # used to confirm column types

#### Calculate all variables and add to table - Use GEO_ID as Tract ID

##### Median Household Income

In [None]:
## Income (S1903_C03_001E) vs. the median across tracts (s1903_medIncome):
## flag is 1 when income >= median, 0 when income < median.
# Series.median() skips NaN. The cleaning step coerces unparseable incomes to
# NaN, and np.median would return NaN for the whole column if even one were
# present, silently setting every flag to 0.
s1903_medIncome = s1903_table["S1903_C03_001E"].median()

# Binary
s1903_table["householdIncomeAboveMedian"] = (
    s1903_table["S1903_C03_001E"] >= s1903_medIncome
).astype(int)
print(f"Median Household Income ($): {s1903_medIncome}")

# Alternative (not used): Low/Medium/High terciles
# s1903_table["householdIncomeBins"] = pd.qcut(s1903_table["S1903_C03_001E"], q=3, labels=["Low", "Medium", "High"])

# Very Low/Low/Medium/High/Very High quintile bins
s1903_table["householdIncomeBins"] = pd.qcut(
    s1903_table["S1903_C03_001E"],
    q=5,
    labels=["Very Low", "Low", "Medium", "High", "Very High"],
)
s1903_table.head()

Median Household Income ($): 77756.0


Unnamed: 0,GEO_ID,S1903_C03_001E,householdIncomeAboveMedian,householdIncomeBins
0,1400000US08001007801,37702,0,Very Low
1,1400000US08001007802,46096,0,Very Low
2,1400000US08001007900,52952,0,Very Low
3,1400000US08001008000,60447,0,Low
4,1400000US08001008100,51034,0,Very Low


##### Education Attainment

In [None]:
# Source columns (combined 18-24 and 25+ counts built in the cleaning cell):
# 001E_006E      - Total
# 002E_007E      - Less than high school
# 008E           - 9th to 12th, no diploma (25+)
# 003E_009E      - High school graduate
# 004E_010E_011E - Some college, no degree, or Associates degree
# 005E_012E      - Bachelors or higher (18-24) plus Bachelors only (25+)
# 013E           - Graduate or professional degree (25+)

# Each percent column is the sum of its listed count columns divided by the
# combined total, times 100, rounded to 2 decimals.
education_percents = {
    "lessThanHS_P": ["002E_007E"],
    "HSNoDiploma_P": ["008E"],
    "HSGrad_P": ["003E_009E"],
    "SomeCollegeNoDeg_P": ["004E_010E_011E"],
    "BachOrHigher_P": ["005E_012E"],
    "GradOrProf_P": ["013E"],
    # No HS vs HS Grad percent
    "noHSDegree_P": ["002E_007E", "008E"],
    "hasHSDegree_P": ["003E_009E", "004E_010E_011E", "005E_012E", "013E"],
}
for percent_col, count_cols in education_percents.items():
    # skipna=False keeps the behaviour of plain `+`: a missing count yields NaN.
    share = s1501_table[count_cols].sum(axis=1, skipna=False) / s1501_table["001E_006E"]
    s1501_table[percent_col] = (share * 100).round(2)

# Very Low/Low/Medium/High/Very High quintile bins on hasHSDegree_P
s1501_table["hsDegreeBins"] = pd.qcut(
    s1501_table["hasHSDegree_P"],
    q=5,
    labels=["Very Low", "Low", "Medium", "High", "Very High"],
)
s1501_table.head()


Unnamed: 0,GEO_ID,001E_006E,002E_007E,008E,003E_009E,004E_010E_011E,005E_012E,013E,lessThanHS_P,HSNoDiploma_P,HSGrad_P,SomeCollegeNoDeg_P,BachOrHigher_P,GradOrProf_P,noHSDegree_P,hasHSDegree_P,hsDegreeBins
0,1400000US08001007801,2586,724,392,619,675,132,44,28.0,15.16,23.94,26.1,5.1,1.7,43.16,56.84,Very Low
1,1400000US08001007802,3296,813,253,1190,717,217,106,24.67,7.68,36.1,21.75,6.58,3.22,32.34,67.66,Very Low
2,1400000US08001007900,4021,683,493,1316,963,324,242,16.99,12.26,32.73,23.95,8.06,6.02,29.25,70.75,Very Low
3,1400000US08001008000,4114,603,262,1390,1078,459,322,14.66,6.37,33.79,26.2,11.16,7.83,21.03,78.97,Very Low
4,1400000US08001008100,1445,77,31,175,259,548,355,5.33,2.15,12.11,17.92,37.92,24.57,7.47,92.53,Low


##### Household

In [None]:
# Household1
# DP02_0001E - Total households
# Output prefix -> count column for each household type:
household_types = {
    "married": "DP02_0002E",            # Married-couple households
    "cohabiting": "DP02_0004E",         # Cohabiting couple households
    "maleHouseholder": "DP02_0006E",    # Male householder, no spouse
    "femaleHouseholder": "DP02_0010E",  # Female householder, no spouse
}

# Percents: share of the tract's own households (row-wise), not of the
# household type across tracts.
for prefix, count_col in household_types.items():
    dp02_table[f"{prefix}_P"] = round(
        (dp02_table[count_col] / dp02_table["DP02_0001E"]) * 100, 2
    )

# Rank within each row (1 = lowest occurrence; dense ranking, so ties share a
# rank and the top rank can be below 4).
colsToRank = list(household_types.values())
newName = [f"{prefix}Rank" for prefix in household_types]
dp02_table[newName] = dp02_table[colsToRank].rank(axis=1, method="dense")

# Very Low/Low/Medium/High/Very High bins: quintiles of each percent column
# across all tracts (column-wise), not fixed cut points.
for prefix in household_types:
    dp02_table[f"{prefix}Bins"] = pd.qcut(
        dp02_table[f"{prefix}_P"],
        q=5,
        labels=["Very Low", "Low", "Medium", "High", "Very High"],
    )

dp02_table.head()

Unnamed: 0,GEO_ID,DP02_0001E,DP02_0002E,DP02_0004E,DP02_0006E,DP02_0010E,married_P,cohabiting_P,maleHouseholder_P,femaleHouseholder_P,marriedRank,cohabitingRank,maleHouseholderRank,femaleHouseholderRank,marriedBins,cohabitingBins,maleHouseholderBins,femaleHouseholderBins
0,1400000US08001007801,1285,336,143,387,419,26.15,11.13,30.12,32.61,2.0,1.0,3.0,4.0,Very Low,Very High,Very High,Very High
1,1400000US08001007802,1468,481,181,495,311,32.77,12.33,33.72,21.19,3.0,1.0,4.0,2.0,Very Low,Very High,Very High,Medium
2,1400000US08001007900,2131,734,250,703,444,34.44,11.73,32.99,20.84,4.0,1.0,3.0,2.0,Very Low,Very High,Very High,Low
3,1400000US08001008000,1735,712,114,315,594,41.04,6.57,18.16,34.24,4.0,1.0,2.0,3.0,Low,Medium,Medium,Very High
4,1400000US08001008100,716,182,91,213,230,25.42,12.71,29.75,32.12,2.0,1.0,3.0,4.0,Very Low,Very High,Very High,Very High


In [None]:
# Household2
#           Never married   married      separated    widowed      divorced     Total
# Male      DP02_0026E      DP02_0027E   DP02_0028E   DP02_0029E   DP02_0030E   DP02_0025E
# Female    DP02_0032E      DP02_0033E   DP02_0034E   DP02_0035E   DP02_0036E   DP02_0031E
marital_statuses = {
    "neverMarried": ("DP02_0026E", "DP02_0032E"),
    "nowMarried": ("DP02_0027E", "DP02_0033E"),
    "separated": ("DP02_0028E", "DP02_0034E"),
    "widowed": ("DP02_0029E", "DP02_0035E"),
    "divorced": ("DP02_0030E", "DP02_0036E"),
}

# Percents: male + female count for each status over the male + female total,
# i.e. the distribution within each GEO_ID.
# pd.to_numeric works whether the totals arrive as strings or as numbers.
total = pd.to_numeric(dp02_table2["DP02_0025E"], errors="coerce") + pd.to_numeric(
    dp02_table2["DP02_0031E"], errors="coerce"
)
for status, (male_col, female_col) in marital_statuses.items():
    dp02_table2[f"{status}_P"] = (
        (dp02_table2[male_col] + dp02_table2[female_col]) / total * 100
    ).round(2)

# Binary flags against the median never-married count across tracts.
never_married_count = dp02_table2["DP02_0026E"] + dp02_table2["DP02_0032E"]
# Series.median() skips NaN; np.median would return NaN if any count were missing.
dp02_medianNeverMarried = never_married_count.median()
# "below" is strict so that a tract exactly at the median is flagged only as
# above (>=), matching the income flag's convention; previously it got a 1 in both.
dp02_table2["belowNeverMarriedMedian"] = (
    never_married_count < dp02_medianNeverMarried
).astype(int)
dp02_table2["aboveNeverMarriedMedian"] = (
    never_married_count >= dp02_medianNeverMarried
).astype(int)
print(f"Median Never Married: {dp02_medianNeverMarried}")

# Very Low/Low/Medium/High/Very High bins: quintiles of each percent column
# across all tracts (column-wise).
for status in marital_statuses:
    dp02_table2[f"{status}Bins"] = pd.qcut(
        dp02_table2[f"{status}_P"],
        q=5,
        labels=["Very Low", "Low", "Medium", "High", "Very High"],
    )

dp02_table2.head()

Median Never Married: 955.0


Unnamed: 0,GEO_ID,DP02_0025E,DP02_0031E,DP02_0026E,DP02_0027E,DP02_0028E,DP02_0029E,DP02_0030E,DP02_0032E,DP02_0033E,...,separated_P,widowed_P,divorced_P,belowNeverMarriedMedian,aboveNeverMarriedMedian,neverMarriedBins,nowMarriedBins,separatedBins,widowedBins,divorcedBins
0,1400000US08001007801,1412,1405,771,399,84,0,158,558,403,...,8.8,4.97,10.58,0,1,Very High,Very Low,Very High,High,Medium
1,1400000US08001007802,1786,1604,950,524,0,66,246,716,563,...,0.53,3.69,14.57,0,1,Very High,Very Low,Low,Medium,High
2,1400000US08001007900,2235,2115,982,879,98,77,199,806,743,...,4.87,5.1,11.63,0,1,High,Very Low,Very High,High,Medium
3,1400000US08001008000,2329,2056,1056,1081,30,14,148,616,977,...,1.32,2.65,10.97,0,1,High,Low,High,Low,Medium
4,1400000US08001008100,776,680,378,247,6,33,112,368,231,...,1.51,4.46,9.96,1,0,Very High,Very Low,High,Medium,Low


In [None]:
# Household3
# DP02_0088E - Total population
# DP02_0089E - Native born
# DP02_0094E - Foreign born

# Percents: share of the tract's total population.
dp02_table3["nativeBorn_P"] = (
    dp02_table3["DP02_0089E"] / dp02_table3["DP02_0088E"] * 100
).round(2)
dp02_table3["foreignBorn_P"] = (
    dp02_table3["DP02_0094E"] / dp02_table3["DP02_0088E"] * 100
).round(2)

# Binary flags against the median count across tracts.
# Series.median() skips NaN; np.median would return NaN if any count were missing.
# "below" is strict so that a tract exactly at the median is flagged only as
# above (>=), matching the income flag's convention; previously it got a 1 in both.
dp02_nativeBornMedian = dp02_table3["DP02_0089E"].median()
dp02_table3["belowNativeBornMedian"] = (
    dp02_table3["DP02_0089E"] < dp02_nativeBornMedian
).astype(int)
dp02_table3["aboveNativeBornMedian"] = (
    dp02_table3["DP02_0089E"] >= dp02_nativeBornMedian
).astype(int)
print(f"Median Native Born: {dp02_nativeBornMedian}")

dp02_foreignBornMedian = dp02_table3["DP02_0094E"].median()
dp02_table3["belowForeignBornMedian"] = (
    dp02_table3["DP02_0094E"] < dp02_foreignBornMedian
).astype(int)
dp02_table3["aboveForeignBornMedian"] = (
    dp02_table3["DP02_0094E"] >= dp02_foreignBornMedian
).astype(int)
print(f"Median Foreign Born: {dp02_foreignBornMedian}")

# Very Low/Low/Medium/High/Very High bins: quintiles of each percent column
# across all tracts (column-wise).
for nativity in ("nativeBorn", "foreignBorn"):
    dp02_table3[f"{nativity}Bins"] = pd.qcut(
        dp02_table3[f"{nativity}_P"],
        q=5,
        labels=["Very Low", "Low", "Medium", "High", "Very High"],
    )

dp02_table3.head()

Median Native Born: 3358.0
Median Foreign Born: 253.0


Unnamed: 0,GEO_ID,DP02_0088E,DP02_0089E,DP02_0094E,nativeBorn_P,foreignBorn_P,belowNativeBornMedian,aboveNativeBornMedian,belowForeignBornMedian,aboveForeignBornMedian,nativeBornBins,foreignBornBins
0,1400000US08001007801,4027,2022,2005,50.21,49.79,1,0,0,1,Very Low,Very High
1,1400000US08001007802,4598,2764,1834,60.11,39.89,1,0,0,1,Very Low,Very High
2,1400000US08001007900,5749,3781,1968,65.77,34.23,0,1,0,1,Very Low,Very High
3,1400000US08001008000,5515,4450,1065,80.69,19.31,0,1,0,1,Very Low,Very High
4,1400000US08001008100,1538,1205,333,78.35,21.65,1,0,0,1,Very Low,Very High


In [None]:
# Household4
# DP02_0069E - Civilian population 18 and older
# DP02_0070E - Civilian veterans

# Percents: veterans as a share of the tract's civilian population 18+.
dp02_table4["civilVet_P"] = (
    dp02_table4["DP02_0070E"] / dp02_table4["DP02_0069E"] * 100
).round(2)

# Binary flags against the median veteran count across tracts.
# Series.median() skips NaN; np.median would return NaN if any count were missing.
# "below" is strict so that a tract exactly at the median is flagged only as
# above (>=), matching the income flag's convention; previously it got a 1 in both.
dp02_vetMedian = dp02_table4["DP02_0070E"].median()
dp02_table4["belowCivilVetMedian"] = (dp02_table4["DP02_0070E"] < dp02_vetMedian).astype(int)
dp02_table4["aboveCivilVetMedian"] = (dp02_table4["DP02_0070E"] >= dp02_vetMedian).astype(int)
print(f"Median Civilian Veterans: {dp02_vetMedian}")

# Very Low/Low/Medium/High/Very High bins: quintiles of civilVet_P across all
# tracts (column-wise).
dp02_table4["civilVetBins"] = pd.qcut(
    dp02_table4["civilVet_P"],
    q=5,
    labels=["Very Low", "Low", "Medium", "High", "Very High"],
)

dp02_table4.head()

Median Civilian Veterans: 209.0


Unnamed: 0,GEO_ID,DP02_0069E,DP02_0070E,civilVet_P,belowCivilVetMedian,aboveCivilVetMedian,civilVetBins
0,1400000US08001007801,2586,88,3.4,1,0,Very Low
1,1400000US08001007802,3296,235,7.13,0,1,Medium
2,1400000US08001007900,4012,149,3.71,1,0,Very Low
3,1400000US08001008000,4114,407,9.89,0,1,High
4,1400000US08001008100,1431,169,11.81,1,0,Very High


##### Poverty

In [None]:
# B17001_001E - Total
# B17001_002E - Number of people whose income in past 12 months below poverty level

# Percents: distribution within each GEO_ID (row-wise).
b17001_table["belowPoverty_P"] = (
    b17001_table["B17001_002E"] / b17001_table["B17001_001E"] * 100
).round(2)
b17001_table["atOrAbovePoverty_P"] = 100 - b17001_table["belowPoverty_P"]

# Binary flags against the median below-poverty count across tracts.
# Series.median() skips NaN; np.median would return NaN if any count were missing.
# "below" is strict so that a tract exactly at the median is flagged only as
# above (>=), matching the income flag's convention; previously it got a 1 in both.
b17001_medianBelowPoverty = b17001_table["B17001_002E"].median()
b17001_table["belowPovertyMedian"] = (
    b17001_table["B17001_002E"] < b17001_medianBelowPoverty
).astype(int)
b17001_table["abovePovertyMedian"] = (
    b17001_table["B17001_002E"] >= b17001_medianBelowPoverty
).astype(int)
print(f"Median Number of People Below Poverty: {b17001_medianBelowPoverty}")

# Very Low/Low/Medium/High/Very High bins: quintiles of each percent column
# across all tracts (column-wise).
for poverty_side in ("belowPoverty", "atOrAbovePoverty"):
    b17001_table[f"{poverty_side}Bins"] = pd.qcut(
        b17001_table[f"{poverty_side}_P"],
        q=5,
        labels=["Very Low", "Low", "Medium", "High", "Very High"],
    )

b17001_table.head()

Median Number of People Below Poverty: 274.0


Unnamed: 0,GEO_ID,B17001_001E,B17001_002E,belowPoverty_P,atOrAbovePoverty_P,belowPovertyMedian,abovePovertyMedian,belowPovertyBins,atOrAbovePovertyBins
0,1400000US08001007801,4001,1217,30.42,69.58,0,1,Very High,Very Low
1,1400000US08001007802,4593,1485,32.33,67.67,0,1,Very High,Very Low
2,1400000US08001007900,5749,1039,18.07,81.93,0,1,Very High,Very Low
3,1400000US08001008000,5495,515,9.37,90.63,0,1,Medium,Medium
4,1400000US08001008100,1394,360,25.82,74.18,0,1,Very High,Very Low


#### Merge Tables

In [None]:
## Columns
# Print each derived table's column index, followed by a blank line.
for derived_table in (
    s1903_table,
    s1501_table,
    dp02_table,
    dp02_table2,
    dp02_table3,
    dp02_table4,
    b17001_table,
):
    print(f"{derived_table.columns}\n")

Index(['GEO_ID', 'S1903_C03_001E', 'householdIncomeAboveMedian',
       'householdIncomeBins'],
      dtype='object')

Index(['GEO_ID', '001E_006E', '002E_007E', '008E', '003E_009E',
       '004E_010E_011E', '005E_012E', '013E', 'lessThanHS_P', 'HSNoDiploma_P',
       'HSGrad_P', 'SomeCollegeNoDeg_P', 'BachOrHigher_P', 'GradOrProf_P',
       'noHSDegree_P', 'hasHSDegree_P', 'hsDegreeBins'],
      dtype='object')

Index(['GEO_ID', 'DP02_0001E', 'DP02_0002E', 'DP02_0004E', 'DP02_0006E',
       'DP02_0010E', 'married_P', 'cohabiting_P', 'maleHouseholder_P',
       'femaleHouseholder_P', 'marriedRank', 'cohabitingRank',
       'maleHouseholderRank', 'femaleHouseholderRank', 'marriedBins',
       'cohabitingBins', 'maleHouseholderBins', 'femaleHouseholderBins'],
      dtype='object')

Index(['GEO_ID', 'DP02_0025E', 'DP02_0031E', 'DP02_0026E', 'DP02_0027E',
       'DP02_0028E', 'DP02_0029E', 'DP02_0030E', 'DP02_0032E', 'DP02_0033E',
       'DP02_0034E', 'DP02_0035E', 'DP02_0036E', 'neverMarr

In [None]:
## Length
# Print each derived table's row count, followed by a blank line, so the
# per-table tract counts can be compared before merging.
for derived_table in (
    s1903_table,
    s1501_table,
    dp02_table,
    dp02_table2,
    dp02_table3,
    dp02_table4,
    b17001_table,
):
    print(f"{len(derived_table)}\n")

1447

1447

1447

1447

1447

1447

1447



In [None]:
# Table Subsets
# SUBSET_MODE selects which derived columns each table contributes to the merge
# (this replaces three hand-toggled blocks of commented-out code):
#   "measures"          -> percents, ranks and median flags, no bins
#   "measures_and_bins" -> everything above plus the bin columns
#   "bins"              -> bin columns only (the variant that was active)
# NOTE: the export cell writes to "ACSData-BinsOnly.txt" regardless of the mode.
SUBSET_MODE = "bins"

_SUBSET_PARTS = {
    # mode: (include measure columns, include bin columns)
    "measures": (True, False),
    "measures_and_bins": (True, True),
    "bins": (False, True),
}
if SUBSET_MODE not in _SUBSET_PARTS:
    raise ValueError(
        f"Unknown SUBSET_MODE {SUBSET_MODE!r}; expected one of {sorted(_SUBSET_PARTS)}"
    )


def _subset(table, measure_cols, bin_cols):
    """Return GEO_ID plus the measure and/or bin columns chosen by SUBSET_MODE."""
    include_measures, include_bins = _SUBSET_PARTS[SUBSET_MODE]
    chosen = ["GEO_ID"]
    if include_measures:
        chosen += measure_cols
    if include_bins:
        chosen += bin_cols
    return table[chosen]


# The income bin column is "householdIncomeBins"; the old commented-out variant
# referred to "holdholdIncomeBins", which is not created by any active code.
s1903_subset = _subset(
    s1903_table,
    ["householdIncomeAboveMedian"],
    ["householdIncomeBins"],
)
s1501_subset = _subset(
    s1501_table,
    [
        "lessThanHS_P", "HSNoDiploma_P", "HSGrad_P", "SomeCollegeNoDeg_P",
        "BachOrHigher_P", "GradOrProf_P", "noHSDegree_P", "hasHSDegree_P",
    ],
    ["hsDegreeBins"],
)
dp02_subset = _subset(
    dp02_table,
    [
        "married_P", "cohabiting_P", "maleHouseholder_P", "femaleHouseholder_P",
        "marriedRank", "cohabitingRank", "maleHouseholderRank", "femaleHouseholderRank",
    ],
    ["marriedBins", "cohabitingBins", "maleHouseholderBins", "femaleHouseholderBins"],
)
dp02_subset2 = _subset(
    dp02_table2,
    [
        "neverMarried_P", "nowMarried_P", "separated_P", "widowed_P", "divorced_P",
        "belowNeverMarriedMedian", "aboveNeverMarriedMedian",
    ],
    ["neverMarriedBins", "nowMarriedBins", "separatedBins", "widowedBins", "divorcedBins"],
)
dp02_subset3 = _subset(
    dp02_table3,
    [
        "nativeBorn_P", "foreignBorn_P",
        "belowNativeBornMedian", "aboveNativeBornMedian",
        "belowForeignBornMedian", "aboveForeignBornMedian",
    ],
    ["nativeBornBins", "foreignBornBins"],
)
dp02_subset4 = _subset(
    dp02_table4,
    ["civilVet_P", "belowCivilVetMedian", "aboveCivilVetMedian"],
    ["civilVetBins"],
)
b17001_subset = _subset(
    b17001_table,
    ["belowPoverty_P", "atOrAbovePoverty_P", "belowPovertyMedian", "abovePovertyMedian"],
    ["belowPovertyBins", "atOrAbovePovertyBins"],
)


In [None]:
# MERGE TABLES
# Fold the per-topic subsets into one frame keyed on GEO_ID. `reduce` is
# imported with the other libraries in the notebook's import cell.
tables = [
    s1903_subset,
    s1501_subset,
    dp02_subset,
    dp02_subset2,
    dp02_subset3,
    dp02_subset4,
    b17001_subset,
]
acsTable = reduce(
    # validate="one_to_one" raises if GEO_ID repeats on either side, instead of
    # letting the join silently multiply rows.
    lambda left, right: pd.merge(
        left, right, on="GEO_ID", how="inner", validate="one_to_one"
    ),
    tables,
)

acsTable.columns

Index(['GEO_ID', 'householdIncomeBins', 'hsDegreeBins', 'marriedBins',
       'cohabitingBins', 'maleHouseholderBins', 'femaleHouseholderBins',
       'neverMarriedBins', 'nowMarriedBins', 'separatedBins', 'widowedBins',
       'divorcedBins', 'nativeBornBins', 'foreignBornBins', 'civilVetBins',
       'belowPovertyBins', 'atOrAbovePovertyBins'],
      dtype='object')

In [30]:
# Show the merged table; the notebook renders a truncated view of long frames.
acsTable

Unnamed: 0,GEO_ID,householdIncomeBins,hsDegreeBins,marriedBins,cohabitingBins,maleHouseholderBins,femaleHouseholderBins,neverMarriedBins,nowMarriedBins,separatedBins,widowedBins,divorcedBins,nativeBornBins,foreignBornBins,civilVetBins,belowPovertyBins,atOrAbovePovertyBins
0,1400000US08001007801,Very Low,Very Low,Very Low,Very High,Very High,Very High,Very High,Very Low,Very High,High,Medium,Very Low,Very High,Very Low,Very High,Very Low
1,1400000US08001007802,Very Low,Very Low,Very Low,Very High,Very High,Medium,Very High,Very Low,Low,Medium,High,Very Low,Very High,Medium,Very High,Very Low
2,1400000US08001007900,Very Low,Very Low,Very Low,Very High,Very High,Low,High,Very Low,Very High,High,Medium,Very Low,Very High,Very Low,Very High,Very Low
3,1400000US08001008000,Low,Very Low,Low,Medium,Medium,Very High,High,Low,High,Low,Medium,Very Low,Very High,High,Medium,Medium
4,1400000US08001008100,Very Low,Low,Very Low,Very High,Very High,Very High,Very High,Very Low,High,Medium,Low,Very Low,Very High,Very High,Very High,Very Low
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1442,1400000US08123002300,Medium,Very Low,High,Low,Low,Low,Low,High,Medium,High,Low,High,Low,Medium,Medium,Medium
1443,1400000US08123002501,Medium,Medium,Very High,Very Low,Very Low,Low,Low,High,Medium,Medium,Low,High,Low,High,Very High,Very Low
1444,1400000US08123002502,Medium,Very Low,High,Medium,Low,Low,Low,High,High,Medium,Low,Low,High,Medium,High,Low
1445,1400000US08125963100,Low,Very Low,High,Medium,Low,Medium,Low,High,Medium,Very High,Low,Low,High,Medium,High,Low


Write DF to TXT File

In [None]:
# Write the merged table as tab-delimited text with a single header row.
# The delimiter must be given explicitly: np.savetxt separates values with a
# single space by default, which did not match the tab-joined header and made
# rows ambiguous because labels such as "Very Low" contain spaces.
np.savetxt(
    "ACSData-BinsOnly.txt",
    acsTable.values,
    fmt="%s",
    delimiter="\t",
    header="\t".join(acsTable.columns),
    comments="",  # no "# " prefix on the header line
)

| Output Column Header   | ACS Columns Utilized |
| -------- | ------- |
| GEO_ID   | GEO_ID |
| householdIncomeBins   | S1903_C03_001E |
| hsDegreeBins   | S1501_C01_001E (total 18-24) <br> S1501_C01_003E (High school graduate (18-24)) <br> S1501_C01_004E (Some college or associates degree (18-24)) <br> S1501_C01_005E (Bachelor's degree or higher (18-24)) <br> S1501_C01_006E (total 25+) <br> S1501_C01_009E (High school graduate (25+)) <br> S1501_C01_010E (Some college, no degree (25+)) <br> S1501_C01_011E (Associates Degree (25+)) <br> S1501_C01_012E (Bachelor's degree (25+)) <br> S1501_C01_013E (graduate or professional degree (25+)) |
| marriedBins  | DP02_0001E (total) <br> DP02_0002E (married couple) |
| cohabitingBins   | DP02_0001E (total) <br> DP02_0004E (cohabiting couple)  |
| maleHouseholderBins   | DP02_0001E (total) <br> DP02_0006E (male householder no spouse)  |
| femaleHouseholderBins   | DP02_0001E (total) <br> DP02_0010E (female householder no spouse)  |
| neverMarriedBins | DP02_0025E (Male Total) <br> DP02_0031E (Female Total) <br> DP02_0026E (Male Never Married) <br> DP02_0032E (Female Never Married) |
| nowMarriedBins   | DP02_0025E (Male Total) <br> DP02_0031E (Female Total) <br> DP02_0027E (Male Now Married) <br> DP02_0033E (Female Now Married) |
| separatedBins   | DP02_0025E (Male Total) <br> DP02_0031E (Female Total) <br> DP02_0028E (Male Separated) <br> DP02_0034E (Female Separated) |
| widowedBins   | DP02_0025E (Male Total) <br> DP02_0031E (Female Total) <br> DP02_0029E (Male Widowed) <br> DP02_0035E (Female Widowed) |
| divorcedBins  | DP02_0025E (Male Total) <br> DP02_0031E (Female Total) <br> DP02_0030E (Male Divorced) <br> DP02_0036E (Female Divorced) |
| nativeBornBins   | DP02_0088E (total) <br> DP02_0089E (Native Born)|
| foreignBornBins   | DP02_0088E (total) <br> DP02_0094E (Foreign Born) |
| civilVetBins   | DP02_0069E (Civilian Population 18+) <br> DP02_0070E (Civilian Veterans) |
| belowPovertyBins  | B17001_001E (total) <br> B17001_002E (Number of people whose income was below poverty level in the last 12 months) |
| atOrAbovePovertyBins  | B17001_001E (total) <br> B17001_002E (Number of people whose income was below poverty level in the last 12 months) |
