In [1]:
import pandas as pd
import altair as alt

# American Community Survey demographic data

Source: NYC Department of City Planning

## Ethnicity

### Load ACS data exported from geodatabase (all boroughs) and calculate non-white percentage

In [2]:
# Load the raw ACS demographics export, relabel its columns, derive the
# non-white percentage, and order the rows by NTA code.
df_acs_dem = (
    pd.read_csv(
        "data/raw/acs-demographics.csv",
        header=0,  # treat the first row as a header and replace it with `names`
        names=[
            "borough_code",
            "borough_name",
            "nta_code",
            "nta_name",
            "total_pop",
            "hispanic_no",
            "hispanic_pc",
            "white_no",
            "white_pc",
            "black_no",
            "black_pc",
            "asian_no",
            "asian_pc",
        ],
    )
    # Non-white share is the complement of the white share. The result is
    # rounded to one decimal so binary float noise from the subtraction
    # (e.g. 100 - 72.6 -> 27.400000000000006) is not written to the exported CSV.
    # NOTE(review): assumes the source percentages carry one decimal place, as in
    # the rows displayed by `.head()` — confirm against the raw file.
    # Rows with a missing `white_pc` stay missing (NaN) in `non_white_pc`.
    .assign(non_white_pc=lambda df: (100 - df["white_pc"]).round(1))
    .sort_values("nta_code")
    .reset_index(drop=True)
)

# Summary of dtypes / null counts, followed by a preview of the first rows.
df_acs_dem.info()
df_acs_dem.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 195 entries, 0 to 194
Data columns (total 14 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   borough_code  195 non-null    int64  
 1   borough_name  195 non-null    object 
 2   nta_code      195 non-null    object 
 3   nta_name      195 non-null    object 
 4   total_pop     195 non-null    int64  
 5   hispanic_no   195 non-null    int64  
 6   hispanic_pc   193 non-null    float64
 7   white_no      195 non-null    int64  
 8   white_pc      193 non-null    float64
 9   black_no      195 non-null    int64  
 10  black_pc      192 non-null    float64
 11  asian_no      195 non-null    int64  
 12  asian_pc      193 non-null    float64
 13  non_white_pc  193 non-null    float64
dtypes: float64(5), int64(6), object(3)
memory usage: 21.5+ KB


Unnamed: 0,borough_code,borough_name,nta_code,nta_name,total_pop,hispanic_no,hispanic_pc,white_no,white_pc,black_no,black_pc,asian_no,asian_pc,non_white_pc
0,3,Brooklyn,BK09,Brooklyn Heights-Cobble Hill,24168,1770,7.3,17538,72.6,1400,5.8,2236,9.3,27.4
1,3,Brooklyn,BK17,Sheepshead Bay-Gerritsen Beach-Manhattan Beach,65631,6021,9.2,40943,62.4,4254,6.5,12973,19.8,37.6
2,3,Brooklyn,BK19,Brighton Beach,34431,3673,10.7,24524,71.2,234,0.7,4745,13.8,28.8
3,3,Brooklyn,BK21,Seagate-Coney Island,32259,8992,27.9,10240,31.7,9430,29.2,2808,8.7,68.3
4,3,Brooklyn,BK23,West Brighton,16774,660,3.9,15525,92.6,280,1.7,139,0.8,7.4


### Export processed data to `.csv`

In [3]:
# Persist the relabelled frame (including `non_white_pc`) without the positional
# index, so the file can be read back as-is in the next step.
df_acs_dem.to_csv(
    path_or_buf="data/processed/acs-ethnicity.csv",
    index=False,
)

### Load pre-processed data (three boroughs; excludes parks)

In [4]:
# Row filter: keep the three listed boroughs and drop the four listed NTA codes.
# Inside `DataFrame.query`, `&` binds like boolean `and`, so the two membership
# tests are combined as written.
nta_filter = (
    "borough_name in ['Bronx', 'Brooklyn', 'Manhattan']"
    " & nta_code not in ['BK99', 'BX98', 'BX99', 'MN99']"
)

# Read back the processed file written earlier in the notebook, then filter it.
acs_ethnicity_all = pd.read_csv("data/processed/acs-ethnicity.csv")
df_ethnicity_proc = acs_ethnicity_all.query(nta_filter)

# Summary of dtypes / null counts, followed by a preview of the first rows.
df_ethnicity_proc.info()
df_ethnicity_proc.head()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 114 entries, 0 to 116
Data columns (total 14 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   borough_code  114 non-null    int64  
 1   borough_name  114 non-null    object 
 2   nta_code      114 non-null    object 
 3   nta_name      114 non-null    object 
 4   total_pop     114 non-null    int64  
 5   hispanic_no   114 non-null    int64  
 6   hispanic_pc   114 non-null    float64
 7   white_no      114 non-null    int64  
 8   white_pc      114 non-null    float64
 9   black_no      114 non-null    int64  
 10  black_pc      114 non-null    float64
 11  asian_no      114 non-null    int64  
 12  asian_pc      114 non-null    float64
 13  non_white_pc  114 non-null    float64
dtypes: float64(5), int64(6), object(3)
memory usage: 13.4+ KB


Unnamed: 0,borough_code,borough_name,nta_code,nta_name,total_pop,hispanic_no,hispanic_pc,white_no,white_pc,black_no,black_pc,asian_no,asian_pc,non_white_pc
0,3,Brooklyn,BK09,Brooklyn Heights-Cobble Hill,24168,1770,7.3,17538,72.6,1400,5.8,2236,9.3,27.4
1,3,Brooklyn,BK17,Sheepshead Bay-Gerritsen Beach-Manhattan Beach,65631,6021,9.2,40943,62.4,4254,6.5,12973,19.8,37.6
2,3,Brooklyn,BK19,Brighton Beach,34431,3673,10.7,24524,71.2,234,0.7,4745,13.8,28.8
3,3,Brooklyn,BK21,Seagate-Coney Island,32259,8992,27.9,10240,31.7,9430,29.2,2808,8.7,68.3
4,3,Brooklyn,BK23,West Brighton,16774,660,3.9,15525,92.6,280,1.7,139,0.8,7.4


### Join to adjusted intersections data

In [None]:
# TODO: join the ethnicity data to the adjusted intersections data (not yet implemented)

In [None]:
# TODO: scatterplot of the joined ethnicity data (not yet implemented)

In [None]:
# TODO: histogram of the joined ethnicity data (not yet implemented)

### Export ethnicities data to `.csv` for joining to NTA polygons in QGIS

In [None]:
# TODO: write the ethnicity data to CSV for the QGIS join (not yet implemented)

## Household income

### Load ACS data exported from geodatabase (all boroughs)

In [5]:
# Labels applied to the raw ACS economics export: four identifier columns, a
# `_no` / `_pc` pair for each household-income bracket, then the median and mean.
acs_econ_columns = [
    "borough_code",
    "borough_name",
    "nta_code",
    "nta_name",
    "hh_inc_u10k_no",
    "hh_inc_u10k_pc",
    "hh_inc_10_14k_no",
    "hh_inc_10_14k_pc",
    "hh_inc_15_24k_no",
    "hh_inc_15_24k_pc",
    "hh_inc_25_34k_no",
    "hh_inc_25_34k_pc",
    "hh_inc_35_49k_no",
    "hh_inc_35_49k_pc",
    "hh_inc_50_74k_no",
    "hh_inc_50_74k_pc",
    "hh_inc_75_99k_no",
    "hh_inc_75_99k_pc",
    "hh_inc_100_149k_no",
    "hh_inc_100_149k_pc",
    "hh_inc_150_199k_no",
    "hh_inc_150_199k_pc",
    "hh_inc_200k_plus_no",
    "hh_inc_200k_plus_pc",
    "hh_inc_median",
    "hh_inc_mean",
]

# `header=0` treats the first row as a header and replaces it with the labels above.
acs_econ_raw = pd.read_csv(
    "data/raw/acs-economics.csv",
    header=0,
    names=acs_econ_columns,
)

# Order rows by NTA code and renumber the index from zero.
df_acs_econ = acs_econ_raw.sort_values("nta_code").reset_index(drop=True)

# Summary of dtypes / null counts, followed by a preview of the first rows.
df_acs_econ.info()
df_acs_econ.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 195 entries, 0 to 194
Data columns (total 26 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   borough_code         195 non-null    int64  
 1   borough_name         195 non-null    object 
 2   nta_code             195 non-null    object 
 3   nta_name             195 non-null    object 
 4   hh_inc_u10k_no       195 non-null    int64  
 5   hh_inc_u10k_pc       189 non-null    float64
 6   hh_inc_10_14k_no     195 non-null    int64  
 7   hh_inc_10_14k_pc     189 non-null    float64
 8   hh_inc_15_24k_no     195 non-null    int64  
 9   hh_inc_15_24k_pc     191 non-null    float64
 10  hh_inc_25_34k_no     195 non-null    int64  
 11  hh_inc_25_34k_pc     191 non-null    float64
 12  hh_inc_35_49k_no     195 non-null    int64  
 13  hh_inc_35_49k_pc     191 non-null    float64
 14  hh_inc_50_74k_no     195 non-null    int64  
 15  hh_inc_50_74k_pc     191 non-null    flo

Unnamed: 0,borough_code,borough_name,nta_code,nta_name,hh_inc_u10k_no,hh_inc_u10k_pc,hh_inc_10_14k_no,hh_inc_10_14k_pc,hh_inc_15_24k_no,hh_inc_15_24k_pc,...,hh_inc_75_99k_no,hh_inc_75_99k_pc,hh_inc_100_149k_no,hh_inc_100_149k_pc,hh_inc_150_199k_no,hh_inc_150_199k_pc,hh_inc_200k_plus_no,hh_inc_200k_plus_pc,hh_inc_median,hh_inc_mean
0,3,Brooklyn,BK09,Brooklyn Heights-Cobble Hill,402,3.5,296,2.6,446,3.9,...,1231,10.9,1781,15.7,1570,13.8,3634,32.1,135414,215829.0
1,3,Brooklyn,BK17,Sheepshead Bay-Gerritsen Beach-Manhattan Beach,1741,6.8,1976,7.7,2506,9.7,...,3027,11.8,3848,14.9,1967,7.6,1888,7.3,59577,84027.0
2,3,Brooklyn,BK19,Brighton Beach,1423,10.0,2287,16.0,1667,11.7,...,1165,8.2,1429,10.0,727,5.1,667,4.7,39578,61830.0
3,3,Brooklyn,BK21,Seagate-Coney Island,1665,14.5,1894,16.5,1855,16.2,...,668,5.8,1060,9.3,279,2.4,288,2.5,27819,49756.0
4,3,Brooklyn,BK23,West Brighton,596,7.1,1013,12.1,1157,13.8,...,1000,11.9,732,8.7,383,4.6,264,3.1,42907,61129.0


### Join to adjusted intersections data

In [None]:
# TODO: join the household income data to the adjusted intersections data (not yet implemented)

In [None]:
# TODO: scatterplot of the joined household income data (not yet implemented)

In [None]:
# TODO: histogram of the joined household income data (not yet implemented)

### Export economics data to `.csv` for joining to NTA polygons in QGIS

In [6]:
# Write the relabelled, NTA-ordered economics frame without the positional index.
df_acs_econ.to_csv(
    path_or_buf="data/processed/acs-household-income.csv",
    index=False,
)