# American Community Survey

This example uses the Public Use Microdata Sample provided by the Census Bureau. You can read about the [data documentation](https://www.census.gov/programs-surveys/acs/microdata/documentation.html) or view the [data transformation](https://github.com/jaanli/exploring_american_community_survey_data/blob/main/american_community_survey/models/public_use_microdata_sample/generated/enum_types_mapped_renamed/housing_units_united_states_first_tranche_enum_mapped_renamed.sql).

In [2]:
from pathlib import Path

import geopandas as gpd
import numpy as np
import pandas as pd
import shapely
from ipywidgets import FloatRangeSlider, jsdlink
from palettable.colorbrewer.diverging import BrBG_10

from lonboard import Map, ScatterplotLayer
from lonboard.colormap import apply_continuous_cmap
from lonboard.controls import MultiRangeSlider
from lonboard.layer_extension import DataFilterExtension

In [3]:
!wget -O 2020_census_microdata_tiger_shapefile.zip https://data.payless.health/census.gov%2Famerican_community_survey%2F2020_census_microdata_tiger_shapefile.zip
!unzip 2020_census_microdata_tiger_shapefile.zip

--2024-02-14 07:52:51--  https://data.payless.health/census.gov%2Famerican_community_survey%2F2020_census_microdata_tiger_shapefile.zip
Resolving data.payless.health (data.payless.health)... 18.164.116.95, 18.164.116.105, 18.164.116.102, ...
Connecting to data.payless.health (data.payless.health)|18.164.116.95|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 68488320 (65M) [application/zip]
Saving to: ‘2020_census_microdata_tiger_shapefile.zip’


2024-02-14 07:52:52 (48.8 MB/s) - ‘2020_census_microdata_tiger_shapefile.zip’ saved [68488320/68488320]

Archive:  2020_census_microdata_tiger_shapefile.zip
 extracting: 2020_census_microdata_tiger_shapefile.cpg  
  inflating: 2020_census_microdata_tiger_shapefile.dbf  
  inflating: 2020_census_microdata_tiger_shapefile.shp  
  inflating: 2020_census_microdata_tiger_shapefile.shx  


In [21]:
from pathlib import Path
import pandas as pd

url_first = "https://data.payless.health/census.gov%2Famerican_community_survey%2Fpublic_use_microdata%2F2022_acs_pums_individual_people_united_states_first_tranche.parquet"
url_second = "https://data.payless.health/census.gov%2Famerican_community_survey%2Fpublic_use_microdata%2F2022_acs_pums_individual_people_united_states_second_tranche.parquet"
local_path = Path("2022_acs_pums_individual_people_united_states.parquet")

# Check if the local file exists to avoid re-downloading
columns_of_interest = ["Public use microdata area code (PUMA) based on 2020 Census definition (areas with population of 100,000 or more, use with ST for unique code)", 
                       "Total person's income (use ADJINC to adjust to constant dollars)"]
if not local_path.exists():
    df_first = pd.read_parquet(url_first, columns=columns_of_interest)
    df_second = pd.read_parquet(url_second, columns=columns_of_interest)
    df = pd.concat([df_first, df_second])

    df.rename(columns={"Public use microdata area code (PUMA) based on 2020 Census definition (areas with population of 100,000 or more, use with ST for unique code)": "puma",
                        "Total person's income (use ADJINC to adjust to constant dollars)": "income"}, inplace=True)
    df.to_parquet(local_path)
else:
    df = pd.read_parquet(local_path)


shapefile_path = "2020_census_microdata_tiger_shapefile.shp"
puma_shapes = gpd.read_file(shapefile_path)
puma_shapes = puma_shapes.rename(columns={'PUMACE10': "puma"})
puma_shapes['centroid'] = shapely.centroid(puma_shapes['geometry'])

merged_df = df.merge(puma_shapes[["puma", "centroid"]], on="puma",
                      how='left')
# filter for nan values
idx = ~merged_df['centroid'].isna() & ~merged_df['income'].isna()
df['income'] = df["income"].astype(float)
gdf = gpd.GeoDataFrame(df["income"][idx], geometry=merged_df["centroid"][idx])

gdf.head()

Unnamed: 0,income,geometry
0,18800.0,POINT (-75.47555 40.59609)
1,12500.0,POINT (-86.22214 32.34156)
2,16400.0,POINT (-93.72120 33.64495)
3,8600.0,POINT (-96.12896 42.56285)
4,5000.0,POINT (-85.39690 40.22753)


In [22]:
filter_extension = DataFilterExtension(filter_size=1)

In [23]:
# If you want to define specific bounds for normalization:
min_bound = 5000  # Example minimum income
max_bound = 100000  # Example maximum income

normalized_income = (gdf["income"].values.astype(float) - min_bound) / (max_bound - min_bound)

fill_color = apply_continuous_cmap(normalized_income, BrBG_10)

radius = normalized_income * 200  # Adjust 200 as per your visualization needs

In [24]:
filter_values = gdf["income"]

initial_filter_range = [5_000, 100_000]


In [25]:
layer = ScatterplotLayer.from_geopandas(
    gdf,
    extensions=[filter_extension],
    get_fill_color=fill_color,
    get_radius=radius,
    get_filter_value=filter_values,
    filter_range=initial_filter_range,
    radius_units="meters",
    radius_min_pixels=1,
)
m = Map(layer)
m

  df[col_name] = pd.to_numeric(
  df[col_name] = pd.to_numeric(


Map(layers=[ScatterplotLayer(extensions=[DataFilterExtension()], filter_range=[5000.0, 100000.0], get_fill_col…

In [26]:
income_slider = FloatRangeSlider(
    value=initial_filter_range,
    min=0,
    max=100_000,
    step=1,
    description="Income: ",
)


multi_slider = MultiRangeSlider([income_slider])
multi_slider

MultiRangeSlider(children=(FloatRangeSlider(value=(5000.0, 100000.0), description='Income: ', max=100000.0, st…

In [27]:
_ = jsdlink((income_slider, "value"), (layer, "filter_range"))