# American Community Survey

This example uses the Public Use Microdata Sample provided by the Census Bureau. You can read about the [data documentation](https://www.census.gov/programs-surveys/acs/microdata/documentation.html).

In [3]:
from pathlib import Path

import geopandas as gpd
import numpy as np
import pandas as pd
import shapely
from ipywidgets import FloatRangeSlider, jsdlink
from palettable.colorbrewer.diverging import BrBG_10

from lonboard import Map, ScatterplotLayer
from lonboard.colormap import apply_continuous_cmap
from lonboard.controls import MultiRangeSlider
from lonboard.layer_extension import DataFilterExtension

In [20]:
!wget https://www2.census.gov/geo/tiger/TIGER2020/PUMA/tl_2020_01_puma10.zip
!unzip tl_2020_01_puma10.zip

--2024-02-13 19:40:29--  https://www2.census.gov/geo/tiger/TIGER2020/PUMA/tl_2020_01_puma10.zip
Resolving www2.census.gov (www2.census.gov)... 104.77.246.208
Connecting to www2.census.gov (www2.census.gov)|104.77.246.208|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/zip]
Saving to: ‘tl_2020_01_puma10.zip’

tl_2020_01_puma10.z     [   <=>              ]   1.21M  2.41MB/s    in 0.5s    

2024-02-13 19:40:30 (2.41 MB/s) - ‘tl_2020_01_puma10.zip’ saved [1274421]

Archive:  tl_2020_01_puma10.zip
 extracting: tl_2020_01_puma10.cpg   
  inflating: tl_2020_01_puma10.dbf   
  inflating: tl_2020_01_puma10.prj   
  inflating: tl_2020_01_puma10.shp   
  inflating: tl_2020_01_puma10.shp.ea.iso.xml  
  inflating: tl_2020_01_puma10.shp.iso.xml  
  inflating: tl_2020_01_puma10.shx   


In [22]:
from pathlib import Path
import pandas as pd

# URL to the specified data
url = "https://data.payless.health/census.gov%2Famerican_community_survey%2Fpublic_use_microdata%2F2022_acs_pums_individual_people_united_states_first_tranche.parquet"
local_path = Path("2022_acs_pums_individual_people_united_states_first_tranche.parquet")

# Check if the local file exists to avoid re-downloading
columns_of_interest = ["Public use microdata area code (PUMA) based on 2020 Census definition (areas with population of 100,000 or more, use with ST for unique code)", 
                       "Total person's income (use ADJINC to adjust to constant dollars)"]
if not local_path.exists():
    # Download the parquet file
    df = pd.read_parquet(url, columns=columns_of_interest)

    df.rename(columns={"Public use microdata area code (PUMA) based on 2020 Census definition (areas with population of 100,000 or more, use with ST for unique code)": "puma",
                        "Total person's income (use ADJINC to adjust to constant dollars)": "income"}, inplace=True)
    # Saving to local for future use
    df.to_parquet(local_path)
else:
    # Load from the local file
    df = pd.read_parquet(local_path)


shapefile_path = "tl_2020_01_puma10.shp"
puma_shapes = gpd.read_file(shapefile_path)
puma_shapes = puma_shapes.rename(columns={'PUMACE10': "puma"})
puma_shapes['centroid'] = shapely.centroid(puma_shapes['geometry'])

merged_df = df.merge(puma_shapes[["puma", "centroid"]], on="puma",
                      how='left')
# filter for nan values
idx = ~merged_df['centroid'].isna() & ~merged_df['income'].isna()
df['income'] = df["income"].astype(float)
gdf = gpd.GeoDataFrame(df["income"][idx], geometry=merged_df["centroid"][idx])

gdf.head()

Unnamed: 0,income,geometry
1,12500.0,POINT (-86.22214 32.34156)
3,8600.0,POINT (-85.82603 33.77143)
7,15600.0,POINT (-88.07089 30.68071)
11,10100.0,POINT (-87.09821 34.13982)
13,0.0,POINT (-86.03476 34.04525)


In [23]:
filter_extension = DataFilterExtension(filter_size=3)

In [24]:
# If you want to define specific bounds for normalization:
min_bound = 5000  # Example minimum income
max_bound = 100000  # Example maximum income

normalized_income = (gdf["income"].values.astype(float) - min_bound) / (max_bound - min_bound)

fill_color = apply_continuous_cmap(normalized_income, BrBG_10)

radius = normalized_income * 200  # Adjust 200 as per your visualization needs

In [25]:
filter_values = np.column_stack(
    [gdf["income"]]
)
initial_filter_range = [5_000, 100_000]
    # [5_000, 100_000],


In [26]:
layer = ScatterplotLayer.from_geopandas(
    gdf,
    extensions=[filter_extension],
    get_fill_color=fill_color,
    get_radius=radius,
    get_filter_value=filter_values,
    filter_range=initial_filter_range,
    radius_units="meters",
    radius_min_pixels=0.1,
)
m = Map(layer)
m

  df[col_name] = pd.to_numeric(
  df[col_name] = pd.to_numeric(


TraitError: The 'get_filter_value' trait of a ScatterplotLayer instance expected filter_size (3) to match 2nd dimension of numpy array, not the ndarray array([[12500.],
       [ 8600.],
       [15600.],
       ...,
       [96800.],
       [61200.],
       [18580.]], dtype=float32).