In [58]:
!pip install datacommons datacommons_pandas --upgrade --quiet

import datacommons, datacommons_pandas as dc
import plotly.express as px
import numpy as np
import pandas as pd
import datapane as dp

# Look up the dcids listed under the "member" property of the CDC500_City node.
cdc500_members = dc.get_property_values(["CDC500_City"], "member", limit=500)
city_dcids = cdc500_members["CDC500_City"]

# Statistical variables fetched for every city; one column per variable.
cdc500_stat_vars = [
    "Percent_Person_Obesity",  # Prevalence of obesity from CDC
    "Median_Income_Person",
    "Median_Age_Person",
    "UnemploymentRate_Person",  # Unemployment rate from BLS
    "Count_Person_BelowPovertyLevelInThePast12Months",  # Persons living below the poverty line from Census
    "Count_Person",  # Total population from Census
]

cdc500_df = dc.build_multivariate_dataframe(city_dcids, cdc500_stat_vars)

# First look at the data: median age against median income, one point per city.
age_vs_income = px.scatter(
    cdc500_df,
    x="Median_Age_Person",
    y="Median_Income_Person",
)

age_vs_income.show()

In [59]:
def add_parent_col(df, col, level):
    """
    Add a ``{level}_geoId`` column holding the dcid of each row's parent place.

    The parent is looked up through the ``containedInPlace`` property,
    restricted to places of type ``level`` (e.g. ``"State"``). When several
    parents are returned for a place, the first one is used.

    Args:
        df: DataFrame to modify in place.
        col: Name of the column in ``df`` that holds the place dcids.
        level: Place type of the parent to look up; also used as the prefix
            of the new column name.

    Returns:
        None. ``df`` gains (or has overwritten) the column ``f"{level}_geoId"``.
        Rows whose dcid is missing or has no parent of that type get NaN.
    """
    # Query each distinct, non-null dcid only once instead of sending the
    # whole column (duplicates and missing values included) to the API.
    dcids = df[col].dropna().unique().tolist()
    parents = (
        dc.get_property_values(dcids, "containedInPlace", value_type=level)
        if dcids
        else {}
    )

    # Keep the first parent per dcid; dcids with an empty result are left out
    # so that they map to NaN below.
    first_parent = {dcid: values[0] for dcid, values in parents.items() if values}

    df[f"{level}_geoId"] = df[col].map(first_parent)


def add_name_col(df, col, level):
    """
    Add a column named ``level`` holding the name of each place dcid in ``col``.

    Names are looked up through the ``name`` property. When several names are
    returned for a place, the first one is used.

    Args:
        df: DataFrame to modify in place.
        col: Name of the column in ``df`` that holds the place dcids.
        level: Name of the new column (e.g. ``"State"`` or ``"City"``).

    Returns:
        None. ``df`` gains (or has overwritten) the column ``level``. Rows
        whose dcid is missing or has no name get NaN.
    """
    # Query each distinct, non-null dcid only once instead of sending the
    # whole column (duplicates and missing values included) to the API.
    dcids = df[col].dropna().unique().tolist()
    names = dc.get_property_values(dcids, "name") if dcids else {}

    # Keep the first name per dcid; dcids with an empty result are left out
    # so that they map to NaN below.
    first_name = {dcid: values[0] for dcid, values in names.items() if values}

    df[level] = df[col].map(first_name)


# Move the place dcid out of the index into a regular "place" column.
# Guarded so that re-running this cell does not reset the index again and
# stack spurious "index"/"level_0" columns onto the frame.
if "place" not in cdc500_df.columns:
    cdc500_df = cdc500_df.reset_index()

# Get the parent state
add_parent_col(cdc500_df, "place", "State")

# Get the proper names for states and cities
add_name_col(cdc500_df, "State_geoId", "State")
add_name_col(cdc500_df, "place", "City")

# Same scatter as before, now labelled by city, coloured by state and sized
# by population. Rows with any missing value are dropped before plotting.
age_vs_income_v2 = px.scatter(
    cdc500_df.dropna(),
    x="Median_Age_Person",
    y="Median_Income_Person",
    hover_name="City",
    color="State",
    size="Count_Person",
)

# Fix the visible window on both axes; points outside it are not shown
# until the user zooms or pans.
age_vs_income_v2.update_layout(
    xaxis_range=[25, 45],
    yaxis_range=[20000, 65000],
)

age_vs_income_v2.show()

In [63]:
# !pip install folium --upgrade --quiet
import folium as folium
import requests
import json

# Get GeoJSON data for US States
url = "https://raw.githubusercontent.com/python-visualization/folium/master/examples/data/us-states.json"
# Fail fast on a hung connection or an HTTP error instead of trying to parse
# an error page as GeoJSON.
response = requests.get(url, timeout=30)
response.raise_for_status()
state_geo = response.json()

# Fetch data on state level
us_states = dc.get_places_in(["country/USA"], "State")

elderly_wealth = dc.build_multivariate_dataframe(
    us_states["country/USA"],
    [
        "Count_Household_HouseholderAge65OrMoreYears_IncomeOf200000OrMoreUSDollar",
        "Count_Household_HouseholderAge65OrMoreYears",
    ],
).reset_index()

# Percentage of households with a householder aged 65+ whose income is
# 200,000 USD or more. Both operands come from `elderly_wealth`; the previous
# code referenced an undefined name here and raised NameError on a fresh kernel.
elderly_wealth["ratio_65_above_200k_or_more_income"] = (
    elderly_wealth["Count_Household_HouseholderAge65OrMoreYears_IncomeOf200000OrMoreUSDollar"]
    / elderly_wealth["Count_Household_HouseholderAge65OrMoreYears"]
    * 100
)

add_name_col(elderly_wealth, "place", "State")

# Drop Washington DC and Puerto Rico to match GeoJSON data
elderly_wealth = elderly_wealth[~elderly_wealth["State"].isin(["District of Columbia", "Puerto Rico"])]

# Prepare the customised tooltip text, keyed by state name so that each
# GeoJSON feature receives the text for its own state regardless of the row
# order of `elderly_wealth` or the feature order in the GeoJSON file.
tooltip_by_state = {
    state: f"{state}: {ratio:.2f}%"
    for state, ratio in zip(
        elderly_wealth["State"],
        elderly_wealth["ratio_65_above_200k_or_more_income"],
    )
}
for feature in state_geo["features"]:
    feature_name = feature["properties"]["name"]
    # Every feature gets a "tooltip1" property, even when no data row matches.
    feature["properties"]["tooltip1"] = tooltip_by_state.get(
        feature_name, f"{feature_name}: no data"
    )

m = folium.Map(location=[48, -102], zoom_start=3)

c = folium.Choropleth(
    geo_data=state_geo,
    name="choropleth",
    data=elderly_wealth,
    columns=["State", "ratio_65_above_200k_or_more_income"],
    key_on="feature.properties.name",
    fill_color="YlGn",
    fill_opacity=0.7,
    line_opacity=0.2,
    # The plotted series is a percentage of households, not a median income.
    legend_name="Households aged 65+ with income of 200,000 USD or more (%)",
    highlight=True,
    overlay=True,
    # NOTE(review): the tooltip is attached through GeoJsonTooltip below;
    # confirm whether Choropleth actually uses this keyword.
    tooltip="feature.properties.tooltip1",
).add_to(m)

# Show the pre-formatted "tooltip1" text on hover, without a field label.
c.geojson.add_child(folium.features.GeoJsonTooltip(["tooltip1"], labels=False))

folium.LayerControl().add_to(m)

m