In [16]:
!pip install datacommons datacommons_pandas --upgrade --quiet

import datacommons, datacommons_pandas as dc
import plotly.express as px, numpy as np, pandas as pd, datapane as dp

city_dcids = dc.get_property_values(["CDC500_City"], "member", limit=500)["CDC500_City"]

cdc500_df = dc.build_multivariate_dataframe(
    city_dcids,
    [
        "Percent_Person_Obesity",  # Prevalence of obesity from CDC
        "Median_Income_Person",
        "Median_Age_Person",
        "UnemploymentRate_Person",  # Unemployment rate from BLS
        "Count_Person_BelowPovertyLevelInThePast12Months",  # Persons living below the poverty line from Census
        "Count_Person",  # Total population from Census
    ],
)

age_vs_income = px.scatter(cdc500_df, x="Median_Age_Person", y="Median_Income_Person")

age_vs_income.show()

<IPython.core.display.Javascript object>

In [22]:
def add_parent_col(df, col, level):
    """
    Add a new column, where each value is the dcid of the parent level
    """
    df[f"{level}_geoId"] = (
        df[col]
        .map(dc.get_property_values(df[col], "containedInPlace", value_type=level))
        .str[0]
    )


def add_name_col(df, col, level):
    """
    Add a new column, where each value is the name for the place dcid in the index.
    """
    df[level] = df[col].map(dc.get_property_values(df[col], "name")).str[0]


cdc500_df = cdc500_df.reset_index()

# Get the parent state
add_parent_col(cdc500_df, "place", "State")

# Get the proper names for states and cities
add_name_col(cdc500_df, "State_geoId", "State")
add_name_col(cdc500_df, "place", "City")

age_vs_income_v2 = px.scatter(
    cdc500_df.dropna(),
    x="Median_Age_Person",
    y="Median_Income_Person",
    hover_name="City",
    color="State",
    size="Count_Person",
)

age_vs_income_v2.update_layout(yaxis_range=[20000, 65000])
age_vs_income_v2.update_layout(xaxis_range=[25, 45])

age_vs_income_v2.show()

<IPython.core.display.Javascript object>

In [36]:
tooltip_text = state_data[["State", "Median_Income_Person"]].apply(
    lambda s, p: f"{s}: ${p}", axis=1
)

tooltip_text

TypeError: <lambda>() missing 1 required positional argument: 'p'

<IPython.core.display.Javascript object>

In [33]:
# !pip install folium --upgrade --quiet
import folium as folium
import requests
import json

# Get GeoJSON data for US States
url = "https://raw.githubusercontent.com/python-visualization/folium/master/examples/data/us-states.json"
state_geo = json.loads(requests.get(url).text)

# Group data by state
state_data = cdc500_df.groupby("State").median().reset_index()

# Drop Washington DC
washington_dc = state_data.index[
    state_data["State"] == "District of Columbia"
].tolist()[0]
state_data = state_data.drop(washington_dc).reset_index()

# prepare the customised tooltip text
tooltip_text = (
    state_data[["State", "Median_Income_Person"]].astype(str).apply(": $".join, axis=1)
)

# # Append a tooltip column with customised text
for idx, x in enumerate(tooltip_text):
    state_geo["features"][idx]["properties"]["tooltip1"] = tooltip_text[idx]

m = folium.Map(location=[48, -102], zoom_start=3)

c = folium.Choropleth(
    geo_data=state_geo,
    name="choropleth",
    data=state_data,
    columns=["State", "Median_Income_Person"],
    key_on="feature.properties.name",
    fill_color="YlGn",
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name="Median Income",
    highlight=True,
    overlay=True,
    tooltip="feature.properties.tooltip1",
).add_to(m)

c.geojson.add_child(folium.features.GeoJsonTooltip(["tooltip1"], labels=False))

folium.LayerControl().add_to(m)

m

<IPython.core.display.Javascript object>

In [12]:
g = dp.TextReport(
        dp.Plot(age_vs_income),
        dp.Plot(age_vs_income_v2),
        dp.Plot(m),
        dp.DataTable(data),
).upload(name="Explore the API powering Google Search")

TextReport assets successfully uploaded - you can edit and format [here](https://datapane.com/u/johnmicahreid/reports/exploring-the-dataset-powering-google-search/edit/), and view the final report [here](https://datapane.com/u/johnmicahreid/reports/exploring-the-dataset-powering-google-search/)

In [None]:
states_selected = cities_selected \
    .groupby("State") \
    .median() \
    .reset_index()


import plotly.graph_objects as go

fig = go.Figure(
    data=[
        go.Bar(
            name='Median Age', 
            x=states_selected.State, 
            y=states_selected.Median_Age_Person,
            yaxis='y', 
            offsetgroup=1),
        go.Bar(
            name='Median Income', 
            x=states_selected.State, 
            y=states_selected.Median_Income_Person,
            yaxis='y2', 
            offsetgroup=2)
    ], layout={
            'yaxis': {'title': 'SF Zoo axis'},
            'yaxis2': {'title': 'LA Zoo axis', 'overlaying': 'y', 'side': 'right'}
        }
)

# Change the bar mode
fig.update_layout(barmode='group')
fig.show()

# age_vs_income_state = px.bar(
#     states_selected,
#     x = "Median_Age_Person",
#     y = "Median_Income_Person",
#     color = "State"
# )

# age_vs_income_state.show()