In [1]:
from bokeh.io import output_notebook
from bokeh.models import FuncTickFormatter
from bokeh.plotting import figure, output_file, show
import pandas as pd

# bokeh: render figures inline in the notebook
# https://docs.bokeh.org/en/latest/docs/user_guide/jupyter.html#userguide-jupyter-notebook
output_notebook()

# Load the NYT county-level COVID-19 data
# (columns: date, county, state, fips, cases, deaths).
df = pd.read_csv(
    "https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv"
)
# Convert the whole column in one vectorised call; applying pd.to_datetime
# element by element gives the same result but parses every row separately.
df["date"] = pd.to_datetime(df["date"])
df.head()

Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-01-21,Snohomish,Washington,53061.0,1,0
1,2020-01-22,Snohomish,Washington,53061.0,1,0
2,2020-01-23,Snohomish,Washington,53061.0,1,0
3,2020-01-24,Cook,Illinois,17031.0,1,0
4,2020-01-24,Snohomish,Washington,53061.0,1,0


In [2]:
def make_figure(df, title):
    """Draw case and death totals over time and display the figure.

    Args:
        df: Frame indexed by date whose columns include ("cases", "sum") and
            ("deaths", "sum"), i.e. the result of ``groupby("date").agg(["sum"])``.
        title: Text shown as the figure title.

    Returns:
        None. The figure is displayed via ``show``.
    """
    p = figure(
        title=title, x_axis_label="Date", y_axis_label="# of Cases", plot_width=880,
    )
    p.line(df.index, df[("cases", "sum")], legend_label="Cases", line_width=2)
    p.line(
        df.index,
        df[("deaths", "sum")],
        legend_label="Deaths",
        line_width=2,
        line_color="red",
    )
    # new Date(tick) treats the tick value as milliseconds since the epoch.
    # JavaScript months are zero-based, so 1 is added to print the calendar
    # month, and the UTC getters keep the label from sliding to the previous
    # day for viewers whose local time zone is behind UTC.
    p.xaxis.formatter = FuncTickFormatter(code="""
        let date = new Date(tick);
        return `${date.getUTCDate()}-${date.getUTCMonth() + 1}-${date.getUTCFullYear()}`
    """)
    show(p)


def plot_state_curve(df, state: str):
    """Plot the per-day case and death totals for a single state.

    Args:
        df: County-level frame with at least the columns "date", "state",
            "cases" and "deaths".
        state: State name, matched exactly against the "state" column.
    """
    # get the state data
    state_df = df[df["state"] == state]
    # Sum the two plotted metrics over all of the state's rows for each day.
    # The columns are selected explicitly so unrelated columns in df cannot
    # break the aggregation, and the ("cases", "sum") / ("deaths", "sum")
    # labels produced by agg(["sum"]) are left as-is because make_figure
    # looks the series up under exactly those labels.
    day_df = state_df.groupby(by="date")[["cases", "deaths"]].agg(["sum"])
    make_figure(day_df, f"{state} Cases")


def plot_state_counties_curve(df, state: str, counties: list):
    """Plot the per-day case and death totals for selected counties of one state.

    Args:
        df: County-level frame with at least the columns "date", "state",
            "county", "cases" and "deaths".
        state: State name, matched exactly against the "state" column.
        counties: County names to include; their values are summed together.
    """
    # get the state and county data
    counties_df = df[(df["state"] == state) & (df["county"].isin(counties))]
    # Sum the two plotted metrics over the selected counties for each day.
    # The columns are selected explicitly so unrelated columns in df cannot
    # break the aggregation, and the ("cases", "sum") / ("deaths", "sum")
    # labels produced by agg(["sum"]) are left as-is because make_figure
    # looks the series up under exactly those labels.
    day_df = counties_df.groupby(by="date")[["cases", "deaths"]].agg(["sum"])
    make_figure(day_df, f'{state} Cases for counties {",".join(counties)}')


def plot_states_and_counties_curve(df, state_county_map: dict, name):
    """Plot per-day totals for a group of counties spanning several states.

    Intended for metropolitan areas, e.g. Chicagoland.

    Args:
        df: County-level frame with at least the columns "date", "state",
            "county", "cases" and "deaths".
        state_county_map: Maps a state name to the list of county names to
            include from that state.
        name: Title for the figure.
    """
    # Build the row filter one state at a time with vectorised comparisons
    # rather than evaluating a Python lambda for every row of df. Pairing each
    # county list with its own state keeps same-named counties in different
    # states (e.g. "Lake") from matching the wrong state.
    in_area = pd.Series(False, index=df.index)
    for state, counties in state_county_map.items():
        in_area |= (df["state"] == state) & df["county"].isin(counties)
    state_counties_df = df[in_area]
    # Sum the two plotted metrics over the selected counties for each day.
    # The ("cases", "sum") / ("deaths", "sum") labels produced by agg(["sum"])
    # are left as-is because make_figure looks the series up under them.
    day_df = state_counties_df.groupby(by="date")[["cases", "deaths"]].agg(["sum"])
    make_figure(day_df, name)

In [3]:
# Statewide case and death totals for Indiana.
plot_state_curve(df, state="Indiana")

In [4]:
# Case and death totals for Cook County, Illinois only.
plot_state_counties_curve(df, state="Illinois", counties=["Cook"])

In [5]:
# Counties of the Chicago metropolitan area, grouped by state.
# https://en.wikipedia.org/wiki/Chicago_metropolitan_area
chicago_metropolitan = dict(
    Illinois=[
        "Cook",
        "DeKalb",
        "DuPage",
        "Grundy",
        "Kankakee",
        "Kane",
        "Kendall",
        # Lake County, IL is a different county from Lake County, IN listed
        # under Indiana; both belong to the metro area.
        "Lake",
        "McHenry",
        "Will",
    ],
    Indiana=["Jasper", "Lake", "Newton", "Porter"],
    Wisconsin=["Kenosha"],
)

plot_states_and_counties_curve(
    df, chicago_metropolitan, "Cases for the Chicago Metropolitan Area"
)

In [6]:
# Denver Metropolitan Area
# https://en.wikipedia.org/wiki/Denver_metropolitan_area
# Bound to its own name so this cell does not overwrite the Chicago mapping.
denver_metropolitan = dict(
    Colorado=[
        "Denver",
        "Arapahoe",
        "Jefferson",
        "Adams",
        "Douglas",
        "Broomfield",
        "Elbert",
        "Park",
        "Clear Creek",
        "Gilpin",
    ],
)

plot_states_and_counties_curve(
    df, denver_metropolitan, "Cases for the Denver Metropolitan Area"
)