In [None]:
import os

import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [None]:
# Download the CDC table that carries per-site metadata and a county population table.
raw = pd.read_csv(
    "https://data.cdc.gov/api/views/2ew6-ywp6/rows.csv?accessType=DOWNLOAD"
)
counties = (
    pd.read_csv(
        "https://raw.githubusercontent.com/covid-projections/covid-data-model/main/data/misc/fips_population.csv"
    )
    .loc[:, ["fips", "population"]]
    # FIPS codes are compared as 5-character, zero-padded strings everywhere below.
    .assign(fips=lambda frame: frame["fips"].astype(str).str.zfill(5))
)
# One row per distinct site description, taken from the rows whose
# `date_start` is 2022-04-16; rows with any missing field are discarded.
site_metadata = (
    raw.loc[
        raw["date_start"].eq("2022-04-16"),
        ["key_plot_id", "county_fips", "county_names", "population_served"],
    ]
    .drop_duplicates()
    .dropna()
)

In [None]:
# How many sewersheds serve multiple counties (and for what counties)?
# `county_fips` is treated as a comma-separated list: a value containing ","
# names more than one county.
multiple_counties = site_metadata["county_fips"].str.contains(",")
print("Sewersheds serving multiple counties: ",len(site_metadata[multiple_counties].county_fips.unique()))
print("Total distinct key_plot_id values: ", len(site_metadata["key_plot_id"].unique()))

# Only the multi-county rows are exploded; exploding every row would also list
# counties that are served exclusively by single-county sewersheds.
multiple_counties_locations = (
    site_metadata.loc[multiple_counties, "county_fips"].str.split(",").explode().unique()
)
# A bare expression in the middle of a cell is not rendered, so the county
# table is shown explicitly (`display` is provided by the IPython kernel).
display(
    counties[counties["fips"].isin(multiple_counties_locations)].sort_values("population", ascending=False)
)
site_metadata["key_plot_id"].unique()


# COVID-19 Concentration Data

In [None]:
# Wastewater
# Attach site metadata to every wastewater measurement, then the county
# population. The population join is on the raw `county_fips` string.
# NOTE(review): multi-county values such as "29189,29099" presumably match no
# row in `counties`, so they get a NaN population and are removed by the
# dropna() below — confirm this exclusion is intended.
nwss = pd.read_csv("https://data.cdc.gov/api/views/g653-rqe2/rows.csv?accessType=DOWNLOAD")
normalized_nwss = nwss.merge(site_metadata, on="key_plot_id").rename(columns={"county_fips": "fips"})
normalized_nwss = normalized_nwss.merge(counties, on="fips", how="left")

# Weight each concentration by the population the site serves so that the
# per-county sum can later be divided by the summed population served.
# `.assign` returns a new frame, which avoids writing into the result of
# dropna() (the source of a SettingWithCopyWarning).
nd = normalized_nwss.dropna().assign(
    pcr_conc_smoothed=lambda frame: frame["pcr_conc_smoothed"] * frame["population_served"]
)

# Sum only the two numeric measures. A bare `.sum()` would also try to add up
# the string column `key_plot_id` on pandas >= 2.0.
# NOTE(review): grouping by `normalization` can yield several rows for the same
# (fips, date) when a county has sites with different methods — verify that
# downstream index alignment copes with duplicate index entries.
by_county = (
    nd.groupby(["fips", "county_names", "date", "normalization", "population"])[
        ["pcr_conc_smoothed", "population_served"]
    ]
    .sum()
    .reset_index()
    .set_index(["fips", "date"])
)

In [None]:
# Case data
# The Covid Act Now API key is a credential, so it is read from the
# COVID_ACT_NOW_API_KEY environment variable instead of being committed with
# the notebook.
covid_act_now_api_key = os.environ.get("COVID_ACT_NOW_API_KEY")
if not covid_act_now_api_key:
    raise RuntimeError(
        "Set the COVID_ACT_NOW_API_KEY environment variable before running this cell."
    )
api = pd.read_csv(
    f"https://api.covidactnow.org/v2/counties.timeseries.csv?apiKey={covid_act_now_api_key}"
)
# Keep rows dated after 2022-01-01; the comparison is on the raw date strings,
# which orders correctly only for ISO-formatted (YYYY-MM-DD) dates.
api = api[api["date"] > "2022-01-01"]

In [None]:
# Combine datasets
# Case rows are keyed the same way as `by_county`: zero-padded 5-character
# FIPS string plus date.
cases: pd.DataFrame = (
    api.loc[:, ["fips", "date", "county", "state", "metrics.caseDensity"]]
    .assign(fips=lambda frame: frame["fips"].astype(str).str.zfill(5))
    .set_index(["fips", "date"])
)

# Wastewater values take precedence; case columns fill in wherever the
# wastewater frame has no value for a given (fips, date).
comb = by_county.combine_first(cases).assign(
    # Share of the county population covered by reporting sites, in percent.
    percent_coverage=lambda frame: (frame["population_served"] / frame["population"]) * 100,
    # Undo the population weighting applied before aggregation.
    pcr_smoothed_normalized=lambda frame: frame["pcr_conc_smoothed"] / frame["population_served"],
)

In [None]:
# Sanity checks: eyeball the aggregated wastewater rows of a single county.
# `md` is the weighted, pre-aggregation frame re-keyed by (fips, date).
md = nd.set_index(keys=["fips", "date"])

# County-level rows for FIPS 29189, keeping the `fips` level in the index.
# NOTE(review): 29189 is presumably St. Louis County, MO — the name `chi` does
# not obviously match; verify.
single_county_rows = by_county.xs("29189", level="fips", drop_level=False)
chi = single_county_rows.dropna()
chi
# Further spot checks, left disabled:
# chi.xs("2022-04-16", level="date").sum()["population_served"]
# comb[comb["fips"] =="29189,29099"]

# Charts

In [None]:
# Create graphs for largest 10 counties (and Manitowoc)
# Rows whose population is NaN sort last, so the leading FIPS codes are the
# most populous counties that have a population value in `comb`.
ranked_fips = comb.sort_values("population", ascending=False).index.get_level_values("fips").unique()
chart_fips = list(ranked_fips[:10])
# FIPS 55071 is always charted when available; it is skipped if it is absent
# from `comb` (which would otherwise raise in `.xs`) or already selected.
if "55071" in ranked_fips and "55071" not in chart_fips:
    chart_fips.append("55071")

for fips in chart_fips:
    # Get specific county data and metadata
    data = comb.xs(fips, level="fips")
    date_idx = data.index.get_level_values("date")

    # County and state names come from the case data, so the earliest row can
    # hold NaN when only wastewater data exists for that date. Use the first
    # non-null value instead of blindly reading row 0.
    county_values = data["county"].dropna()
    state_values = data["state"].dropna()
    county = county_values.iloc[0] if len(county_values) else f"FIPS {fips}"
    state = state_values.iloc[0] if len(state_values) else "state unknown"

    # Peak coverage and the most recent date on which it was reached. A county
    # without any wastewater rows has no coverage values at all.
    coverage_values = data["percent_coverage"].dropna()
    if coverage_values.empty:
        coverage_text = "Maximum population coverage: n/a"
    else:
        peak_coverage = coverage_values.max()
        coverage = round(peak_coverage, 1)
        coverage_date = (
            coverage_values[coverage_values == peak_coverage].index.get_level_values("date")[-1]
        )
        coverage_text = f"Maximum population coverage: {coverage}% (on {coverage_date})"

    # Only the normalization column decides which methods are present; rows
    # missing unrelated columns (e.g. case density) must not hide a method.
    normalization_methods = data["normalization"].dropna().unique()
    if len(normalization_methods) == 1:
        normalization_method = normalization_methods[0]
    elif len(normalization_methods) == 0:
        normalization_method = "unknown"
    else:
        normalization_method = "combination of both"

    # Plot: cases on the primary y-axis, wastewater on the secondary y-axis.
    fig = make_subplots(specs=[[{"secondary_y": True}]])
    fig.add_trace(
        go.Scatter(x=date_idx, y=data["metrics.caseDensity"], name="cases per 100k"),
        secondary_y=False,
    )
    fig.add_trace(
        # ffill(limit=1) carries each wastewater value forward over at most one
        # following missing row.
        go.Scatter(x=date_idx, y=data["pcr_smoothed_normalized"].ffill(limit=1), name="(viral copies per person) / <br> (population served)"),
        secondary_y=True,
    )

    fig.update_layout(
        # The stray closing "</br>" of the original title is dropped: it is not
        # a valid tag.
        title_text=f"Case density and wastewater concentration vs time <br><b><sup>{county}, "
        f"{state}</sup></b><br><sub>{coverage_text}, "
        f"normalization method: {normalization_method}</sub>"
    )
    fig.update_xaxes(title_text="Date")
    fig.update_yaxes(title_text="Daily new cases (7-day avg.)", secondary_y=False)
    fig.update_yaxes(title_text="Wastewater concentration", secondary_y=True)
    fig.show()

In [None]:
# Plot the share of each county's population covered by wastewater data over
# time, for the 10 most populous counties that have a population in `comb`.
# NOTE(review): the original note labelled FIPS 48201 "Dallas/Harris County";
# 48201 is presumably Harris County, TX only — verify.
top_fips = comb.sort_values("population", ascending=False).index.get_level_values("fips").unique()[:10]

for fips in top_fips:
    # Get specific county data and metadata
    data = comb.xs(fips, level="fips")
    date_idx = data.index.get_level_values("date")

    # County and state names come from the case data, so the earliest row can
    # hold NaN; use the first non-null value rather than row 0.
    county_values = data["county"].dropna()
    state_values = data["state"].dropna()
    county = county_values.iloc[0] if len(county_values) else f"FIPS {fips}"
    state = state_values.iloc[0] if len(state_values) else "state unknown"

    # Plot
    fig = make_subplots()
    fig.add_trace(
        # The series is percent coverage, so the trace is named accordingly
        # (it was previously labelled "cases per 100k").
        go.Scatter(x=date_idx, y=data["percent_coverage"], name="% population covered"),
    )
    fig.update_layout(
        # Close the <sup> and <b> tags that the original title left open.
        title_text=f"Percent of population covered by wastewater data vs time <br><b><sup>{county}, {state}</sup></b>"
    )
    # Fix the y-axis to the 0-100 percent range so charts are comparable.
    fig.update(layout_yaxis_range=[0, 100])
    fig.update_xaxes(title_text="Date")
    fig.update_yaxes(title_text="% population covered")
    fig.show()