In [113]:
import camelot
import pandas as pd
import altair as alt

In [114]:
# Source report to parse; camelot fetches it from this URL each time the cell
# runs and extracts tables from every page.
MWRA_REPORT_URL = "https://www.mwra.com/biobot/MWRAData20211230-data.pdf"
pdf = camelot.read_pdf(MWRA_REPORT_URL, pages="all")

In [154]:
# Combine everything camelot extracted into one frame, keeping only the first
# nine columns of each item. ignore_index=True matters: each item's own index
# restarts at 0, so without it a label-based drop(0) would delete the first
# row of *every* item, not just the header row at the very top.
raw = pd.concat([page.df.iloc[:, :9] for page in pdf], ignore_index=True)

# Promote the first row to column headers (line breaks stripped) and remove
# only that one row, by position.
header = [name.replace("\n", "") for name in raw.iloc[0]]
df = raw.iloc[1:].copy()
df.columns = header

# Convert columns to proper types; anything unparseable becomes NaT/NaN.
df = df.rename(columns={"Sample Date": "date"})
df["date"] = pd.to_datetime(df["date"], errors="coerce")
for col in df.columns:
    if col != "date":
        df[col] = pd.to_numeric(df[col], errors="coerce")

# Drop rows with no usable value in any column. This also removes header rows
# repeated further down (they coerce to all-NaN/NaT), so only rows carrying at
# least one parsed value are written out.
df = df.dropna(how="all").reset_index(drop=True)
df.to_csv("covid-19-wastewater.csv", index=False)

In [170]:
def plot(df, low, high, daily, weekly_avg):
    """Layer per-sample points and an averaged line over the ``date`` column.

    Parameters
    ----------
    df : data passed straight to ``alt.Chart``; must have a ``date`` column.
    low, high : column names accepted for the confidence-interval bounds.
        NOTE: neither is referenced by the body, so no band is drawn.
    daily : column name plotted as small points.
    weekly_avg : column name plotted as a line.

    Returns
    -------
    A 700x300 layered Altair chart (points underneath, line on top).
    """
    # Applies to the global renderer, not just this chart.
    alt.renderers.set_embed_options(theme='light')

    # Both layers share the same data and x encoding.
    base = alt.Chart(df).encode(x="date")
    daily_points = base.mark_point(size=1).encode(y=daily)
    average_line = base.mark_line().encode(y=weekly_avg)

    return alt.layer(daily_points, average_line).properties(width=700, height=300)

# Northern columns: per-sample values as points with the 7 day average as a line.
plot(
    df,
    low="Northern Low Confidence Interval",
    high="Northern High Confidence Interval",
    daily="Northern (copies/mL)",
    weekly_avg="Northern 7 day avg",
)

In [171]:
# Southern columns: per-sample values as points with the 7 day average as a line.
plot(
    df,
    low="Southern Low Confidence Interval",
    high="Southern High Confidence Interval",
    daily="Southern (copies/mL)",
    weekly_avg="Southern 7 day avg",
)