---
### NOTE:

Testing this notebook requires running `pipenv install` to pick up the added dependency, plus one extra step: run `playwright install chromium` to download the copy of Chromium that Playwright needs. This step has also been added to the GitHub action.

---

## Configuration
_Initial steps to get the notebook ready to play nice with our repository. Do not delete this section._

Code formatting with [black](https://pypi.org/project/nb-black/).

In [1]:
%load_ext lab_black

Import the path helpers and locate the notebook's working directory so we can build paths to the `data` folder.

In [2]:
import os
import pathlib

In [3]:
# Absolute path of the directory the notebook is executed from.
this_dir = pathlib.Path(os.getcwd())

In [4]:
# Root folder for every CSV this notebook reads or writes.
data_dir = this_dir.joinpath("data")

In [5]:
import pytz
import glob
import pandas as pd
from json import loads
from slugify import slugify
from datetime import datetime
from playwright.async_api import async_playwright

## Download

In [6]:
# Drive a Chromium browser with Playwright to fetch the dashboard's Excel export.
# Produces two names used by later cells:
#   last_updated - raw text of the "#DataLastReloaded" element
#   df           - the exported workbook loaded as a DataFrame
async with async_playwright() as playwright:
    browser = await playwright.chromium.launch()
    # Downloads must be accepted on the context for the export below to be captured.
    context = await browser.new_context(accept_downloads=True)

    # Open a new page (tab) in that context
    page = await context.new_page()

    # Load the county's COVID dashboard overview page
    await page.goto("https://dashboard.cchealth.org/extensions/COVID/Overview.html")

    # Wait for the "last reloaded" element to exist, then for its text to be
    # non-empty, before reading it.
    last_updated_selector = "#DataLastReloaded"
    await page.wait_for_selector(last_updated_selector)
    await page.wait_for_function(
        f"document.querySelector('{last_updated_selector}').textContent"
    )
    last_updated = await page.text_content(last_updated_selector)  # timestamp is split out of this text in the "Set date" cell
    print(last_updated)
    # Fixed 5-second pause before clicking export.
    # NOTE(review): presumably gives the dashboard time to finish rendering - confirm.
    await page.wait_for_timeout(5000)

    # Clicking "#export2" is expected to open a popup and start a download;
    # both expectations are registered before the click so neither event is missed.
    async with page.expect_download() as download_info:
        async with page.expect_popup() as popup_info:
            await page.click("#export2")
        page1 = await popup_info.value
    download = await download_info.value

    # Parse the workbook now, while the browser context that owns the
    # downloaded file is still open.
    path = await download.path()
    df = pd.read_excel(path, engine="openpyxl")

    # Close the popup page
    await page1.close()
    # Close the context
    await context.close()
    # Close the browser
    await browser.close()

Error: net::ERR_CERT_DATE_INVALID at https://dashboard.cchealth.org/extensions/COVID/Overview.html
=========================== logs ===========================
navigating to "https://dashboard.cchealth.org/extensions/COVID/Overview.html", waiting until "load"
============================================================

## Parse

In [10]:
# Discard the dashboard columns that are not carried into the output.
df = df.drop(
    columns=[
        "Cases Last 14 Days",
        "Population",
        "Cases Per 100,000",
        "Cases Last 14 Days Per 100,000",
    ]
)

In [11]:
# Keep every row except the "Totals" summary row.
df = df.loc[df["Location"].ne("Totals")]

In [12]:
# Dashboard column name -> output column name.
headers = {
    "Location": "area",
    "Positive Case Count": "confirmed_cases",
}

In [13]:
# Apply the output column names, then prepend a constant county column.
df = df.rename(headers, axis="columns")
df.insert(loc=0, column="county", value="Contra Costa")

Set date

In [14]:
# Keep the piece of the label that follows its first ": " separator.
# NOTE(review): assumes the label looks like "<prefix>: <timestamp>" - confirm against the page.
last_updated_split = last_updated.split(": ")[1]

In [16]:
# Pacific time zone, used to compute today's date below.
tz = pytz.timezone("America/Los_Angeles")

In [17]:
# Current calendar date in Pacific time; used to name the daily CSV in the Export section.
today = datetime.now(tz).date()

In [18]:
# Name of this county's subfolder under `data_dir`.
slug = "contra-costa"

In [19]:
# Stamp every row with the date the county last refreshed the dashboard.
#
# strptime() returns a naive datetime. Calling .astimezone() on it would make
# Python assume the *machine's* local zone (UTC on CI) and then convert to
# Pacific, which rolls early-morning timestamps back to the previous day.
# Attaching the zone with pytz's localize() keeps the wall-clock date as shown.
# NOTE(review): assumes the dashboard reports its refresh time in Pacific time - confirm.
df["county_date"] = (
    tz.localize(datetime.strptime(last_updated_split, "%m/%d/%Y, %I:%M %p"))
    .date()
    .isoformat()
)

## Vet

In [22]:
# Fail if the table has more rows than the 31 expected.
assert len(df) <= 31, "Contra Costa's scraper has extra rows"

In [23]:
# Fail if the table has fewer rows than the 31 expected.
assert len(df) >= 31, "Contra Costa County's scraper is missing rows"

## Export

In [25]:
# Write today's snapshot to <data_dir>/<slug>/<today>.csv without the index column.
df.to_csv(data_dir.joinpath(slug, f"{today}.csv"), index=False)

In [26]:
# Every CSV in this county's folder except the combined timeseries file itself.
csv_list = [
    csv_path
    for csv_path in glob.glob(str(data_dir.joinpath(slug, "*.csv")))
    if not csv_path.endswith("timeseries.csv")
]

In [27]:
# Load each snapshot CSV into a DataFrame and collect them in `df_list`.
# Files with "manual" in their name already carry a `date` column; for every
# other file the date is taken from the file name (e.g. "2021-01-31.csv").
df_list = []
for csv_file in csv_list:
    # Work from the file name only: splitting on "/" breaks with other path
    # separators, and matching "manual" against the full path would also match
    # a parent directory that happens to contain that word.
    csv_path = pathlib.Path(csv_file)
    if "manual" in csv_path.name:
        snapshot = pd.read_csv(csv_path, parse_dates=["date"])
    else:
        snapshot = pd.read_csv(csv_path, parse_dates=["county_date"])
        # NOTE(review): this `date` is a string while manual files parse `date`
        # as datetimes - confirm the mixed column sorts as intended downstream.
        snapshot["date"] = csv_path.stem
    df_list.append(snapshot)

In [28]:
# Stack all snapshots into one table, ordered by date and then by area.
df = pd.concat(df_list)
df = df.sort_values(by=["date", "area"])

In [29]:
# Write the combined table to <data_dir>/<slug>/timeseries.csv without the index column.
df.to_csv(data_dir.joinpath(slug, "timeseries.csv"), index=False)