## Configuration
_Initial steps to get the notebook ready to play nice with our repository. Do not delete this section._

Code formatting with [black](https://pypi.org/project/nb-black/).

In [1]:
# Load the lab_black IPython extension for the code formatting described above.
%load_ext lab_black

Define the paths to this notebook's directory and to its `data` subdirectory, where the scraped files are saved.

In [1]:
import os
import pathlib

In [2]:
# Absolute path of the current working directory, which the rest of the
# notebook uses as the base for locating the data folder.
this_dir = pathlib.Path.cwd()

In [3]:
# Folder, next to this notebook, under which the scraped CSV files are written.
data_dir = this_dir.joinpath("data")

In [24]:
import pytz
import glob
import requests
import pandas as pd
from bs4 import BeautifulSoup
import unicodedata
import regex as re
from datetime import datetime

## Download

Retrieve the page

In [5]:
# Address of the page that is downloaded and scraped below.
url = "https://www.amadorgov.org/services/covid-19/-fsiteid-1"

In [6]:
# Download the page. The timeout keeps the scraper from hanging indefinitely on
# an unresponsive server, and raise_for_status() stops the run on an HTTP error
# response instead of letting an error page reach the parsing steps below.
page = requests.get(url, timeout=30)
page.raise_for_status()

## Parse

In [7]:
# Parse the raw response bytes with Python's built-in HTML parser.
soup = BeautifulSoup(markup=page.content, features="html.parser")

Get the content well (the container `div` that we scrape from)

In [8]:
# Locate the first <div> whose id matches the widget we scrape from.
# find() returns None when no such element exists.
content = soup.find("div", attrs={"id": "widget_4_3778_3488"})

Get timestamp

In [9]:
# Calling the soup object is shorthand for find_all(): collect every text node
# that contains the update stamp, then read the full text of the element that
# holds the first match. Raises IndexError when the stamp is not on the page.
status_nodes = soup(text=re.compile(r"Current Status Updated "))
date_text = status_nodes[0].parent.text

In [10]:
# Display the raw timestamp text so it can be checked by eye.
date_text

'Current Status Updated September 4, 2021:'

In [11]:
# Strip the label and the colon so only the date itself remains.
date = date_text
for fragment in ("Current Status Updated ", ":"):
    date = date.replace(fragment, "")

In [12]:
# Parse the date string with pandas, then keep only the calendar date.
parsed_stamp = pd.to_datetime(date)
latest_date = parsed_stamp.date()

Get table

In [13]:
# Attribute access on a tag is shorthand for find(): first <table> inside the
# content well, or None when there is none.
table = content.table

In [14]:
# Prefer the <tbody> of the table found inside the content well, so the rows
# come from that table rather than from whichever <tbody> happens to appear
# first on the page. Fall back to the page-wide lookup when that table, or its
# <tbody>, is missing.
scoped_tbody = table.tbody if table is not None else None
tbody = scoped_tbody if scoped_tbody is not None else soup.tbody

In [15]:
# The figures are read from the first cell of the table body's second row.
table_rows = tbody.select("tr")
second_row = table_rows[1]
city_cell = second_row.select("td")[0]

In [16]:
# Every <p> inside the cell; each one is parsed as a single area below.
cities = city_cell.find_all(name="p")

Parse the result

In [17]:
# Start with an empty list that will hold one record (dict) per area.
dict_list = list()

In [18]:
def _parse_area(paragraph):
    """Build one record from a paragraph shaped like "<area>: <count>".

    The text is NFKD-normalized first, which folds compatibility characters
    such as non-breaking spaces into their plain equivalents, then split on
    colons and stripped of surrounding spaces.

    Raises IndexError when the paragraph text contains no colon.
    """
    text = unicodedata.normalize("NFKD", paragraph.get_text())
    parts = [piece.strip(" ") for piece in text.split(":")]
    return dict(area=parts[0], confirmed_cases=parts[1])


# Rebuild the list from scratch so that re-running this cell cannot append
# duplicate rows to a list left over from an earlier run.
dict_list = [_parse_area(row) for row in cities]

Convert to dataframe

In [19]:
# One row per area, with the date the county reported as the last column and
# the county name as the first column.
df = pd.DataFrame(dict_list).assign(county_date=latest_date)
df.insert(0, "county", "Amador")

In [20]:
# Make sure the area names are stored as Python strings.
df = df.astype({"area": str})

Mark the current date

In [22]:
# Timezone used to decide what "today" is when naming the output file.
tz = pytz.timezone("America/Los_Angeles")

In [25]:
# Current calendar date in the timezone chosen above.
now_local = datetime.now(tz)
today = now_local.date()

In [27]:
# Name of the subfolder of the data directory that holds this scraper's files.
slug = "amador"

## Vet

In [28]:
# The scrape is expected to yield exactly 7 rows; stop here if there are more.
# An explicit check is used instead of `assert` because assert statements are
# removed when Python runs with -O, which would silently disable this vetting.
if len(df) > 7:
    raise AssertionError("Amador's area scraper has extra rows")

In [29]:
# The scrape is expected to yield exactly 7 rows; stop here if there are fewer.
# An explicit check is used instead of `assert` because assert statements are
# removed when Python runs with -O, which would silently disable this vetting.
if len(df) < 7:
    raise AssertionError("Amador's area scraper is missing rows")

## Export

In [31]:
# Write today's snapshot as data/<slug>/<date>.csv. The folder is created first
# (a no-op when it already exists) so the export does not fail when the output
# directory is missing, for example on a first run.
output_dir = data_dir / slug
output_dir.mkdir(parents=True, exist_ok=True)
df.to_csv(output_dir / f"{today}.csv", index=False)

## Combine

In [32]:
# Every CSV in this scraper's folder except the combined timeseries file,
# which is the output of this section and must not be fed back in.
csv_pattern = str(data_dir / slug / "*.csv")
csv_list = []
for csv_path in glob.glob(csv_pattern):
    if csv_path.endswith("timeseries.csv"):
        continue
    csv_list.append(csv_path)

In [33]:
# Load each daily file, parsing the county's report date as a datetime.
df_list = []
for daily_csv in csv_list:
    daily_frame = pd.read_csv(daily_csv, parse_dates=["county_date"])
    df_list.append(daily_frame)

In [34]:
# Stack all daily snapshots, then order them by report date and area name.
stacked = pd.concat(df_list)
df = stacked.sort_values(by=["county_date", "area"])

In [35]:
# Save the combined history alongside the daily files.
timeseries_path = data_dir / slug / "timeseries.csv"
df.to_csv(timeseries_path, index=False)