## Configuration
_Initial steps to get the notebook ready to play nice with our repository. Do not delete this section._

Format code cells automatically with [black](https://pypi.org/project/nb-black/).

In [1]:
%load_ext lab_black

In [2]:
import os
import pathlib

In [3]:
# Directory the notebook is executed from; every other path is built from it.
this_dir = pathlib.Path.cwd()

In [4]:
# Root folder for scraped output, located next to the notebook.
data_dir = this_dir.joinpath("data")

In [5]:
import pytz
import glob
import requests
import pandas as pd
import json
from datetime import datetime

## Download

Retrieve the region-level case counts as JSON from the Napa County ArcGIS feature service.

In [6]:
# Query against the Napa_Case_Regions feature layer. The string is one literal
# split across lines: the endpoint first, then the query parameters in their
# original order. It requests the Region, IncidentCount, Death and EditDate
# fields for every record (where=1=1) in pjson format.
url = (
    "https://services1.arcgis.com/Ko5rxt00spOfjMqj/arcgis/rest/services/Napa_Case_Regions/FeatureServer/0//query"
    "?where=1%3D1&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope"
    "&inSR=&spatialRel=esriSpatialRelIntersects&resultType=none&distance=0.0"
    "&units=esriSRUnit_Meter&returnGeodetic=false"
    "&outFields=Region%2CIncidentCount%2C+Death%2CEditDate"
    "&returnGeometry=true&returnCentroid=false&featureEncoding=esriDefault"
    "&multipatchOption=xyFootprint&maxAllowableOffset=&geometryPrecision=&outSR="
    "&datumTransformation=&applyVCSProjection=false&returnIdsOnly=false"
    "&returnUniqueIdsOnly=false&returnCountOnly=false&returnExtentOnly=false"
    "&returnQueryGeometry=false&returnDistinctValues=false&cacheHint=false"
    "&orderByFields=&groupByFieldsForStatistics=&outStatistics=&having="
    "&resultOffset=&resultRecordCount=&returnZ=false&returnM=false"
    "&returnExceededLimitFeatures=true&quantizationParameters=&sqlFormat=none"
    "&f=pjson&token="
)

In [7]:
# Fetch the query result. requests applies no timeout by default, so a stalled
# connection would hang the notebook; raise_for_status() reports an HTTP error
# here rather than letting a later parsing step fail on an error response.
r = requests.get(url, timeout=60)
r.raise_for_status()

In [8]:
# Decode the JSON body and confirm it carries the feature list the parsing
# cells index into. A body without "features" (presumably a service error
# object -- confirm against the ArcGIS response format) otherwise surfaces
# later as a bare KeyError: 'features' with no hint about the cause.
data = r.json()
if not isinstance(data, dict) or "features" not in data:
    error_detail = data.get("error", data) if isinstance(data, dict) else data
    raise ValueError(
        f"Napa County's ArcGIS query returned no 'features': {error_detail}"
    )

## Parse

In [9]:
# Collector for one record per region.
dict_list = list()

In [10]:
# Build one record per feature, keeping the region name and its case count.
# The list is rebuilt from scratch instead of appended to, so re-running this
# cell cannot add a second copy of every region (which would also trip the
# row-count checks in the Vet section).
dict_list = [
    dict(
        area=feature["attributes"]["Region"],
        confirmed_cases=feature["attributes"]["IncidentCount"],
    )
    for feature in data["features"]
]

KeyError: 'features'

In [11]:
# Tabulate the parsed records: one row per region.
df = pd.DataFrame(data=dict_list)

Change area names

In [12]:
# Regions the source labels with a trailing " (Unincorporated)"; the clean
# name is the label without that suffix.
_unincorporated_areas = [
    "North Calistoga",
    "Between St Helena and Calistoga",
    "Pope Valley Area",
    "Between Yountville and St Helena",
    "Chiles Valley Area",
    "Lake Berryessa",
    "West Napa and Yountville Area",
    "South Napa Area",
    "Northeast American Canyon Area",
    "Angwin",
    "Deer Park",
    "West Napa Area",
    "East Napa Area",
]

# Incorporated places, keyed by the label the source uses. "City Napa" and
# "City of Napa" both resolve to "Napa".
_incorporated_places = {
    "City of Calistoga": "Calistoga",
    "City of St. Helena": "St. Helena",
    "Town of Yountville": "Yountville",
    "City of Napa": "Napa",
    "City Napa": "Napa",
    "City of American Canyon": "American Canyon",
}

# Lookup from the source's region label to the name used in the export.
clean_names = {f"{name} (Unincorporated)": name for name in _unincorporated_areas}
clean_names.update(_incorporated_places)

In [13]:
# Translate each source label to its export name. Series.map() yields NaN for
# any label missing from clean_names, and nothing downstream checks for that,
# so a renamed or newly added region would be exported with an empty area.
# Stop here and name the offending labels instead.
df["clean_name"] = df["area"].map(clean_names)
unmapped_areas = sorted(
    df.loc[df["clean_name"].isna(), "area"].astype(str).unique()
)
if unmapped_areas:
    raise ValueError(
        f"Napa County's scraper found areas with no clean name: {unmapped_areas}"
    )

Get the `EditDate` timestamp from the first feature

In [14]:
# Take the EditDate recorded on the first feature as the dataset's timestamp.
first_feature = data["features"][0]
timestamp = first_feature["attributes"]["EditDate"]

In [15]:
# Scale the raw value down by 1000 (presumably epoch milliseconds to seconds --
# confirm against the service's field definition) and convert it in Pacific
# time explicitly. A bare fromtimestamp() uses the machine's local zone, so the
# resulting calendar date would depend on where the notebook runs; the export
# step already works in America/Los_Angeles.
timestamp = datetime.fromtimestamp(
    timestamp / 1000, tz=pytz.timezone("America/Los_Angeles")
)

In [16]:
# Keep only the calendar date of the edit timestamp.
latest_date = pd.Timestamp(timestamp).date()

In [17]:
# Stamp every row with the date taken from the edit timestamp.
df = df.assign(county_date=latest_date)

In [18]:
# Put a constant "county" column first. DataFrame.insert() raises ValueError
# when the column already exists, so guard it to keep the cell re-runnable.
if "county" not in df.columns:
    df.insert(0, "county", "Napa")

Clean for export

In [19]:
# Keep only the columns that are published, exposing the cleaned region name
# under the "area" header.
export_columns = ["county", "clean_name", "confirmed_cases", "county_date"]
export_df = df[export_columns]
export_df = export_df.rename(columns={"clean_name": "area"})

## Vet

In [20]:
# Fail when fewer than the 18 expected regions were scraped. An explicit check
# is used because `assert` statements are removed when Python runs with -O,
# which would silently disable this vet; the exception type and message are
# unchanged.
if len(export_df) < 18:
    raise AssertionError("Napa County's scraper is missing rows")

In [21]:
# Fail when more than the 18 expected regions were scraped. An explicit check
# is used because `assert` statements are removed when Python runs with -O,
# which would silently disable this vet; the exception type and message are
# unchanged.
if len(export_df) > 18:
    raise AssertionError("Napa County's scraper has more rows than before")

## Export

Set today's date in Pacific time; it names the daily file

In [22]:
# Time zone used to decide what "today" is for the daily file name.
tz = pytz.timezone(zone="America/Los_Angeles")

In [23]:
# Current calendar date in the configured time zone.
today = datetime.now(tz=tz).date()

In [24]:
# Name of this county's subfolder under data_dir; the export and combine
# cells both read and write there.
slug = "napa"

In [25]:
# Write today's snapshot as <data_dir>/<slug>/<today>.csv. The folder is
# created first because to_csv() does not create missing parent directories.
export_dir = data_dir / slug
export_dir.mkdir(parents=True, exist_ok=True)
export_df.to_csv(export_dir / f"{today}.csv", index=False)

## Combine

In [26]:
# Every CSV in this county's folder except the combined timeseries itself,
# which is regenerated from the others below.
all_csv_paths = glob.glob(str(data_dir / slug / "*.csv"))
csv_list = [
    csv_path
    for csv_path in all_csv_paths
    if not str(csv_path).endswith("timeseries.csv")
]

In [27]:
# Load every snapshot into a list of frames for concatenation.
#
# Files with "manual" in their file name already carry a "date" column; all
# other files get their "date" from the file name. The file name is taken with
# pathlib rather than by splitting on "/", so it also works with other path
# separators, and "manual" is matched against the file name only, so a parent
# directory containing "manual" cannot misclassify every file.
df_list = []
for csv_path in csv_list:
    csv_file = pathlib.Path(csv_path)
    if "manual" in csv_file.name:
        frame = pd.read_csv(csv_path, parse_dates=["date"])
    else:
        frame = pd.read_csv(csv_path, parse_dates=["county_date"])
        # NOTE(review): this "date" is a string while manual files parse theirs
        # as datetimes -- confirm the mix sorts as intended when both exist.
        frame["date"] = csv_file.stem
    df_list.append(frame)

In [28]:
# Stack all snapshots, then order the rows by date and area.
combined = pd.concat(df_list)
df = combined.sort_values(by=["date", "area"])

In [29]:
# Write the combined history next to the daily snapshots.
timeseries_path = data_dir / slug / "timeseries.csv"
df.to_csv(timeseries_path, index=False)