## Configuration
_Initial steps to get the notebook ready to play nice with our repository. Do not delete this section._

Code formatting with [black](https://pypi.org/project/nb-black/).

The lab_black extension is already loaded. To reload it, use:
  %reload_ext lab_black


In [21]:
import os
import pytz
import glob
import pathlib

# Absolute path of the notebook's working directory, and the data folder inside it.
this_dir = pathlib.Path(os.path.abspath(os.curdir))
data_dir = this_dir.joinpath("data")

In [22]:
import requests
import pandas as pd
from datetime import datetime

## Download

Retrieve the case counts by reporting area from the county's ArcGIS feature service

In [23]:
# Feature-service query, split into endpoint and one query parameter per line.
# Adjacent string literals are concatenated into a single URL.
url = (
    "https://services2.arcgis.com/RETsakmE0SJfZXCd/arcgis/rest/services/"
    "Covid19_Cases_New_Workflow/FeatureServer/0/query"
    "?f=json"
    "&cacheHint=true"
    "&groupByFieldsForStatistics=ReportingArea"
    "&orderByFields=ReportingArea%20ASC"
    "&outFields=*"
    "&outStatistics=%5B%7B%22onStatisticField%22%3A%22ReportingArea%22%2C%22outStatisticFieldName%22%3A%22count_result%22%2C%22statisticType%22%3A%22count%22%7D%5D"
    "&resultType=standard"
    "&returnGeometry=false"
    "&spatialRel=esriSpatialRelIntersects"
    "&where=1%3D1"
)

In [24]:
# Bound the wait so a stalled server cannot hang the run, and raise on a
# non-2xx status instead of trying to parse an error response later.
r = requests.get(url, timeout=30)
r.raise_for_status()

In [25]:
# Decode the response body as JSON.
data = r.json()

In [26]:
# Display the decoded payload for a visual check of its structure.
data

{'objectIdFieldName': 'OBJECTID',
 'uniqueIdField': {'name': 'OBJECTID', 'isSystemMaintained': True},
 'globalIdFieldName': '',
 'geometryType': 'esriGeometryPoint',
 'spatialReference': {'wkid': 4326, 'latestWkid': 4326},
 'fields': [{'name': 'count_result',
   'type': 'esriFieldTypeDouble',
   'alias': 'count_result',
   'sqlType': 'sqlTypeFloat',
   'domain': None,
   'defaultValue': None},
  {'name': 'ReportingArea',
   'type': 'esriFieldTypeString',
   'alias': 'ReportingArea',
   'sqlType': 'sqlTypeOther',
   'length': 255,
   'domain': None,
   'defaultValue': None}],
 'features': [{'attributes': {'count_result': 3023, 'ReportingArea': 'Davis'}},
  {'attributes': {'count_result': 2211, 'ReportingArea': 'Unincorporated'}},
  {'attributes': {'count_result': 117, 'ReportingArea': 'Unknown'}},
  {'attributes': {'count_result': 5757, 'ReportingArea': 'West Sacramento'}},
  {'attributes': {'count_result': 712, 'ReportingArea': 'Winters'}},
  {'attributes': {'count_result': 7353, 'Repo

## Parse

In [27]:
# Holds one dict per row that will be loaded into the DataFrame.
dict_list = []

In [28]:
# Build the whole list in one assignment so that re-running this cell replaces
# the records instead of appending a second copy of every row.
dict_list = [
    dict(
        county="Yolo",
        area=feature["attributes"]["ReportingArea"],
        confirmed_cases=feature["attributes"]["count_result"],
    )
    for feature in data["features"]
]

In [29]:
# Load the collected records into a DataFrame, one row per record.
df = pd.DataFrame(dict_list)

Get the timestamp of the feature layer's last edit

In [30]:
# Fetch the layer's JSON description. Bound the wait and raise on a non-2xx
# status so a failed request surfaces here rather than in the JSON parsing.
date_url = "https://services2.arcgis.com/RETsakmE0SJfZXCd/arcgis/rest/services/Covid19_Cases_New_Workflow/FeatureServer/0?f=json"
date_r = requests.get(date_url, timeout=30)
date_r.raise_for_status()
date_data = date_r.json()

In [31]:
# Pull the last-edit value out of the service response.
timestamp = date_data["editingInfo"]["lastEditDate"]

In [32]:
# The last-edit value is divided by 1000 to turn it into the seconds that
# fromtimestamp expects. Read it straight from the response (so re-running this
# cell cannot divide twice) and convert in Pacific time explicitly, so the
# resulting date does not depend on the time zone of the machine running this.
timestamp = datetime.fromtimestamp(
    date_data["editingInfo"]["lastEditDate"] / 1000,
    tz=pytz.timezone("America/Los_Angeles"),
)

In [33]:
# Keep only the calendar date of the timestamp.
latest_date = pd.to_datetime(timestamp).date()

In [34]:
# Stamp every row with that date.
df["county_date"] = latest_date

## Vet

In [35]:
# Explicit check rather than try/assert/re-raise: a failure raises a single,
# unchained AssertionError, and the check still runs when asserts are
# disabled with `python -O`.
if len(df) > 6:
    raise AssertionError("Yolo County's scraper has extra rows")

In [36]:
# Explicit check rather than try/assert/re-raise: a failure raises a single,
# unchained AssertionError, and the check still runs when asserts are
# disabled with `python -O`.
if len(df) < 6:
    raise AssertionError("Yolo County's scraper is missing rows")

## Export

Set date

In [37]:
# Pacific time zone, used to decide what "today" is for the export file name.
tz = pytz.timezone("America/Los_Angeles")

In [38]:
# Current calendar date in the `tz` time zone.
today = datetime.now(tz).date()

In [39]:
# Name of this scraper's subfolder under data_dir.
slug = "yolo"

In [40]:
# Write this run's rows to a CSV named after today's date, without the index.
daily_csv_path = data_dir / slug / f"{today}.csv"
df.to_csv(daily_csv_path, index=False)

## Combine

In [41]:
# Collect every CSV in this scraper's folder except the combined timeseries file.
csv_pattern = str(data_dir / slug / "*.csv")
csv_list = []
for csv_path in glob.glob(csv_pattern):
    if csv_path.endswith("timeseries.csv"):
        continue
    csv_list.append(csv_path)

In [42]:
# Read every collected CSV into its own frame.
df_list = []
for csv_path in csv_list:
    # Work from the file name alone, via pathlib: the logic then does not depend
    # on "/" being the path separator, and a parent directory that happens to
    # contain "manual" cannot misclassify every file.
    file_name = pathlib.Path(csv_path).name
    if "manual" in file_name:
        # Files with "manual" in their name already carry a `date` column.
        file_df = pd.read_csv(csv_path, parse_dates=["date"])
    else:
        file_df = pd.read_csv(csv_path, parse_dates=["county_date"])
        # Other files take their date from the file name.
        # NOTE(review): this value is a plain string while the "manual" branch
        # yields parsed datetimes — confirm the two mix safely when sorted.
        file_df["date"] = file_name.replace(".csv", "")
    df_list.append(file_df)

In [43]:
# Stack all per-file frames, then order the rows by date and area.
combined_df = pd.concat(df_list)
df = combined_df.sort_values(by=["date", "area"])

In [44]:
# Write the combined frame to the timeseries CSV, without the index.
timeseries_csv_path = data_dir / slug / "timeseries.csv"
df.to_csv(timeseries_csv_path, index=False)