## Configuration
_Initial steps to get the notebook ready to play nice with our repository. Do not delete this section._

Code formatting with [black](https://pypi.org/project/nb-black/).

In [1]:
%load_ext lab_black

In [2]:
import os
import pathlib

In [3]:
# Absolute path of the directory the notebook is being run from; every other
# path in this notebook is built relative to it.
this_dir = pathlib.Path.cwd()

In [4]:
# Root folder for scraped output: the "data" directory under this_dir.
data_dir = this_dir.joinpath("data")

In [5]:
import pytz
import glob
import json
import requests
import pandas as pd
from datetime import datetime

## Download

Retrieve the area-level case counts from the county's ArcGIS REST service

In [6]:
# ArcGIS REST "query" endpoint for layer 1 of the county's MC_COVID19 MapServer.
# where=1%3D1 (i.e. 1=1) places no filter on the records; outFields asks for the
# reporting-area name, T_CASES and DECEASED; f=pjson requests a JSON response.
url = "https://gis.maderacounty.com/server/rest/services/COVID19/MC_COVID19/MapServer/1/query?where=1%3D1&text=&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&relationParam=&outFields=MCGIS.DBO.PH_COVID19_REPORTING_AREAS.AREA%2C+T_CASES%2C+DECEASED&returnGeometry=true&returnTrueCurves=false&maxAllowableOffset=&geometryPrecision=&outSR=&having=&returnIdsOnly=false&returnCountOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&gdbVersion=&historicMoment=&returnDistinctValues=false&resultOffset=&resultRecordCount=&queryByDistance=&returnExtentOnly=false&datumTransformation=&parameterValues=&rangeValues=&quantizationParameters=&f=pjson"

In [7]:
# Without a timeout, requests waits indefinitely on an unresponsive server.
# raise_for_status() surfaces HTTP-level failures here instead of letting them
# show up later as a confusing JSON or key error.
r = requests.get(url, timeout=60)
r.raise_for_status()

In [8]:
# Decode the response body as JSON. The Parse section below reads the
# top-level "features" list from this object.
data = r.json()

## Parse

In [9]:
# Accumulator for one plain-dict row per reporting area.
dict_list = list()

In [10]:
# A response without a "features" key used to surface as a bare
# KeyError: 'features'. ArcGIS REST services typically report a failed query
# as an {"error": ...} payload, so include it (or the whole payload) in the
# message to make the failure diagnosable.
if "features" not in data:
    raise KeyError(
        f"Madera area query returned no 'features': {data.get('error', data)}"
    )

# Build the rows in a single expression so that re-running this cell replaces
# dict_list instead of appending a second copy of every area to it.
dict_list = [
    dict(
        county="Madera",
        area=item["attributes"]["MCGIS.DBO.PH_COVID19_REPORTING_AREAS.AREA"],
        confirmed_cases=item["attributes"]["T_CASES"],
    )
    for item in data["features"]
]

KeyError: 'features'

In [11]:
# One row per parsed feature, with the columns county, area and confirmed_cases.
df = pd.DataFrame(dict_list)

Get timestamp

In [12]:
# Layer 2 of the same MapServer; only the DATE_UPDATED field is requested and
# geometry is switched off (returnGeometry=false).
date_url = "https://gis.maderacounty.com/server/rest/services/COVID19/MC_COVID19/MapServer/2/query?where=1%3D1&text=&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&relationParam=&outFields=DATE_UPDATED&returnGeometry=false&returnTrueCurves=false&maxAllowableOffset=&geometryPrecision=&outSR=&having=&returnIdsOnly=false&returnCountOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&gdbVersion=&historicMoment=&returnDistinctValues=false&resultOffset=&resultRecordCount=&queryByDistance=&returnExtentOnly=false&datumTransformation=&parameterValues=&rangeValues=&quantizationParameters=&f=pjson"
# Bound the wait on the server and fail on HTTP errors before trying to parse.
date_r = requests.get(date_url, timeout=60)
date_r.raise_for_status()
date_data = date_r.json()

In [13]:
# The update stamp is read from the first feature returned by the date query.
first_feature = date_data["features"][0]
date = first_feature["attributes"]["DATE_UPDATED"]

In [14]:
# ArcGIS REST encodes date-typed fields as epoch milliseconds. A bare
# pd.to_datetime() reads a number as nanoseconds and would silently produce
# 1970-01-01, so numeric values are converted with unit="ms". String values
# are parsed exactly as before.
# NOTE(review): DATE_UPDATED's field type is not visible here - confirm.
if isinstance(date, (int, float)):
    latest_date = pd.to_datetime(date, unit="ms").date()
else:
    latest_date = pd.to_datetime(date).date()

In [15]:
# Stamp every row with the date the county reports as its last update.
df = df.assign(county_date=latest_date)

## Vet

In [16]:
# The scraper expects exactly six reporting areas; more than six fails here.
assert len(df) <= 6, "Madera's area scraper has extra rows"

In [17]:
# The scraper expects exactly six reporting areas; fewer than six fails here.
assert len(df) >= 6, "Madera's area scraper is missing rows"

## Export

Set date

In [18]:
# Pacific time zone, used below to work out today's date.
tz = pytz.timezone("America/Los_Angeles")

In [19]:
# Current calendar date in the tz time zone; it becomes the name of the daily CSV.
today = datetime.now(tz).date()

In [20]:
# Name of the subdirectory of data_dir that holds this county's files.
slug = "madera"

In [21]:
# Write the current scrape to <data_dir>/<slug>/<today>.csv without the index column.
daily_csv_path = data_dir / slug / f"{today}.csv"
df.to_csv(daily_csv_path, index=False)

## Combine

In [22]:
# Every CSV in this county's folder except the combined timeseries file,
# which the last cell of this notebook writes.
csv_pattern = str(data_dir / slug / "*.csv")
csv_list = [
    path for path in glob.glob(csv_pattern) if not path.endswith("timeseries.csv")
]

In [23]:
# Load each CSV into its own frame and make sure every frame has a "date"
# column for the combined timeseries to be sorted on.
df_list = []
for csv_file in csv_list:
    csv_path = pathlib.Path(csv_file)
    # Look only at the file name. Checking the whole path would treat every
    # file as manual if a parent directory contained "manual", and splitting
    # on "/" does not isolate the file name on Windows.
    if "manual" in csv_path.name:
        # Manual files are read with their own "date" column parsed as dates.
        frame = pd.read_csv(csv_path, parse_dates=["date"])
    else:
        # For all other files the file name (minus ".csv") supplies the date.
        file_date = csv_path.stem
        frame = pd.read_csv(csv_path, parse_dates=["county_date"])
        frame["date"] = file_date
    # A separate loop variable keeps the scraped `df` intact while loading.
    df_list.append(frame)

In [24]:
# Stack every loaded frame, then order rows by date and, within a date, by area.
combined = pd.concat(df_list)
df = combined.sort_values(by=["date", "area"])

In [25]:
# Save the combined history next to the daily files, without the index column.
timeseries_path = data_dir / slug / "timeseries.csv"
df.to_csv(timeseries_path, index=False)