## Configuration
_Initial steps to get the notebook ready to play nice with our repository. Do not delete this section._

Code formatting with [black](https://pypi.org/project/nb-black/).

In [302]:
%load_ext lab_black

The lab_black extension is already loaded. To reload it, use:
  %reload_ext lab_black


In [303]:
import os
import pytz
import glob
import pathlib

# Resolve the notebook's working directory and the data folder beneath it.
this_dir = pathlib.Path(os.getcwd())
data_dir = this_dir.joinpath("data")

In [304]:
import requests
import pandas as pd
import numpy as np
from datetime import datetime

## Download

Retrieve the data for both Yuba and Sutter counties

In [305]:
# ArcGIS FeatureServer query against layer 0 of CasesWeekly_Public.
# `where=1%3D1` is the URL-encoded filter `1=1`; `outFields` requests reportdt,
# name and the per-city case columns; `f=pjson` selects the response format.
url = "https://services5.arcgis.com/THtdW72WxYCCmIVL/ArcGIS/rest/services/CasesWeekly_Public/FeatureServer/0/query?where=1%3D1&objectIds=&time=&resultType=none&outFields=reportdt%2Cname%2CLiveOak%2CYubaCity%2CSutterOther%2CMarysville%2COlivehurstLinda%2CPlumasLake%2CWheatland%2CYubaOther&returnIdsOnly=false&returnUniqueIdsOnly=false&returnCountOnly=false&returnDistinctValues=false&cacheHint=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&having=&resultOffset=&resultRecordCount=&sqlFormat=none&f=pjson&token="

In [306]:
# Fetch the query response. The timeout stops a stalled connection from hanging
# the notebook, and raise_for_status() reports HTTP errors here instead of as
# a JSON decoding error or KeyError in a later cell.
r = requests.get(url, timeout=60)
r.raise_for_status()

In [307]:
# Decode the response body as JSON; later cells read its "features" list.
data = r.json()

## Parse

In [308]:
# Start with an empty collection of attribute records.
dict_list = list()

In [309]:
# Collect a copy of each feature's "attributes" mapping.
# The list is rebuilt from scratch rather than appended to, so re-running this
# cell does not duplicate every record.
dict_list = [dict(item["attributes"]) for item in data["features"]]

In [310]:
# One row per feature, one column per attribute key.
df = pd.DataFrame(data=dict_list)

In [311]:
# Keep the report timestamp, the county name and the per-city case columns,
# in this order.
keep_columns = [
    "reportdt",
    "Name",
    "LiveOak",
    "YubaCity",
    "SutterOther",
    "Marysville",
    "OlivehurstLinda",
    "Wheatland",
    "YubaOther",
    "PlumasLake",
]
df = df.loc[:, keep_columns]

In [312]:
# Lower-case the county name column.
df = df.rename({"Name": "name"}, axis="columns")

Convert `reportdt` from epoch milliseconds to a datetime, then derive the report date

In [313]:
# Scale reportdt down by 1000 so the next cell can parse it with unit="s".
df["reportdt"] = df["reportdt"].div(1000)

In [314]:
# Interpret the numeric reportdt values as seconds and store them as datetimes.
report_times = pd.to_datetime(df["reportdt"], unit="s")
df["reportdt"] = report_times

In [315]:
# Calendar date of each report timestamp (no timezone conversion is applied).
report_dates = df["reportdt"].dt.date
df["county_date"] = report_dates

Keep only the most recent report for each county on each date

In [316]:
# For every (name, county_date) pair, find the row label holding the largest
# reportdt, then pull those rows out of the full frame.
latest_idx = df.groupby(["name", "county_date"])["reportdt"].idxmax()
latest_df = df.loc[latest_idx]

Sift out Yuba County's cities

In [317]:
# Rows whose county name is exactly "Yuba County".
yuba_df = latest_df[latest_df["name"] == "Yuba County"]

In [318]:
# Restrict to Yuba's area columns plus the two date fields, in this order.
yuba_columns = [
    "Marysville",
    "OlivehurstLinda",
    "PlumasLake",
    "Wheatland",
    "YubaOther",
    "county_date",
    "reportdt",
]
yuba_df = yuba_df.loc[:, yuba_columns]

In [319]:
# Keep only the row(s) carrying the newest report timestamp.
newest_report = yuba_df["reportdt"].max()
yuba_latest = yuba_df[yuba_df["reportdt"] == newest_report]

Melt down

In [320]:
# Index the frame by county_date (the melt cell calls reset_index() on it).
# Reassigning avoids mutating a filtered frame in place, and the guard lets the
# cell be re-run without a KeyError once county_date is already the index.
if "county_date" in yuba_latest.columns:
    yuba_latest = yuba_latest.set_index("county_date")

In [321]:
# Reshape wide -> long: one row per (county_date, area) with its case count.
# Every column other than county_date (reportdt included) becomes an "area".
yuba_wide = yuba_latest.reset_index()
yuba_melt = yuba_wide.melt(
    id_vars="county_date",
    var_name="area",
    value_name="confirmed_cases",
)

Clean names

In [322]:
# The melt turned the reportdt column into rows too; discard them.
yuba_clean = yuba_melt[yuba_melt["area"].ne("reportdt")]

In [323]:
# Source column name -> display name for the areas that need tidying.
clean_yuba_cities = dict(
    OlivehurstLinda="Olivehurst Linda",
    PlumasLake="Plumas Lake",
    YubaOther="Other",
)

In [324]:
# Apply the display-name mapping to the "area" column only.
area_labels = {"area": clean_yuba_cities}
yuba_clean = yuba_clean.replace(to_replace=area_labels)

In [329]:
# Tag every row with the county as the first column.
# DataFrame.insert raises ValueError if the column already exists, so the guard
# lets this cell be re-run safely.
if "county" not in yuba_clean.columns:
    yuba_clean.insert(0, "county", "Yuba")
yuba_clean

Unnamed: 0,county,county_date,area,confirmed_cases
0,Yuba,2021-10-20,Marysville,4847.0
1,Yuba,2021-10-20,Olivehurst Linda,2957.0
2,Yuba,2021-10-20,Plumas Lake,1236.0
3,Yuba,2021-10-20,Wheatland,577.0
4,Yuba,2021-10-20,Other,853.0


## Vet

In [330]:
# Fail loudly if the scrape produced more than the five expected rows.
# An explicit check, unlike an `assert` wrapped in try/except, still runs under
# `python -O` and raises a single, unchained AssertionError.
if len(yuba_clean) > 5:
    raise AssertionError("Yuba County's scraper has extra rows")

In [331]:
# Fail loudly if the scrape produced fewer than the five expected rows.
# An explicit check, unlike an `assert` wrapped in try/except, still runs under
# `python -O` and raises a single, unchained AssertionError.
if len(yuba_clean) < 5:
    raise AssertionError("Yuba County's scraper is missing rows")

## Export

Set date

In [332]:
# Timezone used to decide what "today" is when naming the export file.
tz = pytz.timezone("America/Los_Angeles")

In [333]:
# Current calendar date in the configured timezone.
today = datetime.now(tz=tz).date()

In [334]:
# Name of the subdirectory under data_dir that holds this scraper's CSV files.
slug = "yuba"

In [335]:
# Write today's snapshot to <data_dir>/<slug>/<today>.csv without the index.
daily_path = data_dir / slug / f"{today}.csv"
yuba_clean.to_csv(daily_path, index=False)

## Combine

In [336]:
# Every CSV in this scraper's folder except the combined timeseries output.
csv_pattern = str(data_dir / slug / "*.csv")
csv_list = [
    path
    for path in glob.glob(csv_pattern)
    if not str(path).endswith("timeseries.csv")
]

In [337]:
# Read each per-day CSV into a frame that carries a "date" column.
df_list = []
for csv in csv_list:
    csv_path = pathlib.Path(csv)
    # Match "manual" against the file name only, so a parent directory that
    # happens to contain "manual" does not misroute every file.
    if "manual" in csv_path.name:
        df = pd.read_csv(csv, parse_dates=["date"])
    else:
        # The file name without its extension is the scrape date; .stem works
        # regardless of the platform's path separator.
        file_date = csv_path.stem
        df = pd.read_csv(csv, parse_dates=["county_date"])
        df["date"] = file_date
    df_list.append(df)
# NOTE(review): manual files get a parsed datetime "date" while scraped files
# get a string -- confirm the later sort on "date" tolerates that mix.

In [338]:
# Stack all the daily frames and order them by date, then area.
combined = pd.concat(df_list)
df = combined.sort_values(by=["date", "area"])

In [339]:
# Write the combined history to <data_dir>/<slug>/timeseries.csv without the index.
timeseries_path = data_dir / slug / "timeseries.csv"
df.to_csv(timeseries_path, index=False)