## Configuration
_Initial steps to get the notebook ready to play nice with our repository. Do not delete this section._

Code formatting with [black](https://pypi.org/project/nb-black/).

In [2]:
import os
import pathlib

In [3]:
# Absolute path of the current working directory; other paths are built from it.
this_dir = pathlib.Path.cwd()

In [4]:
# Folder, next to the notebook's working directory, that holds the data files.
data_dir = this_dir.joinpath("data")

In [5]:
import pytz
import glob
import requests
import pandas as pd
import urllib, json
from datetime import datetime

## Download

Retrieve the case counts as JSON from the ArcGIS feature service

In [6]:
# ArcGIS FeatureServer query for the CommunitiesCaseCountbyDate_v1 layer.
# Key parameters: where=1=1 (no row filter), outFields=* (every field),
# returnGeometry=false, and f=pjson (JSON response).
url = "https://services6.arcgis.com/LYh3hRvKq5ASgAVM/arcgis/rest/services/CommunitiesCaseCountbyDate_v1/FeatureServer/0/query?where=1%3D1&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&resultType=none&distance=0.0&units=esriSRUnit_Meter&returnGeodetic=false&outFields=*&returnGeometry=false&featureEncoding=esriDefault&multipatchOption=xyFootprint&maxAllowableOffset=&geometryPrecision=&outSR=&datumTransformation=&applyVCSProjection=false&returnIdsOnly=false&returnUniqueIdsOnly=false&returnCountOnly=false&returnExtentOnly=false&returnQueryGeometry=false&returnDistinctValues=false&cacheHint=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&having=&resultOffset=&resultRecordCount=&returnZ=false&returnM=false&returnExceededLimitFeatures=true&quantizationParameters=&sqlFormat=none&f=pjson&token="

In [7]:
# Use a timeout so a stalled connection cannot hang the notebook forever, and
# stop on an HTTP error instead of trying to parse an error page as JSON.
r = requests.get(url, timeout=60)
r.raise_for_status()

In [8]:
# Decode the response body from JSON into Python objects.
data = r.json()

## Parse


In [9]:
# The records live under the top-level "features" key of the response.
features = data["features"]

In [10]:
# Start with an empty list of rows.
values = []

In [11]:
# Build the row list in a single expression so that re-running this cell
# replaces `values` instead of appending a second copy of every row.
# Each row holds the attribute values of one feature, in the feature's key order.
values = [list(feature["attributes"].values()) for feature in features]

In [12]:
# Column names are taken from the attribute keys of the first feature.
first_attributes = data["features"][0]["attributes"]
cols = list(first_attributes)

Convert to dataframe

In [13]:
# One row per feature, labelled with the attribute names collected in `cols`.
df = pd.DataFrame(values, columns=cols)

Reformat and set the date

In [14]:
# Scale the raw `Date` values down by 1,000
# (presumably epoch milliseconds -> seconds; confirm against the service).
df["timestamp"] = df["Date"].div(1000)

In [15]:
# Cast to integers, discarding any fractional part left by the division.
df["timestamp_int"] = df["timestamp"].astype(int)

In [16]:
# Pull the integer timestamps out of the dataframe as a plain Python list.
timestamps = df["timestamp_int"].tolist()

In [17]:
# Convert in an explicit timezone rather than the machine's local one, so the
# resulting calendar dates do not depend on where the notebook runs.
# NOTE(review): assumes the published dates are meant to be read in Pacific
# time, matching the timezone used for the export date later on — confirm.
county_tz = pytz.timezone("America/Los_Angeles")
# Drop tzinfo after converting so later cells still receive naive datetimes.
dates = [
    datetime.fromtimestamp(t, tz=county_tz).replace(tzinfo=None) for t in timestamps
]

In [18]:
# Attach the converted datetimes to the dataframe as a new column.
df["county_date"] = dates

In [19]:
# Keep only the calendar date, dropping the time-of-day component.
df["county_date"] = pd.to_datetime(df["county_date"]).dt.date

Drop columns we know we don't need

In [20]:
# Remove identifier, coordinate and intermediate timestamp columns so that only
# `county_date` and the remaining value columns are left.
unneeded_columns = [
    "OBJECTID",
    "Date",
    "Point_X",
    "Point_Y",
    "timestamp",
    "timestamp_int",
]
trim_df = df.drop(columns=unneeded_columns)

Melt the table from wide (one column per area) to long (one row per date and area)

In [21]:
# Reshape to long format: every non-date column name becomes a value in `area`
# and its cell becomes `confirmed_cases`.
melt_df = trim_df.melt(
    id_vars="county_date", var_name="area", value_name="confirmed_cases"
)

Filter nulls

In [22]:
# Keep only the rows that actually have a case count.
has_count = melt_df["confirmed_cases"].notnull()
filter_df = melt_df[has_count]

Sort by date

In [23]:
# Order the rows chronologically.
sort_df = filter_df.sort_values(by="county_date")

In [24]:
# Maps the underscore-joined column names from the source to display names.
city_names = dict(
    Los_Banos="Los Banos",
    Dos_Palos="Dos Palos",
    LeGrand_Planada="Le Grand Planada",
)

In [25]:
# Apply the display-name mapping to the `area` column only.
area_renames = {"area": city_names}
clean_df = sort_df.replace(to_replace=area_renames)

In [26]:
# insert() mutates clean_df in place and raises ValueError when the column
# already exists, so guard it to keep this cell safe to re-run.
if "county" not in clean_df.columns:
    clean_df.insert(0, "county", "Merced")

In [27]:
# Keep only the rows from the most recent date in the data.
latest_date = clean_df["county_date"].max()
latest_df = clean_df.loc[clean_df["county_date"] == latest_date]

In [28]:
# If an area appears more than once, keep its first row only.
latest_df = latest_df.drop_duplicates(subset="area", keep="first")

## Vet

In [29]:
# More than 10 rows means unexpected extra areas, so the message says so.
# The companion check below covers the too-few case.
assert len(latest_df) <= 10, "Merced's area scraper has extra rows"

In [30]:
# Fewer than 10 rows means at least one expected area is absent.
assert len(latest_df) >= 10, "Merced's area scraper is missing rows"

## Export

Set the export date to today's date in Pacific time

In [31]:
# Timezone used to decide what "today" is for the export file name.
tz = pytz.timezone("America/Los_Angeles")

In [32]:
# Current calendar date in `tz`, independent of the machine's own timezone.
today = datetime.now(tz).date()

In [33]:
# Name of the subfolder of `data_dir` that this notebook reads from and writes to.
slug = "merced"

In [34]:
# Write the latest snapshot to <data_dir>/<slug>/<today>.csv without the index.
latest_path = data_dir / slug / f"{today}.csv"
latest_df.to_csv(latest_path, index=False)

## Combine

In [35]:
# Collect every CSV in this slug's folder except the combined timeseries
# file, which is this notebook's own output.
csv_pattern = str(data_dir / slug / "*.csv")
csv_list = []
for csv_file in glob.glob(csv_pattern):
    if csv_file.endswith("timeseries.csv"):
        continue
    csv_list.append(csv_file)

In [36]:
# Load every collected CSV into a list of dataframes ready to be concatenated.
df_list = []
for csv_file in csv_list:
    csv_path = pathlib.Path(csv_file)
    # Classify by file name only: matching against the whole path would treat
    # every file as manual if any parent directory contained "manual".
    if "manual" in csv_path.name:
        # Manual files carry their own `date` column.
        snapshot_df = pd.read_csv(csv_path, parse_dates=["date"])
    else:
        snapshot_df = pd.read_csv(csv_path, parse_dates=["county_date"])
        # The file name minus its extension supplies the `date` value. Path.stem
        # works with any OS path separator, unlike splitting on "/".
        # NOTE(review): `date` stays a string here but is parsed to datetimes
        # for manual files — confirm the two mix cleanly when sorted.
        snapshot_df["date"] = csv_path.stem
    df_list.append(snapshot_df)

In [37]:
# Stack all loaded files, then order the rows by date and, within a date, by area.
combined_df = pd.concat(df_list)
df = combined_df.sort_values(by=["date", "area"])

In [38]:
# Write the combined table to <data_dir>/<slug>/timeseries.csv without the index.
df.to_csv(data_dir / slug / "timeseries.csv", index=False)