## Configuration
_Initial steps to get the notebook ready to play nice with our repository. Do not delete this section._

Code formatting with [black](https://pypi.org/project/nb-black/).

In [11]:
%load_ext lab_black

The lab_black extension is already loaded. To reload it, use:
  %reload_ext lab_black


In [12]:
import os
import pathlib

In [13]:
# Anchor every path on the directory the notebook is executed from.
this_dir = pathlib.Path.cwd()

In [14]:
# Scraped snapshots and the combined timeseries live under ./data.
data_dir = this_dir.joinpath("data")

In [15]:
import pytz
import glob
import requests
import pandas as pd
from slugify import slugify
from datetime import datetime

## Download

Retrieve the case counts by ZIP code as JSON from the county's ArcGIS feature service

In [16]:
# Query endpoint of the ArcGIS feature layer holding case counts by ZIP code.
# `where=1%3D1` (i.e. 1=1) matches every feature, `outFields=*` returns all
# attributes, and `f=pjson` asks for a JSON response.
# NOTE(review): `returnGeometry=true` is requested, but only the "attributes"
# of each feature are read further down — confirm whether geometry is needed.
url = "https://services7.arcgis.com/RomaVqqozKczDNgd/ArcGIS/rest/services/C19_IC_Cases_by_ZipCode_Dynamics_PublicView_07272021/FeatureServer/0/query?where=1%3D1&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&resultType=none&distance=0.0&units=esriSRUnit_Meter&returnGeodetic=false&outFields=*&returnGeometry=true&featureEncoding=esriDefault&multipatchOption=xyFootprint&maxAllowableOffset=&geometryPrecision=&outSR=&datumTransformation=&applyVCSProjection=false&returnIdsOnly=false&returnUniqueIdsOnly=false&returnCountOnly=false&returnExtentOnly=false&returnQueryGeometry=false&returnDistinctValues=false&cacheHint=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&having=&resultOffset=&resultRecordCount=&returnZ=false&returnM=false&returnExceededLimitFeatures=true&quantizationParameters=&sqlFormat=none&f=pjson&token="

In [17]:
# Download the ZIP-code case counts.
# A timeout keeps an unresponsive server from hanging the scraper forever, and
# raise_for_status() stops here on an HTTP error instead of handing an error
# page to the JSON parser in the next cell.
r = requests.get(url, timeout=60)
r.raise_for_status()

In [18]:
# Decode the response body; the parse step below iterates data["features"].
data = r.json()

# Fail here with a readable message when the payload has no feature list,
# rather than with a bare KeyError inside the parsing loop.
# NOTE(review): ArcGIS services are understood to report some failures inside a
# JSON body rather than via the HTTP status — confirm against the service docs.
if "features" not in data:
    raise ValueError(
        "Imperial County's ArcGIS response has no 'features': " + str(data)[:500]
    )

## Parse

In [19]:
# Accumulator for one record per feature returned by the service.
dict_list = list()

In [20]:
# Flatten each feature's attributes into one flat record per ZIP code.
# The list is rebuilt in place (slice assignment) instead of appended to, so
# re-running this cell replaces the records rather than duplicating them.
dict_list[:] = [
    dict(
        county="Imperial",
        area_name=item["attributes"]["CITY"],
        zip_code=item["attributes"]["ZIPCODE"],
        confirmed_cases=item["attributes"]["TOTAL_CASES"],
    )
    for item in data["features"]
]

In [22]:
# One row per parsed record; columns come from the record keys.
df = pd.DataFrame(data=dict_list)

In [23]:
# Drop the literal ", CA" suffix, then title-case what is left.
# The pattern has no regex metacharacters, so a literal replace is equivalent.
area_names = df["area_name"].str.replace(", CA", "", regex=False)
df["area_name"] = area_names.str.title()

Match zips to names

In [24]:
# Label each area as "<zip>: <name>".
# NOTE(review): this rewrites area_name from its own value, so running the
# cell twice prefixes the zip code twice.
zip_prefix = df["zip_code"] + ": "
df["area_name"] = zip_prefix + df["area_name"]

In [25]:
# Hard-coded label override for ZIP code 92004.
df.loc[df["zip_code"] == "92004", "area_name"] = "92004: Borrego Springs"

In [26]:
# Rows whose zip code is the "UNKNOWN" placeholder get a plain label.
df.loc[df["zip_code"] == "UNKNOWN", "area_name"] = "Unknown"

Get timestamp

In [27]:
# Fetch the layer's metadata; the next cell reads editingInfo.lastEditDate
# from it.
# NOTE(review): this points at the "C19_IC_Cases_by_ZipCode_PublicView" service,
# while the case counts above come from
# "C19_IC_Cases_by_ZipCode_Dynamics_PublicView_07272021" — confirm that this
# layer's edit date still tracks the data actually being scraped.
date_url = "https://services7.arcgis.com/RomaVqqozKczDNgd/ArcGIS/rest/services/C19_IC_Cases_by_ZipCode_PublicView/FeatureServer/0/?f=json"
# The timeout and status check surface a hung or failed request here instead
# of hanging the run or failing later while decoding.
date_r = requests.get(date_url, timeout=60)
date_r.raise_for_status()
date_data = date_r.json()

In [28]:
# Raw last-edit value reported by the layer metadata.
editing_info = date_data["editingInfo"]
timestamp = editing_info["lastEditDate"]

In [29]:
# The raw value is divided by 1000 before conversion, i.e. it is treated as
# epoch milliseconds.
# Convert explicitly in California time: without `tz`, fromtimestamp() uses the
# local zone of whatever machine runs the notebook, so the derived date could
# shift by a day on, for example, a UTC server.
# The value is read from date_data (not from `timestamp` itself) so this cell
# can be re-run without failing on an already-converted datetime.
timestamp = datetime.fromtimestamp(
    date_data["editingInfo"]["lastEditDate"] / 1000,
    tz=pytz.timezone("America/Los_Angeles"),
)

In [30]:
# Keep only the calendar date of the last edit.
latest_date = pd.Timestamp(timestamp).date()

In [31]:
# Stamp every row with the date of the layer's last edit.
df["county_date"] = latest_date

In [32]:
# Select the export columns in their output order, then rename
# area_name -> city and zip_code -> zip.
export_columns = ["county", "area_name", "confirmed_cases", "county_date", "zip_code"]
export_renames = {"area_name": "city", "zip_code": "zip"}
export_df = df[export_columns].rename(columns=export_renames)

## Vet

In [33]:
# Fail loudly if fewer rows than expected came back.
# NOTE(review): 19 appears to be the row count seen on earlier runs — confirm.
assert len(export_df) >= 19, "Imperial County's scraper is missing rows"

In [34]:
# Fail loudly if more rows than expected came back, so that new areas get
# reviewed by hand.
# NOTE(review): 19 appears to be the row count seen on earlier runs — confirm.
assert len(export_df) <= 19, "Imperial County's scraper has more rows than before"

## Export

In [35]:
# Pacific time zone, used below to compute today's date for the output filename.
tz = pytz.timezone("America/Los_Angeles")

In [36]:
# Current calendar date in Pacific time; used as the snapshot's filename.
today = datetime.now(tz=tz).date()

In [37]:
# Name of this scraper's subdirectory under data_dir.
slug = "imperial"

In [38]:
# Write today's snapshot to data/<slug>/<today>.csv.
# The output directory is created first so a fresh checkout does not fail on
# a missing folder.
# NOTE(review): `export_df` (renamed and vetted above) is never written; the
# file is saved from `df`, which keeps the area_name / zip_code column names.
# The combine step accepts both "area_name" and "city", so confirm which
# schema the archive expects before changing this.
out_dir = data_dir / slug
out_dir.mkdir(parents=True, exist_ok=True)
df.to_csv(out_dir / f"{today}.csv", index=False)

## Combine

In [39]:
# Every CSV in this scraper's folder except the combined timeseries, which is
# the output of this section rather than an input to it.
csv_pattern = str(data_dir / slug / "*.csv")
csv_list = []
for found in glob.glob(csv_pattern):
    if str(found).endswith("timeseries.csv"):
        continue
    csv_list.append(found)

In [40]:
# Load every snapshot into a list of frames with a shared "area"/"date" layout.
df_list = []
for csv_path in map(pathlib.Path, csv_list):
    # Look only at the file name: matching "manual" against the whole path
    # would misclassify every file if any parent directory contained the word.
    if "manual" in csv_path.name:
        # Manual files are read with their own "date" column.
        frame = pd.read_csv(csv_path, parse_dates=["date"])
    else:
        # Scraped files are named <date>.csv; older and newer snapshots label
        # the place column "area_name" or "city", so both are mapped to "area".
        frame = pd.read_csv(csv_path, parse_dates=["county_date"]).rename(
            columns={"area_name": "area", "city": "area"}
        )
        # .stem drops the directory and the ".csv" suffix on any OS, unlike
        # splitting the path on "/".
        frame["date"] = csv_path.stem
    df_list.append(frame)

In [41]:
# Stack all snapshots, then order them by date and area.
combined = pd.concat(df_list)
df = combined.sort_values(["date", "area"])

In [42]:
# Save the combined history next to the daily snapshots.
timeseries_path = data_dir / slug / "timeseries.csv"
df.to_csv(timeseries_path, index=False)