## Configuration
_Initial steps to get the notebook ready to play nice with our repository. Do not delete this section._

Code formatting with [black](https://pypi.org/project/nb-black/).

In [2]:
import os
import pytz
import glob
import pathlib

# Anchor all paths at the directory the notebook is executed from.
this_dir = pathlib.Path(os.getcwd())
data_dir = this_dir.joinpath("data")

In [3]:
import requests
import pandas as pd
from slugify import slugify
from datetime import datetime

## Download

Retrieve the ZIP code case counts as JSON from the county's ArcGIS feature service

In [4]:
# Query endpoint for layer 8 of the SJCPHS_COVIDDashboard_DataPackage feature
# service. The query string asks for every row (where=1=1) and every field
# (outFields=*), without geometry, formatted as JSON (f=pjson).
url = "https://services6.arcgis.com/NtWO9krY7z9jd3mY/arcgis/rest/services/SJCPHS_COVIDDashboard_DataPackage/FeatureServer/8/query?where=1%3D1&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&resultType=none&distance=0.0&units=esriSRUnit_Meter&returnGeodetic=false&outFields=*&returnGeometry=false&returnCentroid=false&featureEncoding=esriDefault&multipatchOption=xyFootprint&maxAllowableOffset=&geometryPrecision=&outSR=&datumTransformation=&applyVCSProjection=false&returnIdsOnly=false&returnUniqueIdsOnly=false&returnCountOnly=false&returnExtentOnly=false&returnQueryGeometry=false&returnDistinctValues=false&cacheHint=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&having=&resultOffset=&resultRecordCount=&returnZ=false&returnM=false&returnExceededLimitFeatures=true&quantizationParameters=&sqlFormat=none&f=pjson&token="

In [5]:
# Fetch the query results. The timeout stops a stalled connection from hanging
# the notebook indefinitely, and raise_for_status() turns an HTTP error
# response into an exception here instead of a confusing JSON/KeyError later.
r = requests.get(url, timeout=60)
r.raise_for_status()

In [6]:
# Decode the response body as JSON; the parse step below reads data["features"].
data = r.json()

## Parse

In [7]:
# Accumulator for one record per ZIP code feature.
dict_list = list()

In [8]:
# Build one record per feature returned by the query. dict_list is rebound
# here rather than appended to, so re-running this cell on its own no longer
# duplicates every row in the list.
dict_list = [
    dict(
        county="San Joaquin",
        zip_code=item["attributes"]["ZIPCODE"],
        confirmed_cases=item["attributes"]["CASECOUNT"],
    )
    for item in data["features"]
]

In [9]:
# Tabulate the parsed records: one row per ZIP code.
df = pd.DataFrame(data=dict_list)

Match city names to zips

In [10]:
# ZIP codes for the city lookup, as strings. Order matters: each entry is
# paired by position with the entry at the same index in `cities`.
zips = [
    str(code)
    for code in (
        *range(95201, 95216),  # 95201 through 95215
        95219, 95220, 95227, 95230, 95231, 95234, 95236, 95237,
        95240, 95241, 95242, 95253, 95258, 95267, 95269, 95290,
        95304, 95320, 95330, 95336, 95337, 95355, 95361, 95366,
        95376, 95377, 95378, 95385, 95391, 95632, 95686, 95254, 95367,
    )
]

In [11]:
# City names for the lookup, positionally aligned with `zips`; runs of the
# same city are written as repetitions.
cities = (
    ["Stockton"] * 16
    + ["Acampo", "Clements", "Farmington", "French Camp", "Holt", "Linden", "Lockeford"]
    + ["Lodi"] * 3
    + ["Victor", "Woodbridge"]
    + ["Stockton"] * 3
    + ["Banta", "Escalon", "Lathrop"]
    + ["Manteca"] * 2
    + ["Modesto", "Oakdale", "Ripon"]
    + ["Tracy"] * 3
    + ["Vernalis", "Mountain House", "Galt", "Thornton", "Wallace", "Riverbank"]
)

In [12]:
# Column-oriented mapping for the ZIP-to-city lookup table.
d = dict(zip_code=zips, city=cities)

In [13]:
# Lookup table with one row per ZIP code and its city name.
matchup_df = pd.DataFrame(data=d)

In [14]:
# Left join keeps every scraped ZIP code, including any without a city match.
merge_df = df.merge(matchup_df, on="zip_code", how="left")

In [15]:
# Label each row "<zip>: <city>". A ZIP code with no entry in the lookup has a
# missing city after the left join, which would turn the whole label into NaN
# and lose the ZIP code from the export; fall back to the bare ZIP code there.
merge_df["area"] = (merge_df["zip_code"] + ": " + merge_df["city"]).fillna(
    merge_df["zip_code"]
)

Get the timestamp of the feature service's last edit

In [16]:
# Metadata endpoint for layer 0 of the same feature service, requested as JSON.
date_url = "https://services6.arcgis.com/NtWO9krY7z9jd3mY/arcgis/rest/services/SJCPHS_COVIDDashboard_DataPackage/FeatureServer/0?f=json"
# The timeout prevents an indefinite hang; raise_for_status() fails fast on an
# HTTP error instead of letting a bad response reach the JSON parsing below.
date_r = requests.get(date_url, timeout=60)
date_r.raise_for_status()
date_data = date_r.json()

In [17]:
# Pull the layer's last-edit time out of the service metadata.
timestamp = date_data["editingInfo"]["lastEditDate"]

In [18]:
# The raw value is divided by 1000 before conversion (presumably epoch
# milliseconds -> seconds; confirm against the service docs). The conversion is
# done explicitly in Pacific time: without a tz argument, fromtimestamp() uses
# the machine's local zone, so the derived date would depend on where the
# notebook runs.
timestamp = datetime.fromtimestamp(
    timestamp / 1000, tz=pytz.timezone("America/Los_Angeles")
)

In [19]:
# Keep only the calendar date of the last edit.
latest_date = pd.Timestamp(timestamp).date()

In [20]:
# Stamp every row with the date the county last edited the data.
merge_df = merge_df.assign(county_date=latest_date)

## Vet

In [21]:
# Sanity-check the number of scraped rows. An explicit raise is used instead of
# an `assert` statement because asserts are skipped when Python runs with -O,
# which would silently disable this check. The exception type and message are
# unchanged.
if len(df) != 37:
    raise AssertionError("San Joaquin's zip code scraper is missing rows")

In [22]:
# Narrow the frame to the columns that are written out, in output order.
export_columns = ["county", "area", "county_date", "confirmed_cases"]
export_df = merge_df.loc[:, export_columns]

## Export

Set date

In [23]:
# Time zone used to decide what "today" is when naming the export file.
tz = pytz.timezone("America/Los_Angeles")

In [24]:
# Current calendar date in the configured time zone.
today = datetime.now(tz=tz).date()

In [25]:
# Name of this county's subfolder inside the data directory.
slug = "san-joaquin"

In [26]:
# Write today's snapshot to <data_dir>/<slug>/<today>.csv. to_csv() does not
# create missing directories, so make sure the county folder exists first
# (a no-op when it is already there).
out_dir = data_dir / slug
out_dir.mkdir(parents=True, exist_ok=True)
export_df.to_csv(out_dir / f"{today}.csv", index=False)

## Combine

In [27]:
# Collect every CSV in the county folder except the combined timeseries file,
# which is an output of this notebook rather than an input.
csv_list = []
for found_path in glob.glob(str(data_dir / slug / "*.csv")):
    if str(found_path).endswith("timeseries.csv"):
        continue
    csv_list.append(found_path)

In [28]:
# Load every snapshot into a list of frames that all carry a "date" column.
# The file name is taken with pathlib rather than by splitting on "/", so the
# date is extracted correctly on Windows paths too, and the "manual" check
# looks only at the file name so a parent directory containing "manual" cannot
# misclassify every file.
# NOTE(review): manual files get a parsed datetime "date" while scraped files
# get the file-name string; confirm the mixed types sort as intended downstream.
df_list = []
for csv_path in csv_list:
    csv_file = pathlib.Path(csv_path)
    if "manual" in csv_file.name:
        # Manually entered files already include their own "date" column.
        frame = pd.read_csv(csv_file, parse_dates=["date"])
    else:
        # Scraped files are named <date>.csv; the stem supplies the date.
        frame = pd.read_csv(csv_file, parse_dates=["county_date"])
        frame["date"] = csv_file.stem
    df_list.append(frame)

In [29]:
# Stack all snapshots, then order the rows by date and area.
combined = pd.concat(df_list)
df = combined.sort_values(by=["date", "area"])

In [30]:
# Write the combined history next to the daily snapshots.
timeseries_path = data_dir / slug / "timeseries.csv"
df.to_csv(timeseries_path, index=False)