## Configuration
_Initial steps to get the notebook ready to work well with our repository. Do not delete this section._

Code formatting with [black](https://pypi.org/project/nb-black/).

In [1]:
# Load the lab_black extension so code cells are formatted with black.
%load_ext lab_black

In [2]:
import os
import pathlib

In [3]:
# Absolute path of the current working directory; data_dir is anchored to it.
this_dir = pathlib.Path.cwd()

In [4]:
# Root folder for CSV files, located under the working directory.
data_dir = this_dir.joinpath("data")

In [5]:
import pytz
import glob
import json
import requests
import pandas as pd
from datetime import datetime

## Fetch a valid token

In [6]:
# Site definition endpoint; its JSON response is parsed below to obtain a token.
TOKEN_URL = (
    "https://phweb.kerncounty.com/Geocortex/Essentials/REST/sites/"
    "COVID19_PublicInfo?f=json&deep=true"
)

In [7]:
# Fetch the site definition. The timeout keeps an unreachable host from hanging
# the run indefinitely, and raise_for_status() surfaces HTTP errors here instead
# of as a JSON decoding error or KeyError in a later cell.
response = requests.get(TOKEN_URL, timeout=30)
response.raise_for_status()
data = response.json()

ConnectionError: HTTPSConnectionPool(host='phweb.kerncounty.com', port=443): Max retries exceeded with url: /Geocortex/Essentials/REST/sites/COVID19_PublicInfo?f=json&deep=true (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fa76c6294c0>: Failed to establish a new connection: [Errno -3] Temporary failure in name resolution'))

In [8]:
# The first map service's connection string is ";"-delimited; its last field
# is kept as the token and inserted into the feed URL's query string.
connection_string = data["map"]["mapServices"][0]["connectionString"]
token = connection_string.split(";")[-1]

## Retrieve the feed

In [9]:
# Query MapServer/0 for every row (where 1=1) with all fields and no geometry.
url = (
    "https://phweb.kerncounty.com/arcgis/rest/services/PH/COVID19_PublicInfo/"
    f"MapServer/0/query?{token}&f=json&where=1%3D1&returnGeometry=false&outFields=*"
)

In [10]:
# Download the feed. The timeout prevents the run from hanging on an
# unresponsive server, and raise_for_status() fails fast on HTTP errors rather
# than letting an error page reach the parsing cells.
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.json()

## Parse

In [11]:
# Build one flat record per feature returned by the feed.
dict_list = []

for feature in data["features"]:
    attributes = feature["attributes"]
    raw_name = attributes["PO_NAME"]
    zip_code = attributes["zipCode"].strip()

    # The feed's "unk" placeholder is spelled out as "Unknown".
    po_name = "Unknown" if raw_name == "unk" else raw_name

    # Rows with a blank ZIP code are labelled by name alone.
    area = f"{zip_code}: {po_name}" if zip_code else po_name

    dict_list.append(
        {
            "county": "Kern",
            "area": area,
            "city": po_name,
            "zip_code": zip_code,
            "confirmed_cases": attributes["cnt"],
        }
    )

Convert the parsed records to a dataframe

In [12]:
# One row per parsed record; columns come from the record keys.
df = pd.DataFrame(data=dict_list)

#### Get timestamp

In [13]:
# "sysdate" is an epoch value in milliseconds. Convert it with an explicit
# Pacific timezone so the resulting date does not depend on the timezone of the
# machine running the notebook (a naive fromtimestamp() uses the host's local
# time), and so it lines up with how `today` is computed.
sysdate_ms = data["features"][0]["attributes"]["sysdate"]
timestamp = datetime.fromtimestamp(
    sysdate_ms / 1000, tz=pytz.timezone("America/Los_Angeles")
)
latest_date = timestamp.date()

In [14]:
# Stamp every row with the date reported by the feed.
df = df.assign(county_date=latest_date)

In [15]:
# Timezone used to decide what "today" is when naming the daily export file.
tz = pytz.timezone("America/Los_Angeles")

In [16]:
# Current calendar date in the timezone above; used as the daily CSV's file name.
today = datetime.now(tz=tz).date()

In [17]:
# Name of the subfolder of data_dir that holds this scraper's CSV files.
slug = "kern"

In [18]:
# Column subset selected for export.
# NOTE(review): export_df is not referenced by any later cell in this notebook.
export_columns = ["county", "area", "county_date", "confirmed_cases", "zip_code"]
export_df = df[export_columns]

## Vet

In [19]:
# The scrape must not return more than the expected 51 rows.
assert len(df) <= 51, "Kern's area scraper has extra rows"

In [20]:
# The scrape must not return fewer than the expected 51 rows.
assert len(df) >= 51, "Kern's area scraper is missing rows"

## Export 

In [21]:
# Write today's scrape to <data_dir>/<slug>/<today>.csv.
# NOTE(review): this writes the full df (including "city"), not export_df;
# the combine step later drops "city" — confirm that is intended.
daily_csv_path = data_dir / slug / f"{today}.csv"
df.to_csv(daily_csv_path, index=False)

## Combine

In [22]:
# Every CSV in this scraper's folder except the combined timeseries file.
csv_list = [
    path
    for path in glob.glob(str(data_dir / slug / "*.csv"))
    if not path.endswith("timeseries.csv")
]

In [23]:
# Load every daily CSV into a list of frames ready to be concatenated.
#
# Paths are handled with pathlib so that:
#   * "manual" is matched against the file name only (matching the whole path
#     would misclassify every file if a parent directory contained "manual");
#   * the date is taken from the file stem regardless of the OS path separator.
# Distinct loop names avoid shadowing the stdlib `csv` module name and avoid
# overwriting the scraped `df` on each iteration.
df_list = []
for csv_path in map(pathlib.Path, csv_list):
    if "manual" in csv_path.name:
        # Manual files already carry their own "date" column.
        frame = pd.read_csv(csv_path, parse_dates=["date"])
    else:
        frame = pd.read_csv(csv_path, parse_dates=["county_date"])
        # Scraped files are named <date>.csv; the stem becomes the "date" value.
        # NOTE(review): this is a string while manual files have a parsed
        # datetime "date" — confirm the mixed types sort as intended downstream.
        frame["date"] = csv_path.stem
    df_list.append(frame)

In [24]:
# Stack all daily frames, order them by date then area, and drop the "city" column.
df = (
    pd.concat(df_list)
    .sort_values(by=["date", "area"])
    .drop(columns=["city"])
)

In [25]:
# Write the combined history to <data_dir>/<slug>/timeseries.csv.
timeseries_csv_path = data_dir / slug / "timeseries.csv"
df.to_csv(timeseries_csv_path, index=False)