## Configuration
_Initial steps to get the notebook ready to play nice with our repository. Do not delete this section._

Code formatting with [black](https://pypi.org/project/nb-black/).

In [1]:
%load_ext lab_black

In [2]:
import os
import pytz
import glob
import pathlib

# Anchor all file paths on the directory the notebook is executed from.
this_dir = pathlib.Path(os.path.abspath(os.curdir))
data_dir = this_dir.joinpath("data")

In [3]:
import requests
import pandas as pd
from slugify import slugify
from datetime import datetime
import json

## Download

Retrieve the data by posting the saved query to the Power BI `querydata` endpoint.

In [4]:
# Public Power BI endpoint that answers report queries.
POWER_BI_QUERY_URL = (
    "https://wabi-us-gov-iowa-api.analysis.usgovcloudapi.net/public/reports/querydata"
)
# Query-string parameters sent with the POST.
REQUEST_PARAMS = (("synchronous", "true"),)
# JSON query payload, kept verbatim on ONE line: it is a single-quoted literal, so
# any line break inside it is a syntax error. It selects cities, # Known Cases,
# % Known Cases and % County Population from the "tab2_cities" entity; "CacheKey"
# carries a JSON-escaped copy of the same command.
REQUEST_BODY = '{"version":"1.0.0","queries":[{"Query":{"Commands":[{"SemanticQueryDataShapeCommand":{"Query":{"Version":2,"From":[{"Name":"t1","Entity":"tab2_cities","Type":0}],"Select":[{"Column":{"Expression":{"SourceRef":{"Source":"t1"}},"Property":"cities"},"Name":"tab2_cities.cities"},{"Aggregation":{"Expression":{"Column":{"Expression":{"SourceRef":{"Source":"t1"}},"Property":"# Known Cases"}},"Function":0},"Name":"Sum(tab2_cities.# Known Cases)"},{"Column":{"Expression":{"SourceRef":{"Source":"t1"}},"Property":"% Known Cases"},"Name":"Sum(tab2_cities.% Known Cases)"},{"Column":{"Expression":{"SourceRef":{"Source":"t1"}},"Property":"% County Population"},"Name":"tab2_cities.% County Population"}]},"Binding":{"Primary":{"Groupings":[{"Projections":[0,1,2,3],"Subtotal":1}]},"DataReduction":{"DataVolume":3,"Primary":{"Window":{"Count":500}}},"Version":1}}}]},"CacheKey":"{\\"Commands\\":[{\\"SemanticQueryDataShapeCommand\\":{\\"Query\\":{\\"Version\\":2,\\"From\\":[{\\"Name\\":\\"t1\\",\\"Entity\\":\\"tab2_cities\\",\\"Type\\":0}],\\"Select\\":[{\\"Column\\":{\\"Expression\\":{\\"SourceRef\\":{\\"Source\\":\\"t1\\"}},\\"Property\\":\\"cities\\"},\\"Name\\":\\"tab2_cities.cities\\"},{\\"Aggregation\\":{\\"Expression\\":{\\"Column\\":{\\"Expression\\":{\\"SourceRef\\":{\\"Source\\":\\"t1\\"}},\\"Property\\":\\"# Known Cases\\"}},\\"Function\\":0},\\"Name\\":\\"Sum(tab2_cities.# Known Cases)\\"},{\\"Column\\":{\\"Expression\\":{\\"SourceRef\\":{\\"Source\\":\\"t1\\"}},\\"Property\\":\\"% Known Cases\\"},\\"Name\\":\\"Sum(tab2_cities.% Known Cases)\\"},{\\"Column\\":{\\"Expression\\":{\\"SourceRef\\":{\\"Source\\":\\"t1\\"}},\\"Property\\":\\"% County Population\\"},\\"Name\\":\\"tab2_cities.% County Population\\"}]},\\"Binding\\":{\\"Primary\\":{\\"Groupings\\":[{\\"Projections\\":[0,1,2,3],\\"Subtotal\\":1}]},\\"DataReduction\\":{\\"DataVolume\\":3,\\"Primary\\":{\\"Window\\":{\\"Count\\":500}}},\\"Version\\":1}}}]}","QueryId":"","ApplicationContext":{"DatasetId":"7a039a10-0abd-427a-b052-176c1e5251c8","Sources":[{"ReportId":"3495bdfb-261c-46a7-962f-c264e596bfd7"}]}}],"cancelQueries":[],"modelId":317680}'

In [5]:
# HTTP headers sent with the query POST.
# NOTE(review): these look like they were captured from a browser session — confirm
# which ones the endpoint actually requires.
# NOTE(review): "Content-Length" is a fixed string while the body is passed to
# requests via json=; confirm whether requests recomputes it and drop it if so.
headers = {
    "Host": "wabi-us-gov-iowa-api.analysis.usgovcloudapi.net",
    "Connection": "keep-alive",
    "Content-Length": "2003",
    "Pragma": "no-cache",
    "Cache-Control": "no-cache",
    "Accept": "application/json, text/plain, */*",
    "RequestId": "daf91abc-25aa-b78a-7a09-13cbccd78474",
    "DNT": "1",
    "X-PowerBI-ResourceKey": "14052c78-2248-41f7-9058-5d29b7d79c51",
    "Content-Type": "application/json;charset=UTF-8",
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
    "ActivityId": "0191cbe7-712c-f85f-cd3a-f72d2c3feeb0",
    "Origin": "https://app.powerbigov.us",
    "Sec-Fetch-Site": "cross-site",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Dest": "empty",
    "Referer": "https://app.powerbigov.us/",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "en-US,en;q=0.9,da;q=0.8",
}
# Parse the payload string into Python objects so it can be passed as json=.
json_data = json.loads(REQUEST_BODY)

In [6]:
# POST the saved query to the Power BI endpoint.
# NOTE(review): verify=False disables TLS certificate verification. It is kept so
# the request behaves as before; confirm whether the endpoint still needs it and
# remove it if not.
response = requests.post(
    POWER_BI_QUERY_URL,
    params=REQUEST_PARAMS,
    headers=headers,
    json=json_data,
    verify=False,
    timeout=30,  # seconds; without a timeout requests can wait indefinitely
)
# Fail here, with the HTTP status, instead of later while parsing an error page.
response.raise_for_status()



In [7]:
# Decode the JSON body of the reply into Python objects.
data = response.json()

In [8]:
# Keep the reply's "Date" response header; it is parsed later into county_date.
timestamp = response.headers["Date"]

In [9]:
# Drill down to the first "DS" entry of the first query result.
ds = data["results"][0]["result"]["data"]["dsr"]["DS"][0]

## Parse

In [10]:
# "D0" under ValueDicts is the list of area names; rows point into it by position.
areas = ds["ValueDicts"]["D0"]
areas

['Aptos',
 'Ben Lomond',
 'Boulder Creek',
 'Capitola',
 'Felton',
 'Freedom',
 'Santa Cruz',
 'Scotts Valley',
 'Soquel',
 'Under investigation',
 'Unincorporated',
 'Watsonville']

In [11]:
# Row records of the result; each one carries its cell values under the "C" key.
rows = ds["PH"][1]["DM1"]

In [12]:
# Pair each row's area index (cell 0) with its case count (cell 3).
confirmed_cases = []
have_previous = False  # becomes True once a case count has been resolved
previous_cases = None  # last case count resolved, reused for short rows

for row in rows:
    cells = row["C"]
    index = cells[0]
    if len(cells) > 3:
        cases = cells[3]
    elif have_previous:
        # A short row carries no case count of its own, so reuse the last RESOLVED
        # value. Reading the previous raw row instead breaks when two short rows
        # are adjacent, and on the first row it wraps around to the last row.
        # NOTE(review): assumes the only value a short row omits is the case
        # count; if the feed can omit another column the positions shift — confirm.
        cases = previous_cases
    else:
        raise ValueError(
            "First row has no case count and there is no earlier row to copy from"
        )
    have_previous = True
    previous_cases = cases
    confirmed_cases.append((index, cases))

In [13]:
# Swap each positional area index for the area name it refers to.
matched = [
    (areas[area_position], case_count)
    for area_position, case_count in confirmed_cases
]

In [14]:
# Build the tidy table: a constant county column followed by area and case count.
df = pd.DataFrame(matched, columns=["area", "confirmed_cases"]).assign(
    county="Santa Cruz"
)[["county", "area", "confirmed_cases"]]

In [15]:
# Stamp every row with the reply's Date header, parsed into a datetime.
df["county_date"] = pd.to_datetime(timestamp)

In [16]:
# Shift the timestamp to California local time. "America/Los_Angeles" is the
# canonical IANA name of the zone that the legacy alias "US/Pacific" points to, and
# it is the same zone name the export step uses for the file date.
df["county_date"] = df["county_date"].dt.tz_convert("America/Los_Angeles")

In [17]:
# Reduce the localized timestamp to a YYYY-MM-DD string.
df["county_date"] = df["county_date"].dt.strftime("%Y-%m-%d")

## Vet

In [18]:
# The table must not have more than the 12 expected rows.
assert len(df) <= 12, "Santa Cruz' scraper has extra rows"

In [19]:
# The table must not have fewer than the 12 expected rows.
assert len(df) >= 12, "Santa Cruz' scraper is missing rows"

## Export

Set the date (Pacific time) used to name today's export file.

In [20]:
# Time zone used to decide what "today" is when naming the export file.
tz = pytz.timezone("America/Los_Angeles")

In [21]:
# Current calendar date in that zone; it becomes the daily file's name.
today = datetime.now(tz).date()

In [22]:
# Name of this county's subdirectory under data_dir.
slug = "santa-cruz"

In [23]:
# Save today's snapshot as <data_dir>/<slug>/<date>.csv, without the row index.
daily_csv = data_dir / slug / f"{today}.csv"
df.to_csv(daily_csv, index=False)

## Combine

In [24]:
# Collect every CSV in the county folder except the combined timeseries itself.
csv_pattern = str(data_dir / slug / "*.csv")
csv_list = [
    found for found in glob.glob(csv_pattern) if not found.endswith("timeseries.csv")
]

In [25]:
# Load every collected CSV and make sure each frame has a datetime "date" column.
df_list = []
for csv in csv_list:
    csv_path = pathlib.Path(csv)
    # Test the file name only: a "manual" anywhere in the parent directories would
    # otherwise send every file down the manual branch.
    if "manual" in csv_path.name:
        # Manual files already carry their own "date" column.
        df = pd.read_csv(csv, parse_dates=["date"])
    else:
        df = pd.read_csv(csv, parse_dates=["county_date"])
        # Daily files are named <date>.csv. Path.stem does not depend on the path
        # separator, unlike splitting on "/". Parsing it gives the same dtype as
        # the manual files' parsed "date", so the combined frame can be sorted.
        df["date"] = pd.to_datetime(csv_path.stem)
    df_list.append(df)

In [26]:
# Stack all the loaded frames and order them by date, then by area.
df = pd.concat(df_list).sort_values(
    by=["date", "area"],
)

In [27]:
# Write the combined frame to timeseries.csv in the county folder, without the row index.
df.to_csv(data_dir / slug / "timeseries.csv", index=False)