## Configuration
_Initial steps to get the notebook ready to play nice with our repository. Do not delete this section._

Code formatting with [black](https://pypi.org/project/nb-black/).

In [1]:
%load_ext lab_black

In [2]:
import os
import pathlib

In [3]:
this_dir = pathlib.Path(os.path.abspath(""))

In [4]:
data_dir = this_dir / "data"

In [5]:
import pytz
import glob
import json
import requests
import pandas as pd
from datetime import datetime, date

## Download

In [6]:
POWER_BI_QUERY_URL = (
    "https://wabi-us-gov-iowa-api.analysis.usgovcloudapi.net/public/reports/querydata"
)
REQUEST_PARAMS = (("synchronous", "true"),)
REQUEST_BODY = '{"version":"1.0.0","queries":[{"Query":{"Commands":[{"SemanticQueryDataShapeCommand":{"Query":{"Version":2,"From":[{"Name":"d","Entity":"demo_region_count","Type":0},{"Name":"r","Entity":"region_lastmonth","Type":0}],"Select":[{"Column":{"Expression":{"SourceRef":{"Source":"d"}},"Property":"mregion"},"Name":"demo_region_count.mregion"},{"Aggregation":{"Expression":{"Column":{"Expression":{"SourceRef":{"Source":"d"}},"Property":"case_count"}},"Function":0},"Name":"Sum(demo_region_count.case_count)"},{"Column":{"Expression":{"SourceRef":{"Source":"d"}},"Property":"zip2"},"Name":"demo_region_count.zip2"},{"Aggregation":{"Expression":{"Column":{"Expression":{"SourceRef":{"Source":"r"}},"Property":"case_count"}},"Function":0},"Name":"Sum(region_lastmonth.case_count)"}],"Where":[{"Condition":{"Not":{"Expression":{"In":{"Expressions":[{"Column":{"Expression":{"SourceRef":{"Source":"r"}},"Property":"zip2"}}],"Values":[[{"Literal":{"Value":"null"}}]]}}}}},{"Condition":{"Not":{"Expression":{"In":{"Expressions":[{"Column":{"Expression":{"SourceRef":{"Source":"d"}},"Property":"mregion"}}],"Values":[[{"Literal":{"Value":"\'Unknown\'"}}]]}}}}}],"OrderBy":[{"Direction":2,"Expression":{"Aggregation":{"Expression":{"Column":{"Expression":{"SourceRef":{"Source":"d"}},"Property":"case_count"}},"Function":0}}}]},"Binding":{"Primary":{"Groupings":[{"Projections":[0,1,2,3]}]},"DataReduction":{"DataVolume":3,"Primary":{"Window":{"Count":500}}},"Version":1},"ExecutionMetricsKind":1}}]},"QueryId":"","ApplicationContext":{"DatasetId":"2e66a584-6b5f-4c9f-bffd-12d35d2b8442","Sources":[{"ReportId":"40085a43-f04a-4a8b-ae72-c38ca32531a5","VisualId":"136278efd4c6f694b9b7"}]}}],"cancelQueries":[],"modelId":358821}'

In [7]:
headers = {
    "accept": "application/json, text/plain, */*",
    "accept-language": "en-US,en;q=0.9,da;q=0.8",
    "activityid": "07a2f3cb-ea39-4519-8ee0-b5605aa78145",
    "content-type": "application/json;charset=UTF-8",
    "requestid": "a5565ffd-b522-00a7-afe8-5c4f3a3ce843",
    "sec-ch-ua": '" Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"macOS"',
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "cross-site",
    "x-powerbi-resourcekey": "69a5588e-f8a1-4aef-aac5-ca27ee543007",
    "Referer": "https://app.powerbigov.us/",
    "Referrer-Policy": "strict-origin-when-cross-origin",
}
json_data = json.loads(REQUEST_BODY)

In [8]:
response = requests.post(
    POWER_BI_QUERY_URL,
    params=REQUEST_PARAMS,
    headers=headers,
    json=json_data,
    verify=False,
)



In [9]:
data = response.json()

## Parse

In [10]:
timestamp = response.headers["Date"]

In [11]:
timestamp

'Wed, 27 Jul 2022 16:20:52 GMT'

In [12]:
ds = data["results"][0]["result"]["data"]["dsr"]["DS"][0]

In [13]:
zip_codes = ds["ValueDicts"]["D1"]

In [14]:
rows = ds["PH"][0]["DM0"]

In [15]:
rows

[{'S': [{'N': 'G0', 'T': 1, 'DN': 'D0'},
   {'N': 'G1', 'T': 1, 'DN': 'D1'},
   {'N': 'M0', 'T': 4},
   {'N': 'M1', 'T': 4}],
  'C': [0, 0, 15112, 405]},
 {'C': [1, 15081, 406], 'R': 1},
 {'C': [1, 2, 9596, 206]},
 {'C': [2, 3, 6789, 211]},
 {'C': [0, 4, 6249, 190]},
 {'C': [1, 5, 4615, 133]},
 {'C': [2, 6, 4456, 158]},
 {'C': [3, 7, 4015, 104]},
 {'C': [1, 8, 3918, 101]},
 {'C': [2, 9, 3848, 164]},
 {'C': [3, 10, 3393, 104]},
 {'C': [11, 2819, 88], 'R': 1},
 {'C': [1, 12, 2134, 44]},
 {'C': [2, 13, 1632, 91]},
 {'C': [0, 14, 1621, 55]},
 {'C': [2, 15, 943, 54]},
 {'C': [16, 545, 19], 'R': 1},
 {'C': [3, 17, 429, 14]},
 {'C': [2, 18, 386, 20]},
 {'C': [1, 19, 339, 5]},
 {'C': [3, 20, 282, 3]},
 {'C': [0, 21, 274, 10]},
 {'C': [2, 22, 217, 11]},
 {'C': [23, 211, 7], 'R': 1},
 {'C': [1, 24, 187, 1]},
 {'C': [0, 25, 148, 3]},
 {'C': [3, 26, 137, 4]},
 {'C': [1, 27, 117], 'R': 8},
 {'C': [2, 28, 115, 1]},
 {'C': [1, 29, 109], 'R': 8},
 {'C': [0, 30, 98, 2]},
 {'C': [2, 31, 92, 5]},
 {'C': 

In [16]:
confirmed_cases = []

cases = 0

for row in rows:
    # in some cases, data are encoded in separate keys
    keys = row.keys()
    # pull out the list containing values for most rows
    cells = row["C"]
    # second value in the list is expected to be an iterator/index
    index = cells[1]
    # if this key is present, the value for this place is identical to the previous row
    if "R" in keys:
        if row["R"] == 1:
            cases = cells[1]
            index = cells[0]
        else:
            cases = cells[2]
            index = cells[1]
    # every other row has its value in the third position of the list
    else:
        cases = cells[2]
    # append values to master list
    confirmed_cases.append(
        (
            index,
            cases,
        )
    )

In [17]:
matched = [(zip_codes[index], cases) for index, cases in confirmed_cases]

In [18]:
df = pd.DataFrame(matched, columns=["zip_code", "confirmed_cases"])

In [19]:
df

Unnamed: 0,zip_code,confirmed_cases
0,93906,15112
1,93905,15081
2,93960,9596
3,93955,6789
4,93901,6249
5,93927,4615
6,93933,4456
7,93907,4015
8,93930,3918
9,93940,3848


Match up place names with zip codes

In [20]:
df = df.loc[~df.zip_code.str.contains("Other")]

In [21]:
zip_code_names = {
    "93902": "Prunedale",
    "93907": "Salinas",
    "95004": "Aromas",
    "95012": "Castroville",
    "95039": "Moss Landing",
    "95076": "Watsonville",
    "93920": "Big Sur",
    "93921": "Carmel",
    "93923": "Carmel",
    "93924": "Carmel Valley",
    "93933": "Marina",
    "93940": "Monterey",
    "93942": "Monterey",
    "93950": "Pacific Grove",
    "93953": "Pebble Beach",
    "93955": "Seaside",
    "93901": "Salinas",
    "93905": "Salinas",
    "93906": "Salinas",
    "93908": "Salinas",
    "93912": "Boronda",
    "93915": "Salinas",
    "93962": "Spreckels",
    "93450": "San Ardo",
    "93925": "Chualar",
    "93926": "Gonzales",
    "93927": "Greenfield",
    "93930": "King City",
    "93954": "San Lucas",
    "93960": "Soledad",
    "93922": "Carmel",
    "93426": "Bradley",
    "93451": "San Miguel",
}

In [22]:
df["area_name"] = df.zip_code.map(zip_code_names)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["area_name"] = df.zip_code.map(zip_code_names)


In [23]:
df["area_name"] = df["zip_code"] + ": " + df["area_name"]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["area_name"] = df["zip_code"] + ": " + df["area_name"]


In [24]:
df = df[(df.zip_code != "OtherPBS")]

Set column with county date

In [25]:
df["county_date"] = pd.to_datetime(timestamp)

In [26]:
df["county_date"] = df["county_date"].dt.tz_convert("US/Pacific")

In [27]:
df["county_date"] = df["county_date"].dt.strftime("%Y-%m-%d")

In [28]:
df.insert(0, "county", "Monterey")

Clean up for export

In [29]:
export_df = df[
    ["county", "area_name", "confirmed_cases", "county_date", "zip_code"]
].rename(columns={"area_name": "area", "zip_code": "zip"})

## Vet

In [30]:
try:
    assert not len(export_df) < 33
except AssertionError:
    raise AssertionError("Monterey County's zip code scraper is missing rows")

AssertionError: Monterey County's zip code scraper is missing rows

In [31]:
try:
    assert not len(export_df) > 33
except AssertionError:
    raise AssertionError(
        f"Monterey County's zip code scraper has {len(export_df)-29} more rows than before"
    )

## Export

Set date

In [33]:
tz = pytz.timezone("America/Los_Angeles")

In [34]:
today = datetime.now(tz).date()

In [35]:
slug = "monterey"

In [36]:
export_df.to_csv(data_dir / slug / f"{today}.csv", index=False)

## Combine

In [37]:
csv_list = [
    i
    for i in glob.glob(str(data_dir / slug / "*.csv"))
    if not str(i).endswith("timeseries.csv")
]

In [38]:
df_list = []
for csv in csv_list:
    if "manual" in csv:
        df = pd.read_csv(csv, parse_dates=["date"])
    else:
        file_date = csv.split("/")[-1].replace(".csv", "")
        df = pd.read_csv(csv, parse_dates=["county_date"])
        df["date"] = file_date
    df_list.append(df)

In [39]:
df = pd.concat(df_list).sort_values(["date", "area"])

In [40]:
df.to_csv(data_dir / slug / "timeseries.csv", index=False)