## Configuration
_Initial steps to get the notebook ready to play nice with our repository. Do not delete this section._

Code formatting with [black](https://pypi.org/project/nb-black/).

In [1]:
%load_ext lab_black

In [2]:
import os
import pathlib

In [3]:
this_dir = pathlib.Path(os.path.abspath(""))

In [4]:
data_dir = this_dir / "data"

In [5]:
import requests
from bs4 import BeautifulSoup

import json
import re

import pandas as pd
import numpy as np

import pytz
import glob
from datetime import datetime

## Download

Paths to Dashboard

In [6]:
host_url = "https://public.tableau.com"
path = "/views/TITLEDStorylinewithallinone/Storyline2"

Retrive Dashboard data

In [7]:
url = f"{host_url}{path}"

r = requests.get(url, params={":embed": "y", ":showVizHome": "no"})
soup = BeautifulSoup(r.text, "html.parser")

tableauData = json.loads(soup.find("textarea", {"id": "tsConfigContainer"}).text)

dataUrl = f'{host_url}{tableauData["vizql_root"]}/bootstrapSession/sessions/{tableauData["sessionid"]}'

r = requests.post(
    dataUrl,
    data={
        "sheet_id": tableauData["sheetId"],
    },
)

dataReg = re.search("\d+;({.*})\d+;({.*})", r.text, re.MULTILINE)
info = json.loads(dataReg.group(1))
data = json.loads(dataReg.group(2))

## Parse

Filter out the cities data

In [8]:
columnsData = info["worldUpdate"]["applicationPresModel"]["workbookPresModel"][
    "dashboardPresModel"
]["zones"]["5"]["presModelHolder"]["flipboard"]["storyPoints"]["1"][
    "dashboardPresModel"
][
    "zones"
][
    "33"
][
    "presModelHolder"
][
    "visual"
][
    "vizData"
][
    "paneColumnsData"
]
result = [
    {
        "fieldCaption": t.get("fieldCaption", ""),
        "valueIndices": columnsData["paneColumnsList"][t["paneIndices"][0]][
            "vizPaneColumns"
        ][t["columnIndices"][0]]["valueIndices"],
        "aliasIndices": columnsData["paneColumnsList"][t["paneIndices"][0]][
            "vizPaneColumns"
        ][t["columnIndices"][0]]["aliasIndices"],
        "dataType": t.get("dataType"),
        "paneIndices": t["paneIndices"][0],
        "columnIndices": t["columnIndices"][0],
    }
    for t in columnsData["vizDataColumns"]
    if t.get("fieldCaption")
]
dataFull = data["secondaryInfo"]["presModelMap"]["dataDictionary"]["presModelHolder"][
    "genDataDictionaryPresModel"
]["dataSegments"]["0"]["dataColumns"]


def onAlias(it, value, cstring):
    return value[it] if (it >= 0) else cstring["dataValues"][abs(it) - 1]


frameData = {}
cstring = [t for t in dataFull if t["dataType"] == "cstring"][0]
for t in dataFull:
    for index in result:
        if t["dataType"] == index["dataType"]:
            if len(index["valueIndices"]) > 0:
                frameData[f'{index["fieldCaption"]}-value'] = [
                    t["dataValues"][abs(it)] for it in index["valueIndices"]
                ]
            if len(index["aliasIndices"]) > 0:
                filterOut = [
                    onAlias(it, t["dataValues"], cstring)
                    for it in index["aliasIndices"]
                ]
                try:
                    if filterOut[0] > 1:
                        frameData[f'{index["fieldCaption"]}-alias'] = [
                            onAlias(it, t["dataValues"], cstring)
                            for it in index["aliasIndices"]
                        ]
                except:
                    frameData[f'{index["fieldCaption"]}-alias'] = [
                        onAlias(it, t["dataValues"], cstring)
                        for it in index["aliasIndices"]
                    ]

Get Correctional facilities data

In [9]:
columnsData = info["worldUpdate"]["applicationPresModel"]["workbookPresModel"][
    "dashboardPresModel"
]["zones"]["5"]["presModelHolder"]["flipboard"]["storyPoints"]["1"][
    "dashboardPresModel"
][
    "zones"
][
    "8"
][
    "presModelHolder"
][
    "visual"
][
    "vizData"
][
    "paneColumnsData"
]
result = [
    {
        "fieldCaption": t.get("fieldCaption", ""),
        "valueIndices": columnsData["paneColumnsList"][t["paneIndices"][0]][
            "vizPaneColumns"
        ][t["columnIndices"][0]]["valueIndices"],
        "aliasIndices": columnsData["paneColumnsList"][t["paneIndices"][0]][
            "vizPaneColumns"
        ][t["columnIndices"][0]]["aliasIndices"],
        "dataType": t.get("dataType"),
        "paneIndices": t["paneIndices"][0],
        "columnIndices": t["columnIndices"][0],
    }
    for t in columnsData["vizDataColumns"]
    if t.get("fieldCaption")
]
dataFull = data["secondaryInfo"]["presModelMap"]["dataDictionary"]["presModelHolder"][
    "genDataDictionaryPresModel"
]["dataSegments"]["0"]["dataColumns"]


def onAlias(it, value, cstring):
    return value[it] if (it >= 0) else cstring["dataValues"][abs(it) - 1]


frameData_2 = {}
cstring = [t for t in dataFull if t["dataType"] == "cstring"][0]
for t in dataFull:
    for index in result:
        if t["dataType"] == index["dataType"]:
            if len(index["valueIndices"]) > 0:
                frameData_2[f'{index["fieldCaption"]}-value'] = [
                    t["dataValues"][abs(it)] for it in index["valueIndices"]
                ]
            #                 print(frameData[f'{index["fieldCaption"]}-value'])
            if len(index["aliasIndices"]) > 0:
                filterOut = [
                    onAlias(it, t["dataValues"], cstring)
                    for it in index["aliasIndices"]
                ]
                try:
                    if filterOut[0] > 1:
                        frameData_2[f'{index["fieldCaption"]}-alias'] = [
                            onAlias(it, t["dataValues"], cstring)
                            for it in index["aliasIndices"]
                        ]
                except:
                    frameData_2[f'{index["fieldCaption"]}-alias'] = [
                        onAlias(it, t["dataValues"], cstring)
                        for it in index["aliasIndices"]
                    ]

Turn both into DataFrames

In [10]:
df = pd.DataFrame.from_dict(frameData, orient="index").fillna(0).T
df_2 = pd.DataFrame.from_dict(frameData_2, orient="index").fillna(0).T

Just get correctional facilities cases from second dataframe

In [11]:
df_2 = df_2[df_2["Measure Names-alias"] == "Cases"]

In [12]:
df_2 = df_2[df_2["Kings County-alias"] == "State Correctional Facility (Inmate)"]

KeyError: 'Kings County-alias'

In [49]:
df_2 = df_2[["Kings County-alias", "Measure Values-alias"]]

In [50]:
df_2 = df_2.rename(
    columns={"Kings County-alias": "area", "Measure Values-alias": "confirmed_cases"}
)

Rename city dataframe columns

In [51]:
df = df.rename(
    columns={"City-alias": "area", "SUM(Number of Cases)-alias": "confirmed_cases"}
)

In [52]:
df = df[~df.area.isnull()]

In [53]:
df_2.replace(
    "State Correctional Facility (Inmate)", "Correctional facilities", inplace=True
)

Reorder columns, Combine and add date, Kings County, and fix text

In [54]:
df_final = pd.concat([df, df_2])

In [55]:
df_final["county"] = "Kings"
df_final = df_final[["county", "area", "confirmed_cases"]]

In [56]:
df_final

Unnamed: 0,county,area,confirmed_cases
0,Kings,Stratford,264
1,Kings,Other Area,321
2,Kings,Lemoore,5551
3,Kings,Kettleman City,229
4,Kings,Hanford,11946
5,Kings,Corcoran,2747
6,Kings,Avenal,1625
7,Kings,Armona,854
10,Kings,Correctional facilities,7315


Remove any commas from cases column

In [57]:
df_final["confirmed_cases"] = df_final.confirmed_cases.replace(",", "", regex=True)

Dig up updated time

In [58]:
date_sentence = info["worldUpdate"]["applicationPresModel"]["workbookPresModel"][
    "dashboardPresModel"
]["zones"]["5"]["presModelHolder"]["flipboard"]["storyPoints"]["1"][
    "dashboardPresModel"
][
    "zones"
][
    "43"
][
    "zoneCommon"
][
    "name"
]

In [59]:
date_strings = date_sentence.replace("Overview Last Updated on ", "").split(" at ")

In [60]:
date = date_strings[0]

In [61]:
df_final["county_date"] = pd.to_datetime(date)

## Vet

In [62]:
default_kings_len = 9

In [63]:
try:
    assert not len(df_final) > default_kings_len
except AssertionError:
    raise AssertionError("Kings County has more rows than before")

In [64]:
try:
    assert not len(df_final) < default_kings_len
except AssertionError:
    raise AssertionError("Kings County has missing row(s)")

## Export

Set the date

In [65]:
tz = pytz.timezone("America/Los_Angeles")

In [66]:
today = datetime.now(tz).date()

In [67]:
slug = "kings"

In [68]:
df_final.to_csv(data_dir / slug / f"{today}.csv", index=False)

## Combine

In [69]:
csv_list = [
    i
    for i in glob.glob(str(data_dir / slug / "*.csv"))
    if not str(i).endswith("timeseries.csv")
]

In [70]:
df_list = []
for csv in csv_list:
    if "manual" in csv:
        df = pd.read_csv(csv, parse_dates=["date"])
    else:
        file_date = csv.split("/")[-1].replace(".csv", "")
        df = pd.read_csv(csv, parse_dates=["county_date"])
        df["date"] = file_date
    df_list.append(df)

In [71]:
df = pd.concat(df_list).sort_values(["date", "area"])

In [72]:
df.to_csv(data_dir / slug / "timeseries.csv", index=False)