## Configuration
_Initial steps to get the notebook ready to play nice with our repository. Do not delete this section._

Code formatting with [black](https://pypi.org/project/nb-black/).

In [1]:
%load_ext lab_black

In [2]:
import os
import pathlib

In [3]:
this_dir = pathlib.Path(os.path.abspath(""))

In [4]:
data_dir = this_dir / "data"

In [5]:
import requests
from bs4 import BeautifulSoup

import json
import re

import pandas as pd
import numpy as np

import pytz
import glob
from datetime import datetime
from tableauscraper import TableauScraper as TS

## Download

Paths to Dashboard

In [6]:
host_url = "https://public.tableau.com"
path = "/views/COVID_Case_Dashboard_English/Overview"

Retrive Dashboard data

In [7]:
url = f"{host_url}{path}"

In [8]:
ts = TS()
ts.loads(url)

In [9]:
workbook = ts.getWorkbook()

In [10]:
sheet = next(w for w in workbook.worksheets if w.name == "City")

In [11]:
df = sheet.data

In [12]:
city_df = df[df["Measure Names-alias"] == "Cases"][
    [
        "City -value",
        "Measure Values-alias",
    ]
].rename(columns={"City -value": "area", "Measure Values-alias": "confirmed_cases"})

In [13]:
city_df

Unnamed: 0,area,confirmed_cases
9,Kettleman City,446
10,Other Area,480
11,Stratford,499
12,%null%,815
13,Armona,1622
14,Avenal,7295
15,Corcoran,10902
16,Lemoore,11035
17,Hanford,23579


In [14]:
df2 = next(w for w in workbook.worksheets if w.name == "Totals Cases & Deaths").data

In [15]:
prison_df = (
    df2[
        (df2["Measure Names-alias"] == "Cases")
        & (df2["type-alias"] == "State Correctional Facilities")
    ]
    .rename(columns={"type-alias": "area", "Measure Values-alias": "confirmed_cases"})[
        ["area", "confirmed_cases"]
    ]
    .replace("State Correctional Facilities", "Correctional facilities")
)

In [16]:
prison_df

Unnamed: 0,area,confirmed_cases
4,Correctional facilities,9897


Reorder columns, Combine and add date, Kings County, and fix text

In [17]:
df_final = pd.concat([city_df, prison_df])

In [18]:
df_final["county"] = "Kings"
df_final = df_final[["county", "area", "confirmed_cases"]]

In [19]:
df_final

Unnamed: 0,county,area,confirmed_cases
9,Kings,Kettleman City,446
10,Kings,Other Area,480
11,Kings,Stratford,499
12,Kings,%null%,815
13,Kings,Armona,1622
14,Kings,Avenal,7295
15,Kings,Corcoran,10902
16,Kings,Lemoore,11035
17,Kings,Hanford,23579
4,Kings,Correctional facilities,9897


Remove any commas from cases column

In [20]:
df_final["confirmed_cases"] = df_final.confirmed_cases.replace(",", "", regex=True)

## Vet

In [21]:
default_kings_len = 9

In [22]:
try:
    assert not len(df_final) > default_kings_len
except AssertionError:
    raise AssertionError("Kings County has more rows than before")

AssertionError: Kings County has more rows than before

In [23]:
try:
    assert not len(df_final) < default_kings_len
except AssertionError:
    raise AssertionError("Kings County has missing row(s)")

## Export

Set the date

In [24]:
tz = pytz.timezone("America/Los_Angeles")

In [25]:
today = datetime.now(tz).date()

In [26]:
slug = "kings"

In [27]:
df_final["county_date"] = today

In [28]:
df_final.to_csv(data_dir / slug / f"{today}.csv", index=False)

## Combine

In [29]:
csv_list = [
    i
    for i in glob.glob(str(data_dir / slug / "*.csv"))
    if not str(i).endswith("timeseries.csv")
]

In [30]:
df_list = []
for csv in csv_list:
    if "manual" in csv:
        df = pd.read_csv(csv, parse_dates=["date"])
    else:
        file_date = csv.split("/")[-1].replace(".csv", "")
        df = pd.read_csv(csv, parse_dates=["county_date"])
        df["date"] = file_date
    df_list.append(df)

In [31]:
df = pd.concat(df_list).sort_values(["date", "area"]).drop_duplicates()

In [32]:
df.to_csv(data_dir / slug / "timeseries.csv", index=False)