## Configuration
_Initial steps to get the notebook ready to play nice with our repository. Do not delete this section._

Code formatting with [black](https://pypi.org/project/nb-black/).

In [1]:
%load_ext lab_black

In [2]:
import os
import pathlib

In [3]:
this_dir = pathlib.Path(os.path.abspath(""))

In [4]:
data_dir = this_dir / "data"

In [5]:
import pytz
import glob
import json
import requests
import pandas as pd
from datetime import datetime

## Download

Retrieve the page

In [6]:
url = "https://services3.arcgis.com/ibgDyuD2DLBge82s/arcgis/rest/services/Fresno_County_Zip_Data_Summary/FeatureServer/0/query?f=json&where=FZIP%3C%3E%2799999%27%20AND%20COUNT_Cases%3E%3D75%20AND%20FZIP%20IS%20NOT%20NULL&returnGeometry=false&spatialRel=esriSpatialRelIntersects&outFields=*&groupByFieldsForStatistics=FCITY&orderByFields=value%20desc&outStatistics=%5B%7B%22statisticType%22%3A%22sum%22%2C%22onStatisticField%22%3A%22COUNT_Cases%22%2C%22outStatisticFieldName%22%3A%22value%22%7D%5D&resultType=standard&cacheHint=true"

In [7]:
r = requests.get(url)

In [8]:
data = r.json()

## Parse

In [9]:
dict_list = []

In [10]:
for item in data["features"]:
    d = dict(
        county="Fresno",
        area=item["attributes"]["FCITY"],
        confirmed_cases=item["attributes"]["value"],
    )
    dict_list.append(d)

KeyError: 'features'

In [11]:
df = pd.DataFrame(dict_list)

In [12]:
df["area"] = df["area"].str.title()

Get timestamp

In [13]:
date_url = "https://services3.arcgis.com/ibgDyuD2DLBge82s/arcgis/rest/services/Fresno_County_Zip_Data_Summary/FeatureServer/0/?f=json"
date_r = requests.get(date_url)
date_data = date_r.json()

In [14]:
timestamp = date_data["editingInfo"]["lastEditDate"]

In [15]:
tz = pytz.timezone("America/Los_Angeles")

In [16]:
latest_date = datetime.fromtimestamp(timestamp / 1000, tz).date()

In [17]:
df["county_date"] = latest_date

## Trim

In [18]:
df["county"] = "Fresno"

In [19]:
df["area"] = df["area"].str.title()

In [20]:
c = df[["county", "area", "county_date", "confirmed_cases"]]

## Vet

In [23]:
len(df)

32

In [24]:
df

Unnamed: 0,county,area,confirmed_cases,county_date
0,Fresno,Fresno,159727,2022-08-08
1,Fresno,Clovis,26304,2022-08-08
2,Fresno,Sanger,11450,2022-08-08
3,Fresno,Selma,10088,2022-08-08
4,Fresno,Reedley,8371,2022-08-08
5,Fresno,Coalinga,7254,2022-08-08
6,Fresno,Kerman,5751,2022-08-08
7,Fresno,Parlier,5546,2022-08-08
8,Fresno,Mendota,3474,2022-08-08
9,Fresno,Orange Cove,3362,2022-08-08


In [25]:
try:
    assert not len(df) > 32
except AssertionError:
    raise AssertionError("Fresno County's place scraper has additional rows")

In [26]:
try:
    assert not len(df) < 32
except AssertionError:
    raise AssertionError("Fresno County's place scraper is missing rows")

In [27]:
len(df)

32

In [28]:
old = pd.read_csv("data/fresno/2022-06-15.csv")

In [29]:
df[~df.area.isin(old.area)]

Unnamed: 0,county,area,confirmed_cases,county_date
30,Fresno,Dos Palos,91,2022-06-15


## Export

Mark the current date

In [30]:
tz = pytz.timezone("America/Los_Angeles")

In [31]:
today = datetime.now(tz).date()

In [32]:
slug = "fresno"

In [33]:
df.to_csv(data_dir / slug / f"{today}.csv", index=False)

## Combine

In [34]:
csv_list = [
    i
    for i in glob.glob(str(data_dir / slug / "*.csv"))
    if not str(i).endswith("timeseries.csv")
]

In [35]:
df_list = []
for csv in csv_list:
    if "manual" in csv:
        df = pd.read_csv(csv, parse_dates=["date"])
    else:
        file_date = csv.split("/")[-1].replace(".csv", "")
        df = pd.read_csv(csv, parse_dates=["county_date"])
        df["date"] = file_date
    df_list.append(df)

In [36]:
df = pd.concat(df_list).sort_values(["date", "area"])

In [37]:
df.to_csv(data_dir / slug / "timeseries.csv", index=False)

In [38]:
df[df.area == "Fresno"]

Unnamed: 0,county,area,confirmed_cases,county_date,date
2634,Fresno,Fresno,128,NaT,2020-04-14 00:00:00
2618,Fresno,Fresno,148,NaT,2020-04-15 00:00:00
2602,Fresno,Fresno,159,NaT,2020-04-16 00:00:00
2586,Fresno,Fresno,166,NaT,2020-04-17 00:00:00
2570,Fresno,Fresno,189,NaT,2020-04-20 00:00:00
...,...,...,...,...,...
0,Fresno,Fresno,101523,2022-02-14,2022-06-12
0,Fresno,Fresno,101523,2022-02-14,2022-06-13
0,Fresno,Fresno,101523,2022-02-14,2022-06-14
0,Fresno,Fresno,101523,2022-02-14,2022-06-15
