# Get vaccine coverage data by ZIP Code from CDPH

In [41]:
%load_ext lab_black

The lab_black extension is already loaded. To reload it, use:
  %reload_ext lab_black


In [42]:
import pandas as pd
import datetime as dt
import json
import os
import glob
import urllib.request

In [43]:
# Notebook display limits: up to 50 columns and 1000 rows, and never
# truncate the contents of a cell.
pd.set_option("display.max_columns", 50)
pd.set_option("display.max_rows", 1000)
pd.options.display.max_colwidth = None

In [44]:
# Current local date as a "YYYY-MM-DD" string.
today = dt.date.today().isoformat()

### Get the metadata from the API

In [45]:
# URL of the `package_show` API action for the ZIP-code vaccine dataset;
# its JSON response is the metadata read in the next step.
endpoint = (
    "https://data.chhs.ca.gov/api/3/action/package_show"
    "?id=covid-19-vaccine-progress-dashboard-data-by-zip-code"
)

In [46]:
# Request the dataset metadata and parse the JSON body into `text`.
# The `with` block closes the HTTP response as soon as it has been read,
# instead of leaving the connection open for the life of the kernel.
with urllib.request.urlopen(endpoint) as jsonurl:
    text = json.load(jsonurl)

### Get the object with the max date

In [70]:
# Of all resources listed in the metadata, keep the one with the greatest
# "created" value.
resources = text["result"]["resources"]
latest_obj = max(resources, key=lambda resource: resource["created"])

In [49]:
# Remove the fixed title prefix from the resource description, parse what
# is left as a date, and format it as "YYYY-MM-DD".
date_label = latest_obj["description"].replace("COVID-19 Vaccines by ZIP Code ", "")
latest_date = pd.to_datetime(date_label).strftime("%Y-%m-%d")

### Read the latest file into a dataframe, filter out redacted rows and save it to `data/raw`

In [51]:
# Load the CSV that the selected resource's "url" points to.
latest_url = latest_obj["url"]
df = pd.read_csv(latest_url)

In [52]:
# Keep only the rows whose "VEM Source" differs from "No VEM Assigned".
has_vem = df["VEM Source"].ne("No VEM Assigned")
df = df.loc[has_vem]

In [53]:
# Save the filtered snapshot under its update date.
# `to_csv` does not create missing parent folders, so make sure the target
# directory exists first (a no-op when it is already there).
os.makedirs("data/raw", exist_ok=True)
df.to_csv("data/raw/" + latest_date + ".csv", index=False)

---

## Concatenate all the weekly updates

### Get all files and assign a date to each table based on the file name

In [54]:
# Collect every CSV saved under data/raw (relative to `path`).
path = ""
# glob returns matches in a filesystem-dependent order; sorting keeps the
# row order of the concatenated output the same from run to run.
files = sorted(glob.glob(os.path.join(path, "data/raw/*.csv")))

In [56]:
# Read each raw CSV and tag its rows with the file's base name in a `date`
# column. A list is built instead of a generator because a generator is
# exhausted after one pass, which makes re-running the concat cell fail
# with "No objects to concatenate".
file_df = [
    pd.read_csv(f, low_memory=False).assign(date=os.path.basename(f)) for f in files
]

### Concatenate them into one timeseries and clean the update date field

In [57]:
# Stack all per-file tables into one frame with a fresh 0..n-1 index.
concat_df = pd.concat(objs=file_df, ignore_index=True)

In [58]:
# The `date` column still holds file base names: drop the literal ".csv"
# text, then parse the remainder as datetimes.
date_labels = concat_df["date"].str.replace(".csv", "", regex=False)
concat_df["date"] = pd.to_datetime(date_labels)

---

## Export

### All updates

In [59]:
# Write the full concatenated table, without the index column.
concat_df.to_csv(path_or_buf="data/timeseries.csv", index=False)

### Latest update

In [60]:
# Write only the rows that carry the most recent `date` value.
is_latest = concat_df["date"].eq(concat_df["date"].max())
concat_df.loc[is_latest].to_csv("data/latest.csv", index=False)