In [66]:
import pandas as pd
import numpy as np
import altair as al
import os
from toolz.curried import pipe

def json_dir(data, data_dir='altairdata'):
    """Altair data transformer that routes serialized chart data into a directory.

    Ensures ``data_dir`` exists, then passes ``data`` through Altair's curried
    ``to_json`` transformer configured with a filename template rooted in that
    directory.

    Parameters
    ----------
    data
        The chart data handed over by Altair's transformer pipeline.
    data_dir : str, default 'altairdata'
        Directory the filename template points into; created if missing.

    Returns
    -------
    Whatever ``al.to_json`` returns for ``data``.
    """
    # exist_ok=True makes repeated chart renders safe once the directory exists.
    os.makedirs(data_dir, exist_ok=True)
    # The {prefix}/{hash}/{extension} placeholders are left for to_json to fill in.
    filename_template = data_dir + '/{prefix}-{hash}.{extension}'
    write_json = al.to_json(filename=filename_template)
    return pipe(data, write_json)
# Raise pandas' row display limit so long frames are shown in full.
pd.set_option('display.max_rows', 999)

# Register the transformer under the name 'json_dir', then activate it with
# 'mydata' as the target directory (overriding the function's default).
al.data_transformers.register('json_dir', json_dir)
al.data_transformers.enable('json_dir', data_dir='mydata')

In [67]:
# County totals file from the datadesk/california-coronavirus-data repository
# (master branch); `date` is parsed into datetimes on load.
county_totals = pd.read_csv(
    "https://raw.githubusercontent.com/datadesk/california-coronavirus-data/master/latimes-county-totals.csv",
    parse_dates=['date'],
)
# Peek at a single row to confirm the columns.
county_totals.head(1)

Unnamed: 0,date,county,fips,confirmed_cases,deaths,new_confirmed_cases,new_deaths
0,2020-06-23,Alameda,1,5140,120,133.0,0.0


In [68]:
# CDPH state totals file from the datadesk/california-coronavirus-data
# repository (master branch); `date` is parsed into datetimes on load.
cdph_state = pd.read_csv(
    "https://raw.githubusercontent.com/datadesk/california-coronavirus-data/master/cdph-state-totals.csv",
    parse_dates=['date'],
)
# Peek at a single row to confirm the columns.
cdph_state.head(1)

Unnamed: 0,date,confirmed_cases,deaths,travel,person_to_person,community_spread,under_investigation,other_causes,self_monitoring,age_0_to_17,...,total_tests,received_tests,pending_tests,confirmed_hospitalizations,confirmed_icu,suspected_hospitalizations,suspected_icu,healthcare_worker_infections,healthcare_worker_deaths,source_url
0,2020-06-23,183073,5580.0,,,,,,,13867.0,...,3496929.0,,,3868.0,1225.0,1197.0,196.0,13546.0,85.0,


In [69]:
# CDPH skilled-nursing totals file from the datadesk/california-coronavirus-data
# repository (master branch); `date` is parsed into datetimes on load.
cdph_skilled_nursing = pd.read_csv(
    "https://raw.githubusercontent.com/datadesk/california-coronavirus-data/master/cdph-skilled-nursing-totals.csv",
    parse_dates=['date'],
)
# Peek at a single row to confirm the columns.
cdph_skilled_nursing.head(1)

Unnamed: 0,date,staff_active_cases,patients_active_cases,staff_confirmed_cases,patients_confirmed_cases,staff_confirmed_cases_note,patients_confirmed_cases_note,staff_deaths,patients_deaths,staff_deaths_note,patients_deaths_note,staff_deaths_note.1,source_url
0,2020-06-23,52,2004,7739,12400,,,91,2321,,,,https://www.cdph.ca.gov/Programs/CID/DCDC/Page...


In [70]:
# CDPH adult-and-senior-care totals file from the
# datadesk/california-coronavirus-data repository (master branch);
# `date` is parsed into datetimes on load.
cdph_adult_and_senior_care = pd.read_csv(
    "https://raw.githubusercontent.com/datadesk/california-coronavirus-data/master/cdph-adult-and-senior-care-totals.csv",
    parse_dates=['date'],
)
# Peek at a single row to confirm the columns.
cdph_adult_and_senior_care.head(1)

Unnamed: 0,date,confirmed_cases,deaths,source_url
0,2020-06-23,3008,400,https://www.cdss.ca.gov/Portals/9/Additional-R...


In [71]:
# Ratio of care-facility deaths to all-county deaths, both expressed as
# rolling means of per-row increases.

# Number of rows in every rolling mean below. The facility series and the
# county denominator must use the same window for the ratio to be meaningful.
# NOTE(review): rolling(n) counts rows, not calendar days. If any feed skips
# dates, the windows of the different sources will not line up -- TODO confirm
# that all three files are reported daily.
DEATHS_ROLLING_WINDOW = 14


def _smoothed_daily_increase(totals, columns, rename=None, window=DEATHS_ROLLING_WINDOW):
    """Return long-format rolling means of the row-to-row increase of cumulative columns.

    Parameters
    ----------
    totals : pandas.DataFrame
        Frame with a ``date`` column and the cumulative ``columns``.
    columns : list of str
        Cumulative columns to process.
    rename : dict, optional
        Mapping applied to the column names before melting, so the resulting
        ``variable`` labels can differ from the source column names.
    window : int
        Number of rows in the rolling mean.

    Returns
    -------
    pandas.DataFrame
        Columns ``date``, ``variable`` and ``value``.
    """
    renames = rename or {}
    smoothed = (
        totals
        .set_index('date')
        .sort_index(axis=0)
        [list(columns)]
        .cummax()  # force the running totals to be non-decreasing before differencing
        .diff()
        .rolling(window, min_periods=0)
        .mean()
        .reset_index()
        .rename(columns=renames)
    )
    return smoothed.melt(
        id_vars=['date'],
        value_vars=[renames.get(name, name) for name in columns],
    )


# Denominator: new deaths summed over all county rows per date, then smoothed
# with the same window. groupby sorts by date, so the window runs in date order.
all_county_daily_deaths = (
    county_totals
    .groupby('date')
    [['new_deaths']]
    .sum()
    .rolling(DEATHS_ROLLING_WINDOW, min_periods=0)
    .mean()
    .reset_index()
    .rename(columns={'new_deaths': 'deaths'})
)

df0 = (
    pd.concat([
        _smoothed_daily_increase(
            cdph_skilled_nursing,
            ['staff_deaths', 'patients_deaths'],
        ),
        _smoothed_daily_increase(
            cdph_adult_and_senior_care,
            ['deaths'],
            rename={'deaths': 'adult_and_senior_care_deaths'},
        ),
    ], axis=0, sort=True)
    # Explicit key and join type: only dates present on both sides are kept.
    .merge(all_county_daily_deaths, on='date', how='inner')
    # NOTE(review): fillna(0) runs before the division, so a zero denominator
    # still produces inf/NaN in `p`.
    .fillna(0)
    .assign(p=lambda x: x['value'] / x['deaths'])
    # Guarantees a datetime dtype for the chart's `date:T` encoding.
    .assign(date=lambda x: pd.to_datetime(x['date']))
)
df0.head(1)

Unnamed: 0,date,value,variable,deaths,p
0,2020-04-19,0.0,staff_deaths,59.785714,0.0


In [72]:
# Line chart of `p` over time, one colour per `variable`.
# The y scale is pinned to [0, 1]; clip=True cuts marks that fall outside it.
(
    al.Chart(df0)
    .mark_line(clip=True)
    .encode(
        al.X('date:T'),
        al.Y('p:Q', scale=al.Scale(domain=[0, 1])),
        al.Color('variable:N'),
    )
)

In [73]:
def _running_max_long(totals, columns):
    """Return `columns` of `totals` as date-ordered running maxima in long format.

    The result has the columns ``date``, ``variable`` and ``value``.
    """
    monotone = (
        totals
        .set_index('date')
        .sort_index(axis=0)
        [columns]
        .cummax()
    )
    return monotone.reset_index().melt(id_vars=['date'], value_vars=columns)


# Stack the skilled-nursing death columns and the state `deaths` column into
# one long frame. Note that this rebinds `df0`.
df0 = pd.concat(
    [
        _running_max_long(cdph_skilled_nursing, ['staff_deaths', 'patients_deaths']),
        _running_max_long(cdph_state, ['deaths']),
    ],
    axis=0,
    sort=True,
)
df0.head(1)

Unnamed: 0,date,value,variable
0,2020-04-19,0.0,staff_deaths


In [74]:
# Line chart of `value` over time, one colour per `variable`.
(
    al.Chart(df0)
    .mark_line(clip=True)
    .encode(
        al.X('date:T'),
        al.Y('value:Q'),
        al.Color('variable:N'),
    )
)

In [75]:
# Age-band columns of the state totals file.
# NOTE(review): presumably cumulative counts per age band -- confirm against the data dictionary.
age_columns = ['age_0_to_17', 'age_18_to_49', 'age_50_to_64', 'age_65_and_up']

# Row-to-row increases of each band (after forcing the series to be
# non-decreasing), in long format, restricted to 2020-04-01 onwards.
age_increments = (
    cdph_state
    .set_index('date')
    .sort_index(axis=0)
    [age_columns]
    .cummax()
    .diff()
    .reset_index()
    .melt(id_vars=['date'], value_vars=age_columns)
    .loc[lambda x: x['date'] >= pd.Timestamp('2020-04-01')]
)

# Normalized stacked area: each date's bands are scaled to sum to 1.
(
    al.Chart(age_increments)
    .mark_area(clip=True)
    .encode(
        al.X('date:T'),
        al.Y('sum(value):Q', scale=al.Scale(domain=[0, 1]), stack='normalize'),
        al.Color('variable:N'),
    )
)