In [2]:
import pandas as pd
import numpy as np
import altair as al
import os
from toolz.curried import pipe

def json_dir(data, data_dir='altairdata'):
    os.makedirs(data_dir, exist_ok=True)
    return pipe(data, al.to_json(filename=data_dir + '/{prefix}-{hash}.{extension}') )
al.data_transformers.register('json_dir', json_dir)
al.data_transformers.enable('json_dir', data_dir='mydata')
pd.options.display.max_rows = 999

In [3]:
county_totals = pd.read_csv("https://raw.githubusercontent.com/datadesk/california-coronavirus-data/master/latimes-county-totals.csv", parse_dates=['date'])
county_totals.head(1)

Unnamed: 0,date,county,fips,confirmed_cases,deaths,new_confirmed_cases,new_deaths
0,2020-06-24,Alameda,1,5275,122,135.0,2.0


In [4]:
cdph_state = pd.read_csv("https://raw.githubusercontent.com/datadesk/california-coronavirus-data/master/cdph-state-totals.csv", parse_dates=['date'])
cdph_state.head(1)

Unnamed: 0,date,confirmed_cases,deaths,travel,person_to_person,community_spread,under_investigation,other_causes,self_monitoring,age_0_to_17,...,total_tests,received_tests,pending_tests,confirmed_hospitalizations,confirmed_icu,suspected_hospitalizations,suspected_icu,healthcare_worker_infections,healthcare_worker_deaths,source_url
0,2020-06-24,190222,5632.0,,,,,,,14524.0,...,3592899.0,,,4095.0,1268.0,1304.0,219.0,13737.0,85.0,


In [5]:
cdph_skilled_nursing = pd.read_csv("https://raw.githubusercontent.com/datadesk/california-coronavirus-data/master/cdph-skilled-nursing-totals.csv", parse_dates=['date'])
cdph_skilled_nursing.head(1)

Unnamed: 0,date,staff_active_cases,patients_active_cases,staff_confirmed_cases,patients_confirmed_cases,staff_confirmed_cases_note,patients_confirmed_cases_note,staff_deaths,patients_deaths,staff_deaths_note,patients_deaths_note,staff_deaths_note.1,source_url
0,2020-06-24,84,2068,7865,12566,,,91,2350,,,,https://www.cdph.ca.gov/Programs/CID/DCDC/Page...


In [6]:
cdph_adult_and_senior_care = pd.read_csv("https://raw.githubusercontent.com/datadesk/california-coronavirus-data/master/cdph-adult-and-senior-care-totals.csv", parse_dates=['date'])
cdph_adult_and_senior_care.head(1)

Unnamed: 0,date,confirmed_cases,deaths,source_url
0,2020-06-24,3099,406,https://www.cdss.ca.gov/Portals/9/Additional-R...


In [7]:
df0 = (
    pd.concat([
        cdph_skilled_nursing
        .set_index('date')
        .sort_index(axis=0)
        [['staff_deaths', 'patients_deaths']]
        .cummax()
        .diff()
        .rolling(14, min_periods=0)
        .mean()
        .reset_index()
        .melt(id_vars=['date'], value_vars=['staff_deaths', 'patients_deaths']),
        cdph_adult_and_senior_care
        .set_index('date')
        .sort_index(axis=0)
        [['deaths']]
        .cummax()
        .diff()
        .rolling(14, min_periods=0)
        .mean()
        .reset_index()
        .rename(columns={'deaths': 'adult_and_senior_care_deaths'})
        .melt(id_vars=['date'], value_vars=['adult_and_senior_care_deaths'])
    ], axis=0, sort=True)
    .merge(
        county_totals
        .groupby('date')
        [['new_deaths']]
        .sum()
        .rolling(14, min_periods=0)
        .mean()
        .reset_index()
        .rename(columns={'new_deaths': 'deaths'})
        [['date', 'deaths']]
    )
    .fillna(0)
    .pipe(lambda x: x.assign(p=x['value']/x['deaths']))
    .pipe(lambda x: x.assign(date=pd.to_datetime(x['date'])))
)
df0.head(1)

Unnamed: 0,date,value,variable,deaths,p
0,2020-04-19,0.0,staff_deaths,59.785714,0.0


In [8]:
al.Chart(df0).mark_line(clip=True).encode(
    x='date:T',
    y=al.Y('p:Q', scale=al.Scale(domain=[0, 1])),
    color='variable:N'
)

In [9]:
df0 = (
    pd.concat([
        cdph_skilled_nursing
        .set_index('date')
        .sort_index(axis=0)
        [['staff_deaths', 'patients_deaths']]
        .cummax()
        .reset_index()
        .melt(id_vars=['date'], value_vars=['staff_deaths', 'patients_deaths']),
        cdph_state
        .set_index('date')
        .sort_index(axis=0)
        [['deaths']]
        .cummax()
        .reset_index()
        .melt(id_vars=['date'], value_vars=['deaths'])
    ], axis=0, sort=True)
)
df0.head(1)

Unnamed: 0,date,value,variable
0,2020-04-19,0.0,staff_deaths


In [10]:
al.Chart(df0).mark_line(clip=True).encode(
    x='date:T',
    y='value:Q',
    color='variable:N'
)

In [11]:
al.Chart(
    cdph_state
    .set_index('date')
    .sort_index(axis=0)
    [['age_0_to_17', 'age_18_to_49', 'age_50_to_64', 'age_65_and_up']]
    .cummax()
    .diff()
    .reset_index()
    .melt(id_vars=['date'], value_vars=['age_0_to_17', 'age_18_to_49', 'age_50_to_64', 'age_65_and_up'])
    .pipe(lambda x: x[x['date'] >= pd.Timestamp('2020-04-01')])
).mark_area(clip=True).encode(
    x='date:T',
    y=al.Y('sum(value):Q', scale=al.Scale(domain=[0, 1]), stack='normalize'),
    color='variable:N'
)

In [12]:
cdph_state

Unnamed: 0,date,confirmed_cases,deaths,travel,person_to_person,community_spread,under_investigation,other_causes,self_monitoring,age_0_to_17,...,total_tests,received_tests,pending_tests,confirmed_hospitalizations,confirmed_icu,suspected_hospitalizations,suspected_icu,healthcare_worker_infections,healthcare_worker_deaths,source_url
0,2020-06-24,190222,5632.0,,,,,,,14524.0,...,3592899.0,,,4095.0,1268.0,1304.0,219.0,13737.0,85.0,
1,2020-06-23,183073,5580.0,,,,,,,13867.0,...,3496929.0,,,3868.0,1225.0,1197.0,196.0,13546.0,85.0,
2,2020-06-22,178054,5515.0,,,,,,,13346.0,...,3411686.0,,,3702.0,1199.0,1102.0,213.0,13476.0,83.0,https://www.cdph.ca.gov/Programs/OPA/Pages/NR2...
3,2020-06-21,173824,5495.0,,,,,,,12987.0,...,3319256.0,,,3574.0,1163.0,1105.0,203.0,13373.0,83.0,https://www.cdph.ca.gov/Programs/OPA/Pages/NR2...
4,2020-06-20,169309,5424.0,,,,,,,12441.0,...,3234412.0,,,3494.0,1145.0,1088.0,187.0,13150.0,82.0,https://www.cdph.ca.gov/Programs/OPA/Pages/NR2...
5,2020-06-19,165416,5360.0,,,,,,,12063.0,...,3155702.0,,,3428.0,1128.0,1159.0,201.0,12914.0,80.0,https://www.cdph.ca.gov/Programs/OPA/Pages/NR2...
6,2020-06-18,161099,5290.0,,,,,,,11582.0,...,3074530.0,,,3439.0,1119.0,1145.0,179.0,12685.0,78.0,https://www.cdph.ca.gov/Programs/OPA/Pages/NR2...
7,2020-06-17,157015,5208.0,,,,,,,11064.0,...,2997988.0,,,3421.0,1109.0,1101.0,209.0,12459.0,76.0,https://www.cdph.ca.gov/Programs/OPA/Pages/NR2...
8,2020-06-16,153560,5121.0,,,,,,,10702.0,...,2937755.0,,,3335.0,1069.0,1127.0,203.0,12295.0,75.0,https://www.cdph.ca.gov/Programs/OPA/Pages/NR2...
9,2020-06-15,151452,5089.0,,,,,,,10451.0,...,2868182.0,,,3103.0,1053.0,1220.0,217.0,12237.0,74.0,https://www.cdph.ca.gov/Programs/OPA/Pages/NR2...


In [18]:
lat = pd.read_csv("https://raw.githubusercontent.com/datadesk/california-coronavirus-data/master/latimes-county-totals.csv", parse_dates=['date'])
lat.head(1)

Unnamed: 0,date,county,fips,confirmed_cases,deaths,new_confirmed_cases,new_deaths
0,2020-06-24,Alameda,1,5275,122,135.0,2.0


In [26]:
big_counties = (
    lat
    .pipe(lambda x: x[x['date'] >= x['date'].max() - pd.Timedelta(days=30)])
    .groupby('county')
    ['new_confirmed_cases']
    .sum()
    .pipe(lambda x: x/x.sum())
    .pipe(lambda x: x[x >= 0.05])
    .index
    .values
)
lat2 = (
    lat
    .pipe(lambda x: x.assign(county=x['county'].where(x['county'].isin(big_counties), 'other')))
    .groupby(['county', 'date'])
    [['new_confirmed_cases', 'new_deaths']]
    .sum()
    .reset_index()
    .set_index(['county', 'date'])
    .sort_index(axis=0)
    .groupby(level='county')
    .rolling(14, min_period=0)
    .mean()
    .reset_index(level=[0], drop=True)
    .reset_index()
)
lat2.head(1)

Unnamed: 0,county,date,new_confirmed_cases,new_deaths
0,Los Angeles,2020-01-26,,


In [22]:
al.Chart(
    lat2
    .melt(id_vars=['date', 'county'], value_vars=['new_confirmed_cases', 'new_deaths'])
).mark_line(point=True).encode(
    x='date:T',
    y='value:Q',
    color='county:N',
    column='variable:N'
).resolve_scale(y='independent')