# COVID-19 Growth By State (US)
> Growth of COVID-19 for the US by State.

- comments: true
- author: Avy Faingezicht
- categories: [growth, US, states]
- image: images/covid-growth-states.png
- permalink: /growth-us-states/

In [1]:
#hide
%matplotlib inline
import math
import requests
import pandas as pd
import numpy as np
import altair as alt
from IPython.display import HTML
import matplotlib.pyplot as plt

states_url = "https://covidtracking.com/api/states/daily"
us_url = "https://covidtracking.com/api/us/daily"
case_threshold = 100

r = requests.get(states_url)
states_df = pd.DataFrame(r.json())
states_df['date'] = pd.to_datetime(states_df.date, format="%Y%m%d")
states_df = states_df[['date', 'state', 'positive', 'death']].sort_values('date')
cols = {}
for state in states_df.state.unique():
    cases = states_df[(states_df.state == state) & (states_df.positive > case_threshold)]
    cases = cases.reset_index().positive.reset_index(drop=True)
    if len(cases) > 1:
        cols[state] = cases
        
r = requests.get(us_url)
us_df = pd.DataFrame(r.json())
us_df['date'] = pd.to_datetime(us_df.date, format="%Y%m%d")
us_df = us_df[['date', 'positive', 'death']].sort_values('date')
us_df['state'] = 'US'
cols['US'] = us_df.positive.reset_index(drop=True)

us_df['state'] = 'US'
states_df = states_df.append(us_df, ignore_index=True)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


In [2]:
SINCE_CASES_NUM = 100
MIN_CASES = 200

st = states_df[states_df['positive'].ge(MIN_CASES)
                     ].sort_values(by='positive', ascending=False)
states = st['state'].values
df = states_df[states_df['state'].isin(states)].copy()

days_since = (df.assign(F=df['positive'].ge(SINCE_CASES_NUM))
              .set_index('date')
              .groupby('state')['F'].transform('idxmax'))

df['Days since 100 cases'] = (df['date'] - days_since.values).dt.days.values

days_since = (df.assign(F=df['death'].ge(1))
              .set_index('date')
              .groupby('state')['F'].transform('idxmax'))

df['Days since first death'] = (df['date'] - days_since.values).dt.days.values

df = df.rename(columns={"positive": "Confirmed Cases", "state": "State", "death": "Confirmed Deaths"})
dfc = df[df['Days since 100 cases'].ge(0)]
dfd = df[df['Days since first death'].ge(0)]

In [3]:
baseline_states = ['CA', 'WA', 'NY']
max_date = dfc['date'].max()
color_domain = list(states_df.state.unique())

def make_since_chart(highlight_states=[], baseline_states=baseline_states):
    selection = alt.selection_multi(fields=['State'], bind='legend', 
                                    init=[{'State': x} for x in highlight_states + baseline_states])

    base = alt.Chart(dfc, width=550).encode(
        x='Days since 100 cases:Q',
        y=alt.Y('Confirmed Cases:Q', scale=alt.Scale(type='log')),
        color=alt.Color(
            'State:N',
            scale=alt.Scale(scheme="category20b"),
            legend=alt.Legend(columns=len(color_domain)//18+1, symbolLimit=len(color_domain))),
        tooltip=list(dfc),
        opacity=alt.condition(selection, alt.value(1), alt.value(0.05))
    )
    max_day = dfc['Days since 100 cases'].max()
    ref = pd.DataFrame([[x, 100*1.33**x] for x in range(max_day+1)], columns=['Days since 100 cases', 'Confirmed Cases'])
    base_ref = alt.Chart(ref).encode(x='Days since 100 cases:Q', y='Confirmed Cases:Q')
    return (
        base_ref.mark_line(color='black', opacity=.5, strokeDash=[3,3]) +
        base_ref.transform_filter(
            alt.datum['Days since 100 cases'] >= max_day
        ).mark_text(dy=-6, align='right', fontSize=10, text='33% Daily Growth') +
        base.mark_line(point=True).add_selection(selection) + 
        base.transform_filter(
            alt.datum['date'] >= int(max_date.timestamp() * 1000)
        ).mark_text(dy=-8, align='right', fontWeight='bold').encode(text='State:N')
    ).properties(
        title=f"Compare {', '.join(highlight_states)} trajectory with {', '.join(baseline_states)}"
    )

In [4]:
make_since_chart(['US'])

In [5]:
baseline_states = ['CA', 'WA', 'NY']
max_date = dfd['date'].max()
color_domain = list(states_df.state.unique())

def make_death_chart(highlight_states=[], baseline_states=baseline_states):
    selection = alt.selection_multi(fields=['State'], bind='legend', 
                                    init=[{'State': x} for x in highlight_states + baseline_states])

    base = alt.Chart(dfd, width=550).encode(
        x='Days since first death:Q',
        y=alt.Y('Confirmed Deaths:Q', scale=alt.Scale(type='log')),
        color=alt.Color(
            'State:N',
            scale=alt.Scale(scheme="category20b"),
            legend=alt.Legend(columns=len(color_domain)//18+1, symbolLimit=len(color_domain))),
        tooltip=list(df),
        opacity=alt.condition(selection, alt.value(1), alt.value(0.05))
    )
    max_day = dfd['Days since first death'].max()
    ref = pd.DataFrame([[x, 20*1.33**x] for x in range(max_day+1)], columns=['Days since first death', 'Confirmed Deaths'])
    base_ref = alt.Chart(ref).encode(x='Days since first death:Q', y='Confirmed Deaths:Q')
    return (
        base_ref.mark_line(color='black', opacity=.5, strokeDash=[3,3]) +
        base_ref.transform_filter(
            alt.datum['Days since 100 cases'] >= max_day
        ).mark_text(dy=-6, align='right', fontSize=10, text='33% Daily Growth') +
        base.mark_line(point=True).add_selection(selection) + 
        base.transform_filter(
            alt.datum['date'] >= int(max_date.timestamp() * 1000)
        ).mark_text(dy=-8, align='right', fontWeight='bold').encode(text='State:N')
    ).properties(
        title=f"Compare {', '.join(highlight_states)} trajectory with {', '.join(baseline_states)}"
    )

In [6]:
make_death_chart(['US'])

In [7]:
#hide_input
base = alt.Chart(dfc, width=600).encode(
    x='Days since 100 cases:Q',
    y=alt.Y('Confirmed Cases:Q', scale=alt.Scale(type='log')),
    color=alt.Color('State:N', scale=alt.Scale(scheme='category10'), legend=None),
    tooltip=['State', 'date', 'Confirmed Cases', 'Days since 100 cases']
)

State_selection = alt.selection_single(
    name='Select', fields=['State'], 
    bind=alt.binding_select(options=list(sorted(set(states) - set(baseline_states)))),
    init={'State': 'US'})

date_filter = alt.datum['date'] >= int(max_date.timestamp() * 1000)
base2 = base.transform_filter(alt.FieldOneOfPredicate('State', baseline_states))
base3 = base.transform_filter(State_selection)
base4 = base3.transform_filter(date_filter)

max_day = dfc['Days since 100 cases'].max()
ref = pd.DataFrame([[x, 100*1.33**x] for x in range(max_day+1)], columns=['Days since 100 cases', 'Confirmed Cases'])
base_ref = alt.Chart(ref).encode(x='Days since 100 cases:Q', y='Confirmed Cases:Q')
base_ref_f = base_ref.transform_filter(alt.datum['Days since 100 cases'] >= max_day)

chart5 = (
 base_ref.mark_line(color='black', opacity=.5, strokeDash=[3,3]) + 
 base_ref_f.mark_text(dy=-6, align='right', fontSize=10, text='33% Daily Growth') + 
 base2.mark_line(point=True, tooltip=True) +
 base3.mark_line(point={'size':50}, tooltip=True) +
 base2.transform_filter(date_filter).mark_text(dy=-8, align='right').encode(text='State:N') +
 base4.mark_text(dx=8, align='left', fontWeight='bold').encode(text='State:N') +
 base4.mark_text(dx=8, dy=12, align='left', fontWeight='bold').encode(text='Confirmed Cases:Q')
).add_selection(State_selection).properties(
    title=f"State's Trajectory compared to {', '.join(baseline_states)}"
)
chart5

In [8]:
#hide_input
base = alt.Chart(dfd, width=600).encode(
    x='Days since first death:Q',
    y=alt.Y('Confirmed Deaths:Q', scale=alt.Scale(type='log')),
    color=alt.Color('State:N', scale=alt.Scale(scheme='category10'), legend=None),
    tooltip=['State', 'date', 'Confirmed Deaths', 'Days since first death']
)

State_selection = alt.selection_single(
    name='Select', fields=['State'], 
    bind=alt.binding_select(options=list(sorted(set(states) - set(baseline_states)))),
    init={'State': 'US'})

date_filter = alt.datum['date'] >= int(max_date.timestamp() * 1000)
base2 = base.transform_filter(alt.FieldOneOfPredicate('State', baseline_states))
base3 = base.transform_filter(State_selection)
base4 = base3.transform_filter(date_filter)

max_day = dfd['Days since first death'].max()
ref = pd.DataFrame([[x, 20*1.33**x] for x in range(max_day+1)], columns=['Days since first death', 'Confirmed Deaths'])
base_ref = alt.Chart(ref).encode(x='Days since first death:Q', y='Confirmed Deaths:Q')
base_ref_f = base_ref.transform_filter(alt.datum['Days since first death'] >= max_day)

chart5 = (
 base_ref.mark_line(color='black', opacity=.5, strokeDash=[3,3]) + 
 base_ref_f.mark_text(dy=-6, align='right', fontSize=10, text='33% Daily Growth') + 
 base2.mark_line(point=True, tooltip=True) +
 base3.mark_line(point={'size':50}, tooltip=True) +
 base2.transform_filter(date_filter).mark_text(dy=-8, align='right').encode(text='State:N') +
 base4.mark_text(dx=8, align='left', fontWeight='bold').encode(text='State:N') +
 base4.mark_text(dx=8, dy=12, align='left', fontWeight='bold').encode(text='Confirmed Deaths:Q')
).add_selection(State_selection).properties(
    title=f"State's Trajectory compared to {', '.join(baseline_states)}"
)
chart5

This visualization was made by [Rob Zinkov](https://twitter.com/zaxtax)[^1].

[^1]:  Data sourced from ["The COVID Tracking Project"](https://covidtracking.com/). Link to [original notebook](https://github.com/avyfain/covid19/blob/master/covid19.ipynb).  Updated hourly by [GitHub Actions](https://github.com/features/actions).