In [2]:
import pandas as pd
import requests
import io
import altair as alt

In [6]:
# Download the state-level and national daily CSV exports from the
# Covid Tracking Project API and keep the raw response bytes.

base_url = 'https://covidtracking.com/api/'

states_daily = 'states/daily.csv'
us_daily = 'us/daily.csv'

# Seconds to wait for the server before giving up; without a timeout a
# stalled connection would block the notebook indefinitely.
request_timeout = 30

# Using each response as a context manager closes the connection even when
# the request fails part-way. raise_for_status() turns HTTP error responses
# into an exception here instead of letting an error page be parsed as CSV.
with requests.get(base_url + states_daily, timeout=request_timeout) as request_states:
    request_states.raise_for_status()
    states_resp = request_states.content

with requests.get(base_url + us_daily, timeout=request_timeout) as request_us:
    request_us.raise_for_status()
    us_resp = request_us.content

In [7]:
# The API responses are bytes objects, so each one is wrapped in an in-memory
# binary buffer that read_csv can consume. The `date` column is then parsed
# from its YYYYMMDD form into datetime values as part of the same chain.
states_df = (
    pd.read_csv(io.BytesIO(states_resp))
    .assign(date=lambda frame: pd.to_datetime(frame['date'], format='%Y%m%d'))
)
us_df = (
    pd.read_csv(io.BytesIO(us_resp))
    .assign(date=lambda frame: pd.to_datetime(frame['date'], format='%Y%m%d'))
)


In [15]:
# Preview the first rows of the per-state daily frame.
states_df.head()

Unnamed: 0,date,state,positive,negative,pending,death,total,dateChecked
0,2020-03-19,AK,6,400.0,,,406,2020-03-19T20:00:00Z
1,2020-03-19,AL,68,28.0,,0.0,96,2020-03-19T20:00:00Z
2,2020-03-19,AR,46,310.0,113.0,,469,2020-03-19T20:00:00Z
3,2020-03-19,AS,0,,,0.0,0,2020-03-19T20:00:00Z
4,2020-03-19,AZ,44,175.0,130.0,0.0,349,2020-03-19T20:00:00Z


In [16]:
# Preview the first rows of the national daily frame.
us_df.head()

Unnamed: 0,date,states,positive,negative,posNeg,pending,death,total
0,2020-03-04,14,118,748,866,103,,969
1,2020-03-05,24,176,953,1129,197,,1326
2,2020-03-06,36,223,1571,1794,458,,2252
3,2020-03-07,51,341,1809,2150,602,,2752
4,2020-03-08,51,417,2335,2752,347,,3099


In [8]:
# Bucket the states into five equal-sized groups by their `positive` counts
# summed over every reported date.
#
# The aggregation is named with the string 'sum' rather than the builtin
# `sum`: passing the builtin callable to groupby.agg is deprecated in recent
# pandas releases and emits a FutureWarning.
#
# pd.qcut applies the labels in ascending bin order, so 'first' holds the
# states with the smallest totals and 'fifth' the largest.
#
# NOTE(review): the preview rows suggest `positive` is a running total per
# date; if so, summing across dates is not a case count, only a ranking
# score. Confirm whether the latest value per state was intended instead.
states_positive_quantiles = (
    states_df
    .groupby(['state'], as_index=False)
    .agg({'positive': 'sum'})
    .assign(quantile=lambda totals: pd.qcut(
        totals['positive'], 5,
        labels=['first', 'second', 'third', 'fourth', 'fifth']))
)

In [9]:
# Sum of `positive` per reporting date in the national feed.
# Note: `us_agg` is assigned again further down with additional columns.
us_agg = us_df.groupby(["date"], as_index=False)["positive"].sum()

In [10]:
# Per-date, per-state positive totals, with each state's quantile label
# attached via a join on `state`.
states_agg = pd.merge(
    states_df.groupby(["date", "state"], as_index=False)["positive"].sum(),
    states_positive_quantiles[['state', 'quantile']],
    on='state',
)

In [11]:
# Adapted from https://altair-viz.github.io/gallery/multiline_tooltip.html

# The plotted column defaults to 'positive'; pass `value_col` to chart a
# different numeric column that is present in the frame.

def plot_positive_by_quantile(state_df, quantile, value_col='positive', y_title=None):
    """Plot one line per state for the states in a single quantile bucket.

    The chart layers an interactive "nearest date" selection on top of the
    lines: hovering shows a vertical rule at the nearest date, a point on
    each line, and text labels with the value and the state code.

    Parameters
    ----------
    state_df : pandas.DataFrame
        Frame with `date`, `state`, `quantile` and `value_col` columns.
    quantile : str
        Value of the `quantile` column to keep (rows are filtered by
        equality); a value that matches no rows yields an empty chart.
    value_col : str, optional
        Name of the quantitative column drawn on the y-axis.
        Defaults to 'positive'.
    y_title : str, optional
        Y-axis title. Defaults to '<value_col> cases', i.e. 'positive cases'
        for the default column.

    Returns
    -------
    altair.LayerChart
        Layered 600x600 chart.
    """
    data = state_df[state_df['quantile'] == quantile]

    # Altair shorthand: column name plus ':Q' marks the field as quantitative.
    value_field = f'{value_col}:Q'
    if y_title is None:
        y_title = f'{value_col} cases'

    line = (alt.Chart(data).mark_line(interpolate='basis').encode(
        alt.X('date:T', axis=alt.Axis(format='%b %d')),
        alt.Y(value_field, title=y_title),
        alt.Color('state:N', legend=alt.Legend(title="States"))))

    # Selection that tracks the date nearest to the mouse pointer; it is
    # empty until the pointer is over the chart.
    nearest = alt.selection(type='single', nearest=True, on='mouseover',
                            fields=['date'], empty='none')

    # Fully transparent points spanning the chart; they exist only to carry
    # the selection so it can react to the pointer's x-position.
    selectors = alt.Chart(data).mark_point().encode(
        x='date:T',
        opacity=alt.value(0)).add_selection(nearest)

    # Points on the lines, visible only at the selected date.
    points = line.mark_point().encode(
        opacity=alt.condition(nearest, alt.value(1), alt.value(0)))

    # Value label to the right of each highlighted point.
    text_value = line.mark_text(align='left', dx=5, dy=-5).encode(
        text=alt.condition(nearest, value_field, alt.value(' ')))

    # State label to the left of each highlighted point.
    text_state = line.mark_text(align='right', dx=-5, dy=-5).encode(
        text=alt.condition(nearest, 'state:N', alt.value(' ')))

    # Vertical rule drawn at the selected date.
    rules = alt.Chart(data).mark_rule(color='gray').encode(
        x='date:T',
    ).transform_filter(nearest)

    return (alt.layer(
        line, selectors, points, rules, text_value, text_state).properties(
        width=600, height=600))

### Plots by state and quantile

Hover over a line to see the exact values. A log-scaled y-axis might make the chart easier to read.


Apparently NY is far ahead of the other states in positive cases...

In [12]:
# 'fifth' is the last qcut label, i.e. the bucket of states with the largest totals.
plot_positive_by_quantile(states_agg, 'fifth')

In [13]:
# 'fourth' is the second-highest bucket of states by total.
plot_positive_by_quantile(states_agg, 'fourth')

### Overall US deaths, positive cases, and pending tests

In [19]:
# Rebuild the national per-date frame with deaths, positives and pending
# tests (this replaces the earlier positive-only `us_agg`).
# The aggregations are named with the string "sum" rather than the builtin
# `sum`: passing the builtin callable to groupby.agg is deprecated in recent
# pandas releases and emits a FutureWarning.
# Missing values are skipped by the sum, so a date with no reported deaths
# aggregates to 0 rather than NaN.
us_agg = us_df.groupby(["date"], as_index=False).agg(
    {"death": "sum", "positive": "sum", "pending": "sum"})

# Deaths over time.
alt.Chart(us_agg).mark_line(interpolate='basis').encode(
    alt.X('date:T', axis=alt.Axis(format='%b %d')),
    alt.Y('death:Q')
)

In [20]:
# Positive cases over time -- why is the curve not flattening?
(
    alt.Chart(us_agg)
    .mark_line(interpolate='basis')
    .encode(
        x=alt.X('date:T', axis=alt.Axis(format='%b %d')),
        y=alt.Y('positive:Q'),
    )
)

In [21]:
# Pending tests over time.
(
    alt.Chart(us_agg)
    .mark_line(interpolate='basis')
    .encode(
        x=alt.X('date:T', axis=alt.Axis(format='%b %d')),
        y=alt.Y('pending:Q'),
    )
)