# COVID-19 report analysis

Utilizes the daily data reporting from Johns Hopkins University Center for Systems Science and Engineering (JHU CSSE):
https://systems.jhu.edu/

Built on the `Table` abstraction from the `datascience` package used in introductory data science courses (for reference see http://data8.org/datascience/tables.html).

In [None]:
# HIDDEN
# This useful nonsense should just go at the top of your notebook.
from datascience import *
%matplotlib inline
#%matplotlib notebook
import matplotlib.pyplot as plots
import numpy as np
plots.style.use('fivethirtyeight')
plots.rc('lines', linewidth=2, color='r')
from ipywidgets import interact
import ipywidgets as widgets
# datascience version number of last run of this notebook
version.__version__

In [None]:
import sys
sys.path.append(".")
from timetable import TimeTable

import locale
locale.setlocale( locale.LC_ALL, 'en_US.UTF-8' ) 

import os
import datetime

# Parsing and cleaning
def denan(v):
    """Turn the literal string 'nan' into ``np.nan``; return any other value unchanged."""
    if v != 'nan':
        return v
    return np.nan

def clean(tbl):
    """Run ``denan`` over every column of ``tbl``, storing the result back in place.

    Nothing is returned; the table passed in is modified.
    """
    for label in tbl.labels:
        cleaned_column = tbl.apply(denan, label)
        tbl[label] = cleaned_column

def is_state(name):
    """Return True when ``name`` contains no comma (i.e. is not of the 'County, ST' form)."""
    return ',' not in name
def is_county(name):
    """Return True when ``name`` contains a comma, as in 'County, ST'."""
    has_comma = ',' in name
    return has_comma
def getstate(name):
    """Return the state part of a 'County, ST' style name.

    The name is split at its last comma and the text after it is returned
    with surrounding whitespace removed. Splitting on the bare comma keeps
    this consistent with ``is_county``, which only tests for ',' -- so
    'King County,WA' (no space) or a name containing several commas no
    longer fails with an unpacking error.

    Raises:
        ValueError: if ``name`` contains no comma.
    """
    _county, sep, state = name.rpartition(',')
    if not sep:
        raise ValueError("expected 'County, State' but got %r" % (name,))
    return state.strip()
def getcounty(name):
    """Return the county part of a 'County, ST' style name.

    The name is split at its last comma and the text before it is returned
    with surrounding whitespace removed. Splitting on the bare comma keeps
    this consistent with ``is_county``, which only tests for ',' -- so
    'King County,WA' (no space) or a name containing several commas no
    longer fails with an unpacking error.

    Raises:
        ValueError: if ``name`` contains no comma.
    """
    county, sep, _state = name.rpartition(',')
    if not sep:
        raise ValueError("expected 'County, State' but got %r" % (name,))
    return county.strip()

# Tools for working with timestamps
def afterday(refday, day):
    """Return True when ``day`` falls on or after ``refday``.

    Both arguments are date strings parsed with the "%m/%d/%y" format,
    e.g. "3/15/20".
    """
    parse = datetime.datetime.strptime
    fmt = "%m/%d/%y"
    reference = parse(refday, fmt)
    candidate = parse(day, fmt)
    return candidate >= reference

def after(trend, refday):
    """Keep only the rows of ``trend`` whose time-column value is on or after ``refday``.

    ``refday`` is a "%m/%d/%y" date string; rows are tested with ``afterday``
    applied to ``trend.time_column``.
    """
    def on_or_after(day):
        return afterday(refday, day)

    keep = trend.apply(on_or_after, trend.time_column)
    return trend.where(keep)

In [None]:
# Tools for working with content
def by_country(raw_world):
    """Aggregate the raw worldwide table to one row per country.

    Drops the 'Province/State', 'Lat' and 'Long' columns, groups what is left
    by 'Country/Region' using ``sum``, and then trims the last four characters
    from every column label after the first (presumably the " sum" suffix that
    grouping adds -- confirm against the datascience docs).
    """
    unused_columns = ['Province/State', 'Lat', 'Long']
    totals = raw_world.drop(unused_columns).group('Country/Region', sum)
    for label in totals.labels[1:]:
        trimmed = label[:-4]
        totals.relabel(label, trimmed)
    return totals

In [None]:
# Projecting growth rates

def incday(day, ndays=1):
    """Return the date ``ndays`` days after ``day``.

    ``day`` is parsed with "%m/%d/%y" and the result is formatted the same
    way, so the output is always zero-padded (e.g. "03/16/20").
    """
    fmt = "%m/%d/%y"
    shifted = datetime.datetime.strptime(day, fmt) + datetime.timedelta(days=ndays)
    return shifted.strftime(fmt)

def ave_growth(trend, window=4):
    """Mean of the 'rate' column over the last ``window`` rows of a single trend."""
    recent_rows = trend.take[-window:]
    return np.mean(recent_rows['rate'])

def project_trend(trend, num_days, rate=None):
    """Extend a trend table with ``num_days`` of projected rows.

    Each projected day multiplies the previous day's new-case count by a
    fixed growth rate (rounded), adds it to the running total, and appends
    a row to a copy of ``trend``.

    Args:
        trend: trend table; the starting point is read with ``last('Day')``,
            ``last(1)`` and ``last('new')``. The table itself is not modified.
        num_days: number of days to project.
        rate: growth rate applied to the daily new count. When None, the
            value of ``ave_growth(trend)`` is used.

    Returns:
        A copy of ``trend`` with one appended row per projected day, each
        row being ``(day, value, new, new / value, growth_rate)``.
    """
    # Compare against None so that an explicit rate of 0 is honored instead
    # of being silently replaced by the recent average.
    growth_rate = ave_growth(trend) if rate is None else rate
    day = trend.last('Day')
    val = trend.last(1)
    growth = trend.last('new')
    proj = trend.copy()
    for _ in range(num_days):
        day = incday(day)
        growth = round(growth * growth_rate)
        val = val + growth
        pnew = growth / val
        proj.append((day, val, growth, pnew, growth_rate))
    return proj

In [None]:
# Pull the most recent data: clone the JHU CSSE repository on first run,
# otherwise update the existing checkout.
if not os.path.exists('./COVID-19'):
    os.system('git clone https://github.com/CSSEGISandData/COVID-19')
else:
    # The checkout directory is './COVID-19' (with the hyphen). `cd ... &&`
    # only affects the shell spawned for this command, so no pushd/popd
    # (which plain `sh` does not provide) is needed.
    os.system('cd ./COVID-19 && git pull')

In [None]:
# Raw data of confirmed cases, read from the time-series CSV in the local
# clone of the JHU CSSE repository.
# NOTE(review): the upstream repository may have renamed or retired this
# file -- confirm the path still exists after a fresh pull.
raw_confirmed = Table.read_table('./COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv')
raw_confirmed

In [None]:
# Collapse the raw table to one row per country, then transpose it with
# TimeTable, keyed on 'Country/Region'.
raw_by_country = by_country(raw_confirmed)
countries_by_day = TimeTable.transpose(raw_by_country, 'Country/Region')
# Display only the rows dated on or after 3/15/20.
after(countries_by_day, "3/15/20")

In [None]:
# What is the last day reported? (TimeTable.last on the 'Day' column)
countries_by_day.last('Day')

### How many of the 195 recognized countries in the world have reported cases?

Note that this count includes some entries that are not countries, such as Princess Cruises.

In [None]:
# Number of rows in the per-country table (one row per 'Country/Region' group).
raw_by_country.num_rows

### Total confirmed cases worldwide

In [None]:
# Start from the 'Day' column alone and add a 'Worldwide' column holding
# sum_rows() of the per-country table.
total_confirmed = countries_by_day.select('Day')
total_confirmed['Worldwide'] = countries_by_day.sum_rows()
# obar() is a plotting helper from timetable.py -- presumably a bar chart
# over time; confirm there.
total_confirmed.obar()

In [None]:
# Breaking this picture down by largest caseloads.
# NOTE(review): the first argument (10) is presumably the number of
# countries to stack -- confirm against TimeTable.stackbar.

countries_by_day.stackbar(10, height=6)

In [None]:
# Recent picture of countries with most cases: the last 10 rows of top(15).
countries_by_day.top(15).take[-10:]

In [None]:
# Last 10 rows of the worldwide trend table produced by trend().
total_confirmed.trend().take[-10:]

In [None]:
# Project the worldwide trend 10 days forward from its last 10 rows and show every row.
project_trend(total_confirmed.trend().take[-10:], 10).show()

In [None]:
# Same projection, keeping only the first three columns and plotting them as bars against 'Day'.
project_trend(total_confirmed.trend().take[-10:], 10).select(range(3)).bar('Day')

In [None]:
# Last 10 rows of the trend table for top(5) of the per-country data.
countries_by_day.top(5).trend().take[-10:]

### Confirmed cases in one country

In [None]:
# Dropdown for picking a country; the options come from
# countries_by_day.categories and the initial selection is 'US'.
w = widgets.Dropdown(
    options=countries_by_day.categories,
    value='US',
    # rows=10,
    description='Country:',
    disabled=False
)
# Evaluating the widget as the last expression renders it in the notebook.
w

In [None]:
# Capture the dropdown's current selection; the value is read when this
# cell runs, so re-run it after changing the widget.
country = w.value

In [None]:
# Trend table for the selected country, shown from 3/5/20 onward.
country_trend = countries_by_day.extract(country).trend()
after(country_trend, '3/5/20').show()

In [None]:
# Restrict the country trend to 3/11/20 onward and plot the country's
# column together with 'new' as bars against 'Day'.
recent = after(country_trend,'3/11/20')
recent.extract([country, 'new']).bar('Day', height=5)

In [None]:
# Project the recent country trend 10 days forward and show every row.
projection = project_trend(recent, 10)
projection.show()

In [None]:
# Bar chart of the projected country column and 'new' against 'Day'.
projection.extract([country, 'new']).bar('Day')

### State/Province level

Which countries are further broken down by state or province?

In [None]:
# Unique 'Country/Region' values among rows whose 'Province/State' is not
# the string 'nan', i.e. countries that have a state/province breakdown.
countries_with_states = list(np.unique(raw_confirmed.where(raw_confirmed['Province/State'] != 'nan')['Country/Region']))

# Dropdown limited to those countries; the initial selection is 'US'.
cw = widgets.Dropdown(
    options=countries_with_states,
    value='US',
    # rows=10,
    description='Country:',
    disabled=False
)
cw

In [None]:
# Capture the current selection of the state-level country dropdown and display it.
sel_country = cw.value
sel_country

In [None]:
# Rows of the raw table whose 'Country/Region' equals the selected country.
raw_sel_confirmed = raw_confirmed.where('Country/Region', sel_country)
raw_sel_confirmed

In [None]:
# Keep only rows whose 'Province/State' has no comma (state-level rows,
# as opposed to 'County, ST' rows).
raw_by_State_confirmed = raw_sel_confirmed.where(raw_sel_confirmed.apply(is_state, 'Province/State'))
# Drop the non-date columns and transpose, keyed on 'Province/State',
# passing 'Day' as the third argument (presumably the time-column label --
# confirm against TimeTable.transpose).
states_by_day = TimeTable.transpose(raw_by_State_confirmed.drop(['Country/Region', 'Lat', 'Long']), 'Province/State', 'Day')
# Restrict to 3/10/20 onward, then reorder columns with order_cols()
# (ordering criterion is defined in timetable.py).
recent_by_State = after(states_by_day,'3/10/20')
recent_by_State = recent_by_State.order_cols()
recent_by_State.show()

In [None]:
# Stacked bar chart of the recent state-level data.
# NOTE(review): the first argument (15) is presumably the number of
# states to stack -- confirm against TimeTable.stackbar.
recent_by_State.stackbar(15, height=6)

# State level analysis

In [None]:
# Dropdown for picking a state/province; the options come from
# recent_by_State.categories and the first category is preselected.
sw = widgets.Dropdown(
    options=recent_by_State.categories,
    value=recent_by_State.categories[0],
    # rows=10,
    description='State/Province:',
    disabled=False
)
sw

In [None]:
# Capture the state dropdown's current selection; re-run after changing the widget.
state = sw.value

In [None]:
# Trend table for the selected state, built from the recent state-level data.
state_trend = recent_by_State.extract(state).trend()
state_trend

In [None]:
# Bar chart of the selected state's column and 'new' against 'Day'.
# NOTE(review): the other extract() calls in this notebook omit 'Day' from
# the list -- check whether passing it here is needed or redundant.
state_trend.extract(['Day', state, 'new']).bar('Day', height=5)

In [None]:
# Project the state trend 10 days forward and show every row.
project_trend(state_trend, 10).show()

In [None]:
# Bar chart of the projected state column and 'new' against 'Day'.
project_trend(state_trend, 10).extract([state, 'new']).bar('Day')

### County-level reporting (no longer present in recent data)

In [None]:
# US rows only, then just those whose 'Province/State' contains a comma
# ('County, ST' entries).
raw_US_confirmed = raw_confirmed.where('Country/Region', 'US')
raw_US_by_County_confirmed = raw_US_confirmed.where(raw_US_confirmed.apply(is_county, 'Province/State'))
# Split 'Province/State' into separate 'State' and 'County' columns.
raw_US_by_County_confirmed['State'] = raw_US_by_County_confirmed.apply(getstate, 'Province/State')
raw_US_by_County_confirmed['County'] = raw_US_by_County_confirmed.apply(getcounty, 'Province/State')
# Move 'County' and then 'State' to the front, so 'State' ends up first.
raw_US_by_County_confirmed.move_to_start('County')
raw_US_by_County_confirmed.move_to_start('State')
# Display the table sorted by 'State'; the result is not assigned back.
raw_US_by_County_confirmed.sort('State')

In [None]:
# County-level rows whose 'State' is 'CA'.
raw_US_by_County_confirmed.where('State', 'CA')

In [None]:
# CA county rows with the non-date columns other than 'County' dropped.
raw_CA_county = raw_US_by_County_confirmed.where('State', 'CA').drop(['State', 'Province/State', 'Country/Region', 'Lat', 'Long'])
# Transpose keyed on 'County', passing 'Day' as the third argument, and show every row.
CA_county_by_day = TimeTable.transpose(raw_CA_county, 'County', 'Day')
CA_county_by_day.show()