In [1]:
import datetime as dt
today = dt.datetime.now() 
print('This notebook was last updated on', today.strftime('%A %B %D at %H:%M'))

This notebook was last updated on Sunday June 06/28/20 at 10:08


In [2]:
import sys
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt
from matplotlib import rc
from matplotlib import ticker
from matplotlib import dates as mdates

rc('text', usetex=True)
rc('text.latex', preamble=r'\usepackage{cmbright}')
rc('font', **{'family': 'sans-serif', 'sans-serif': ['Helvetica']})

%matplotlib inline
%config InlineBackend.figure_formats = {'png', 'retina'}
rc = {'lines.linewidth': 2, 
      'axes.labelsize': 18, 
      'axes.titlesize': 18, 
      'axes.facecolor': 'DFDFE5'}
sns.set_context('notebook', rc=rc)
sns.set_style("dark")

mpl.rcParams['xtick.labelsize'] = 16 
mpl.rcParams['ytick.labelsize'] = 16 
mpl.rcParams['legend.fontsize'] = 14

sys.path.append('./utils')

# see https://github.com/dangeles/dangeles.github.io/blob/master/jupyter/utils/covid_utils.py
import covid_utils as cv 

In [25]:
# load into a dataframe:
pop = pd.read_excel('../data/county_pops.xlsx', comment='#', header=1)

# fetch NYT data:
url = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv'
df = pd.read_csv(url, usecols=[0, 1, 3, 4], parse_dates=['date'], squeeze=True)

pop.columns = np.append(np.array(['county']), pop.columns[1:].values)
pop.county = pop.county.str.strip('.')
pop.county = pop.county.str.split('County,', expand=True)[0].str.strip()


# merge dfs:
df = df.merge(pop, left_on='county', right_on='county')

# df['normedCases'] = df.cases/ df[2019]
# df['normedDeaths'] = df.deaths / df[2019]

cases = df.groupby('county').cases.apply(max).sum()
# death_toll = df.groupby('county').deaths.apply(max).sum()
print('Cases in the US at last update: {0:.2f}'.format(cases / 10 ** 6), 'million')
# print('Death toll in the US at last update: {0:.0f} thousand'.format(death_toll / 10 ** 3)) 

Cases in the US at last update: 1.94 million


In [21]:
df.head()

Unnamed: 0,date,county,fips,cases
0,2020-01-21,Snohomish,53061.0,1
1,2020-01-22,Snohomish,53061.0,1
2,2020-01-23,Snohomish,53061.0,1
3,2020-01-24,Cook,17031.0,1
4,2020-01-24,Snohomish,53061.0,1


In [22]:
df.merge(pop, left_on='county', right_on='county')

Unnamed: 0,date,county,fips,cases,Census,Estimates Base,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,2020-01-21,Snohomish,53061.0,1,713335,713299,715500,722068,732097,744112,757239,769698,787110,802089,813059,822083
1,2020-01-22,Snohomish,53061.0,1,713335,713299,715500,722068,732097,744112,757239,769698,787110,802089,813059,822083
2,2020-01-23,Snohomish,53061.0,1,713335,713299,715500,722068,732097,744112,757239,769698,787110,802089,813059,822083
3,2020-01-24,Snohomish,53061.0,1,713335,713299,715500,722068,732097,744112,757239,769698,787110,802089,813059,822083
4,2020-01-25,Snohomish,53061.0,1,713335,713299,715500,722068,732097,744112,757239,769698,787110,802089,813059,822083
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1306166,2020-06-27,Boundary,16021.0,1,10972,10972,10999,10823,10817,10853,10956,11299,11701,11965,12016,12245
1306167,2020-06-26,Eureka,32011.0,1,1987,1987,1990,1977,1991,2049,1988,2029,1936,1946,1998,2029
1306168,2020-06-27,Eureka,32011.0,1,1987,1987,1990,1977,1991,2049,1988,2029,1936,1946,1998,2029
1306169,2020-06-26,Hettinger,38041.0,1,2477,2478,2476,2504,2542,2644,2627,2657,2597,2479,2503,2499


In [16]:
np.in1d(df.county.unique(), pop.county)

array([False, False, False, ..., False, False, False])

In [19]:
df[df.county.isin(pop.county)]

Unnamed: 0,date,county,fips,cases
