In [1]:
import sys

# Make the project's local modules under ../src importable.
# The membership check keeps the cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path each time.
if '../src' not in sys.path:
    sys.path.append('../src')

In [2]:
from covid import daily_snapshot
from pathlib import Path

import population as pop
import pandas as pd
import requests

In [3]:
# Show up to 100 rows when a frame is displayed.
# Use the fully qualified option name: the bare 'max_rows' pattern matches more
# than one option in recent pandas releases and raises OptionError.
pd.set_option('display.max_rows', 100)

In [4]:
# Address of the 03-31-2020 daily report in the CSSEGISandData/COVID-19 repository.
# NOTE(review): nothing in the visible cells reads `url`, and this is a GitHub
# web-view path rather than a raw-file link — confirm whether it is still needed.
url = (
    'https://github.com/CSSEGISandData/COVID-19/tree/master/'
    'csse_covid_19_data/csse_covid_19_daily_reports/03-31-2020.csv'
)

### Quick glance at the data

In [5]:
# Load the snapshot for 03-31-2020 through the project helper, then preview
# the first five rows.
df = daily_snapshot('03-31-2020')
df.head(5)

Unnamed: 0_level_0,Unnamed: 0,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Combined_Key
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
45001.0,,Abbeville,South Carolina,US,2020-03-31 23:43:56,34.223334,-82.461707,4,0,0,0,"Abbeville, South Carolina, US"
22001.0,,Acadia,Louisiana,US,2020-03-31 23:43:56,30.295065,-92.414197,39,1,0,0,"Acadia, Louisiana, US"
51001.0,,Accomack,Virginia,US,2020-03-31 23:43:56,37.767072,-75.632346,7,0,0,0,"Accomack, Virginia, US"
16001.0,,Ada,Idaho,US,2020-03-31 23:43:56,43.452658,-116.241552,195,3,0,0,"Ada, Idaho, US"
19001.0,,Adair,Iowa,US,2020-03-31 23:43:56,41.330756,-94.471059,1,0,0,0,"Adair, Iowa, US"


In [6]:
# Summary statistics for the rows whose Country_Region is 'US'.
df.loc[df['Country_Region'] == 'US'].describe()

Unnamed: 0.1,Unnamed: 0,Lat,Long_,Confirmed,Deaths,Recovered,Active
count,0.0,2177.0,2177.0,2179.0,2179.0,2179.0,2179.0
mean,,37.226079,-88.658406,86.357045,1.777421,3.223497,0.0
std,,7.057726,18.503999,1004.773418,23.680408,150.472067,0.0
min,,0.0,-159.596679,0.0,0.0,0.0,0.0
25%,,33.953399,-95.539078,2.0,0.0,0.0,0.0
50%,,37.841579,-87.722071,5.0,0.0,0.0,0.0
75%,,41.408032,-81.956663,20.0,0.0,0.0,0.0
max,,64.807262,145.6739,43119.0,932.0,7024.0,0.0


### Aggregate by state

In [7]:
# Sum the numeric columns per Province_State for the US rows.
# numeric_only=True is passed explicitly so text columns (e.g. Admin2,
# Last_Update, Combined_Key) are excluded on every pandas version; newer
# releases no longer drop them silently and would try to sum the strings.
# NOTE: Lat and Long_ are numeric and therefore summed as well; those two
# totals carry no meaning and should be ignored.
state_totals = (
    df.loc[df['Country_Region'] == 'US']
    .groupby(by='Province_State')
    .sum(numeric_only=True)
)

In [8]:
# Distribution of the per-state totals (one row per Province_State group).
state_totals.describe()

Unnamed: 0.1,Unnamed: 0,Lat,Long_,Confirmed,Deaths,Recovered,Active
count,58.0,58.0,58.0,58.0,58.0,58.0,58.0
mean,0.0,1397.261622,-3327.74741,3244.344828,66.775862,121.103448,0.0
std,0.0,1154.954186,2887.627484,10179.628981,208.85066,922.296384,0.0
min,0.0,0.0,-13378.805429,0.0,0.0,0.0,0.0
25%,0.0,418.858961,-5190.570613,252.5,4.25,0.0,0.0
50%,0.0,1152.194028,-3017.705127,659.5,12.5,0.0,0.0
75%,0.0,2292.925088,-864.637952,2343.0,52.75,0.0,0.0
max,0.0,4531.701456,145.6739,75833.0,1550.0,7024.0,0.0


In [9]:
# Choose the state to drill into and keep only its rows from the snapshot.
state = 'New Jersey'
state_covid = df.loc[df['Province_State'] == state]

# Columns shown in the per-county tables.
state_columns = [
    'Admin2',
    'Confirmed',
    'Deaths',
    'Recovered',
    'Active',
]

In [10]:
# Rows of the selected state, highest Confirmed count first.
state_covid.loc[:, state_columns].sort_values('Confirmed', ascending=False)

Unnamed: 0_level_0,Admin2,Confirmed,Deaths,Recovered,Active
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
,Unassigned,3686,247,0,0
34003.0,Bergen,2909,4,0,0
34013.0,Essex,1900,4,0,0
34017.0,Hudson,1606,2,0,0
34039.0,Union,1418,0,0,0
34031.0,Passaic,1294,1,0,0
34023.0,Middlesex,1277,0,0,0
34025.0,Monmouth,1140,2,0,0
34029.0,Ocean,1022,1,0,0
34027.0,Morris,841,1,0,0


In [11]:
# Same table, ordered by Deaths in descending order.
state_covid.loc[:, state_columns].sort_values('Deaths', ascending=False)

Unnamed: 0_level_0,Admin2,Confirmed,Deaths,Recovered,Active
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
,Unassigned,3686,247,0,0
34013.0,Essex,1900,4,0,0
34003.0,Bergen,2909,4,0,0
34007.0,Camden,228,3,0,0
34017.0,Hudson,1606,2,0,0
34025.0,Monmouth,1140,2,0,0
34031.0,Passaic,1294,1,0,0
34011.0,Cumberland,18,1,0,0
34035.0,Somerset,413,1,0,0
34027.0,Morris,841,1,0,0


In [12]:
# Web page passed to pop.nj_county_population as the source of county populations.
nj_county_population_url = "https://www.newjersey-demographics.com/counties_by_population"

In [13]:
# Fetch the county population table through the project's `population` module.
# NOTE(review): presumably this performs a network request on every run —
# confirm, and consider caching the result if so.
nj_pop = pop.nj_county_population(nj_county_population_url)

In [14]:
# Display the population table as returned by the helper.
nj_pop

Unnamed: 0_level_0,county,pop2019
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Bergen County,929999
2,Middlesex County,826698
3,Essex County,793555
4,Hudson County,668631
5,Monmouth County,623387
6,Ocean County,591939
7,Union County,553066
8,Camden County,507367
9,Passaic County,504041
10,Morris County,494383


In [15]:
# Drop the trailing " County" so the names line up with the Admin2 values in
# the COVID data (e.g. "Bergen County" -> "Bergen").
# An anchored suffix removal replaces the previous fixed 7-character slice:
# re-running this cell no longer truncates the names a second time, and a
# name without the suffix is left untouched.
nj_pop['county'] = (
    nj_pop['county']
    .astype(str)
    .str.replace(r' County$', '', regex=True)
)

In [16]:
# Spot-check the first row after the county-name clean-up.
nj_pop.iloc[:1]

Unnamed: 0_level_0,county,pop2019
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Bergen,929999


In [17]:
# Join the per-county COVID figures with the county populations and derive
# three percentage columns (each ratio is multiplied by 100):
#   fraction_confirmed - Confirmed as a percentage of pop2019
#   deaths             - Deaths as a percentage of pop2019
#   death_rate?        - Deaths as a percentage of Confirmed
# pd.merge defaults to an inner join, so rows without a matching county name
# (such as the "Unassigned" row) do not appear in the result.
state_with_population = (
    pd.merge(
        state_covid[state_columns],
        nj_pop,
        left_on='Admin2',
        right_on='county',
    )
    .assign(
        fraction_confirmed=lambda merged: merged['Confirmed'] / merged['pop2019'] * 100.0,
        deaths=lambda merged: merged['Deaths'] / merged['pop2019'] * 100.0,
        # The column name is not a valid identifier, so it is passed via a dict.
        **{'death_rate?': lambda merged: merged['Deaths'] / merged['Confirmed'] * 100.0},
    )
)

In [18]:
# Counties ranked by confirmed cases as a percentage of population.
state_with_population.sort_values('fraction_confirmed', ascending=False)

Unnamed: 0,Admin2,Confirmed,Deaths,Recovered,Active,county,pop2019,fraction_confirmed,deaths,death_rate?
1,Bergen,2909,4,0,0,Bergen,929999,0.312796,0.00043,0.137504
15,Passaic,1294,1,0,0,Passaic,504041,0.256725,0.000198,0.07728
19,Union,1418,0,0,0,Union,553066,0.256389,0.0,0.0
8,Hudson,1606,2,0,0,Hudson,668631,0.240192,0.000299,0.124533
6,Essex,1900,4,0,0,Essex,793555,0.239429,0.000504,0.210526
12,Monmouth,1140,2,0,0,Monmouth,623387,0.182872,0.000321,0.175439
14,Ocean,1022,1,0,0,Ocean,591939,0.172653,0.000169,0.097847
13,Morris,841,1,0,0,Morris,494383,0.170111,0.000202,0.118906
11,Middlesex,1277,0,0,0,Middlesex,826698,0.15447,0.0,0.0
17,Somerset,413,1,0,0,Somerset,330176,0.125085,0.000303,0.242131


In [19]:
# Counties ranked by absolute number of deaths.
state_with_population.sort_values('Deaths', ascending=False)

Unnamed: 0,Admin2,Confirmed,Deaths,Recovered,Active,county,pop2019,fraction_confirmed,deaths,death_rate?
6,Essex,1900,4,0,0,Essex,793555,0.239429,0.000504,0.210526
1,Bergen,2909,4,0,0,Bergen,929999,0.312796,0.00043,0.137504
3,Camden,228,3,0,0,Camden,507367,0.044938,0.000591,1.315789
8,Hudson,1606,2,0,0,Hudson,668631,0.240192,0.000299,0.124533
12,Monmouth,1140,2,0,0,Monmouth,623387,0.182872,0.000321,0.175439
14,Ocean,1022,1,0,0,Ocean,591939,0.172653,0.000169,0.097847
5,Cumberland,18,1,0,0,Cumberland,153400,0.011734,0.000652,5.555556
17,Somerset,413,1,0,0,Somerset,330176,0.125085,0.000303,0.242131
15,Passaic,1294,1,0,0,Passaic,504041,0.256725,0.000198,0.07728
13,Morris,841,1,0,0,Morris,494383,0.170111,0.000202,0.118906


In [20]:
# Counties ranked by deaths as a percentage of confirmed cases.
state_with_population.sort_values('death_rate?', ascending=False)

Unnamed: 0,Admin2,Confirmed,Deaths,Recovered,Active,county,pop2019,fraction_confirmed,deaths,death_rate?
5,Cumberland,18,1,0,0,Cumberland,153400,0.011734,0.000652,5.555556
3,Camden,228,3,0,0,Camden,507367,0.044938,0.000591,1.315789
17,Somerset,413,1,0,0,Somerset,330176,0.125085,0.000303,0.242131
6,Essex,1900,4,0,0,Essex,793555,0.239429,0.000504,0.210526
12,Monmouth,1140,2,0,0,Monmouth,623387,0.182872,0.000321,0.175439
1,Bergen,2909,4,0,0,Bergen,929999,0.312796,0.00043,0.137504
8,Hudson,1606,2,0,0,Hudson,668631,0.240192,0.000299,0.124533
13,Morris,841,1,0,0,Morris,494383,0.170111,0.000202,0.118906
14,Ocean,1022,1,0,0,Ocean,591939,0.172653,0.000169,0.097847
15,Passaic,1294,1,0,0,Passaic,504041,0.256725,0.000198,0.07728
