In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Process Data

In [134]:
# Read the US confirmed-case and death time series from the CSSE COVID-19
# data directory (paths are relative to this notebook's location).
confirmed_df, deaths_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv",
        "../data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv",
    )
)

In [135]:
# "Admin2" is renamed to "County" in both frames. The renamed frame is
# rebound instead of using inplace=True, which keeps the step chainable and
# avoids mutating an object another cell may already have displayed.
confirmed_df = confirmed_df.rename(columns={"Admin2": "County"})
deaths_df = deaths_df.rename(columns={"Admin2": "County"})

In [106]:
# Preview the first five rows of the confirmed-cases frame (head() defaults to 5).
confirmed_df.head()

Unnamed: 0,UID,iso2,iso3,code3,FIPS,County,Province_State,Country_Region,Lat,Long_,...,3/25/20,3/26/20,3/27/20,3/28/20,3/29/20,3/30/20,3/31/20,4/1/20,4/2/20,4/3/20
0,16,AS,ASM,16,60.0,,American Samoa,US,-14.271,-170.132,...,0,0,0,0,0,0,0,0,0,0
1,316,GU,GUM,316,66.0,,Guam,US,13.4443,144.7937,...,37,45,51,55,56,58,69,77,82,84
2,580,MP,MNP,580,69.0,,Northern Mariana Islands,US,15.0979,145.6739,...,0,0,0,0,0,0,2,6,6,6
3,630,PR,PRI,630,72.0,,Puerto Rico,US,18.2208,-66.5901,...,51,64,79,100,127,174,239,286,316,316
4,850,VI,VIR,850,78.0,,Virgin Islands,US,18.3358,-64.8963,...,17,17,19,22,0,0,30,30,30,37


# Compute the average growth rate over the last n days

In [107]:
def avg_growth_rate_last_n_days(df, n_days):
    """Average day-over-day relative growth over the last ``n_days`` columns.

    The trailing ``n_days`` columns of ``df`` are read as consecutive counts.
    ``n_days`` columns give ``n_days - 1`` day-over-day growth rates, which are
    averaged separately for every row.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose last ``n_days`` columns hold integer-convertible counts.
        Any non-count column inside that window (for example a column appended
        after the dates) makes the call fail or skews the result, so pass only
        the date columns when in doubt.
    n_days : int
        Number of trailing columns to use; must be at least 2.

    Returns
    -------
    numpy.ndarray
        One mean growth rate per row of ``df``, in row order.

    Raises
    ------
    ValueError
        If ``n_days`` is below 2, if ``df`` has fewer than ``n_days`` columns,
        or if the selected columns cannot be converted to integers.
    """
    if n_days < 2:
        raise ValueError(
            "n_days must be at least 2 to compute a day-over-day growth rate"
        )

    last_n_days = df.values[:, -n_days:]
    if last_n_days.shape[1] != n_days:
        raise ValueError(
            f"n_days={n_days} exceeds the {last_n_days.shape[1]} columns available"
        )

    try:
        # Note: float inputs are truncated towards zero by this cast.
        last_n_days = last_n_days.astype("int")
    except (TypeError, ValueError, OverflowError) as err:
        raise ValueError(
            f"Incorrectly specified number of days: the last {n_days} columns "
            "must all be integer counts"
        ) from err

    previous_day = last_n_days[:, :-1]
    next_day = last_n_days[:, 1:]
    # The denominator is floored at 1 to avoid dividing by zero; when the
    # previous day's count is 0 the "rate" is simply the absolute increase.
    daily_growth = (next_day - previous_day) / np.maximum(previous_day, 1)

    return daily_growth.mean(axis=1)

## Use it to examine California

In [108]:
# Keep only the California rows; .copy() gives an independent frame so the
# columns added to CA_df below do not touch confirmed_df.
CA_df = confirmed_df[confirmed_df["Province_State"].eq("California")].copy()

In [109]:
# Pass only the date columns (labelled m/d/yy, as in the preview above) so the
# "last 3 days" are real dates even if this cell is re-run after the Location
# and ROC columns have been appended to CA_df.
ca_growth_last_3_days = avg_growth_rate_last_n_days(
    CA_df.filter(regex=r"^\d{1,2}/\d{1,2}/\d{2}$"), 3
)

In [110]:
# Label each county "N" when its latitude is above 36 and "S" otherwise
# (rows with a missing Lat compare False and therefore fall into "S").
CA_df["Location"] = np.where(CA_df["Lat"] > 36, "N", "S")

In [111]:
# Attach the growth rates computed above as column "ROC" (presumably "rate of
# change"); the array is assigned positionally, one value per CA_df row.
CA_df['ROC'] = ca_growth_last_3_days

In [112]:
# Rank California counties from fastest to slowest growth.
sorted_by_severity = CA_df.sort_values("ROC", ascending=False)

In [115]:
# Show only counties with more than `thresh` cases in the 4/3/20 column
# (presumably so very small counts do not dominate the ranking), limited to
# the 20 fastest-growing ones.
thresh = 20
(
    sorted_by_severity[sorted_by_severity["4/3/20"].gt(thresh)]
    [["County", "ROC", "Location"]]
    .head(20)
)

Unnamed: 0,County,ROC,Location
244,Tulare,0.36952,N
224,Sacramento,0.321912,N
226,San Bernardino,0.292414,S
205,Kern,0.254609,S
191,Alameda,0.190474,N
232,Santa Barbara,0.186732,S
202,Humboldt,0.179233,N
200,Fresno,0.176112,N
203,Imperial,0.170441,S
223,Riverside,0.16442,S


## Look at the whole US

In [136]:
# Pass only the date columns (labelled m/d/yy, as in the preview above).
# Without this, re-running the cell after ROC has been appended would treat
# the float ROC column as the most recent "day" and silently truncate it to
# an integer, producing wrong growth rates.
national_growth_last_3_days = avg_growth_rate_last_n_days(
    confirmed_df.filter(regex=r"^\d{1,2}/\d{1,2}/\d{2}$"), 3
)
confirmed_df['ROC'] = national_growth_last_3_days

In [139]:
# Rank every US row from fastest to slowest growth, then show the 20
# fastest-growing ones with more than `thresh` cases in the 4/3/20 column.
nat_sorted_by_severity = confirmed_df.sort_values("ROC", ascending=False)
thresh = 1000
(
    nat_sorted_by_severity[nat_sorted_by_severity["4/3/20"].gt(thresh)]
    [["County", "Province_State", "ROC", "4/3/20"]]
    .head(20)
)

Unnamed: 0,County,Province_State,ROC,4/3/20
1143,Jefferson,Louisiana,0.332717,2495
1153,Orleans,Louisiana,0.245489,3476
367,Miami-Dade,Florida,0.238127,3364
1798,Union,New Jersey,0.223714,2487
1794,Passaic,New Jersey,0.218819,2216
1787,Hudson,New Jersey,0.21869,2835
2299,Philadelphia,Pennsylvania,0.205437,2430
1285,Macomb,Michigan,0.197718,1560
1790,Middlesex,New Jersey,0.193069,2125
1226,Essex,Massachusetts,0.182771,1238
