In [1]:
%matplotlib inline

import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import scipy
from datetime import date
from datetime import timedelta
# Notebook-wide plotting defaults: apply the named style sheet first, then
# set the default figure size (width, height) on top of it.
plt.style.use('fivethirtyeight')
plt.rcParams.update({'figure.figsize': (16, 10)})

In [2]:
# Directory holding the daily report CSV files that are read further down.
base_path = 'csse_covid_19_data/csse_covid_19_daily_reports/'

# First and last report dates to load; the last one is whatever "today" is
# at the moment the cell runs.
start = date(2020, 2, 29)
# start = date(2020, 3, 22)
end = date.today()

# Empty accumulator with the expected columns; the daily reports are
# appended to it below.
df_all = pd.DataFrame(
    columns=[
        'FIPS',
        'Admin2',
        'Province_State',
        'Country_Region',
        'Last_Update',
        'Lat',
        'Long_',
        'Confirmed',
        'Deaths',
        'Recovered',
        'Active',
        'Combined_Key',
    ]
)

def rename_column_if_exists(df, old_col_name, new_col_name):
    """Rename one column of ``df`` in place, if that column is present.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify; it is mutated, not copied.
    old_col_name : str
        Column label to look for.
    new_col_name : str
        Label that replaces ``old_col_name`` when it is found.

    Returns
    -------
    None
        The frame is left unchanged when ``old_col_name`` is absent.
    """
    if old_col_name not in df.columns:
        return
    mapping = {old_col_name: new_col_name}
    # Callers use the frame after this call without re-assigning it, so the
    # rename has to happen on the object they passed in.
    df.rename(columns=mapping, inplace=True)
        
def str_join(df, sep, *cols):
    """Join several columns of ``df`` row by row into one string Series.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that holds the columns.
    sep : str
        Separator placed between the values of neighbouring columns.
    *cols : str
        Labels of the columns to join, in output order. At least one is
        required.

    Returns
    -------
    pandas.Series
        Every column cast to ``str`` and concatenated left to right with
        ``sep``. A single column is also returned as strings, so the result
        type does not depend on how many columns were passed.

    Raises
    ------
    TypeError
        If no column labels are given (raised by ``functools.reduce``).
    KeyError
        If a label is not a column of ``df``.
    """
    from functools import reduce

    # Cast every column up front. The previous version cast only inside the
    # reducer, which never runs for a single column, so one-column calls
    # returned the raw (possibly numeric) column.
    as_text = (df[col].astype(str) for col in cols)
    return reduce(lambda joined, nxt: joined.str.cat(nxt, sep=sep), as_text)

def create_combined_key_if_missing(df):
    """Fill in ``Combined_Key`` for the rows of ``df`` that lack one.

    The column is appended when it does not exist. Each row whose key is
    missing receives ``"<Province_State>, <Country_Region>"``; when the
    province is itself missing the key is just the country, so no
    ``"nan, <country>"`` keys are produced. Keys that are already present
    are left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``Province_State`` and ``Country_Region`` columns. It is
        modified in place.

    Returns
    -------
    None
    """
    if 'Combined_Key' not in df.columns:
        # Object dtype so string keys can be written into the column without
        # a float/str dtype clash. np.nan is used because the np.NaN alias
        # was removed in NumPy 2.0.
        df.insert(len(df.columns), 'Combined_Key',
                  np.full(len(df), np.nan, dtype=object))

    needs_key = df['Combined_Key'].isna()
    if not needs_key.any():
        return

    province = df.loc[needs_key, 'Province_State']
    country = df.loc[needs_key, 'Country_Region'].astype(str)
    with_province = province.astype(str).str.cat(country, sep=', ')
    # Test for a missing province on the original values: once cast to str a
    # missing value can no longer be told apart from real text, which is why
    # the former na_rep='' never took effect.
    keys = country.where(province.isna(), with_province)
    # Assign by position; the values are already in the order of the
    # selected rows.
    df.loc[needs_key, 'Combined_Key'] = keys.to_numpy()
    

# Span between the first and the last report date; the day count is printed
# as a quick sanity check.
d = end - start
print(d.days)

# Alternative header spellings that are renamed, when present, to the column
# names used by df_all.
legacy_column_names = {
    'Province/State': 'Province_State',
    'Country/Region': 'Country_Region',
    'Last Update': 'Last_Update',
    'Latitude': 'Lat',
    'Longitude': 'Long_',
}

# Collect the daily frames in a list and concatenate once at the end.
# DataFrame.append was removed in pandas 2.0, and appending inside the loop
# re-copied all previously loaded rows on every iteration.
daily_frames = []
for dayindex in range(d.days + 1):  # +1 so the end date itself is loaded
    day = start + timedelta(days=dayindex)
    filename = '{:%m-%d-%Y}'.format(day)
    # pd.read_csv raises FileNotFoundError when a day's report is absent.
    df_input = pd.read_csv(base_path + filename + '.csv')
    for old_name, new_name in legacy_column_names.items():
        rename_column_if_exists(df_input, old_name, new_name)
    create_combined_key_if_missing(df_input)
    daily_frames.append(df_input)

# df_all goes first so that its columns lead the combined frame, exactly as
# they did when each report was appended to it in turn.
df_all = pd.concat([df_all, *daily_frames])

df_all.loc[df_all['Province_State'] == 'California', :]


#df_raw = pd.read_csv('csse_covid_19_data/csse_covid_19_daily_reports/03-30-2020.csv')
#df_raw.head(30)

30


Unnamed: 0,FIPS,Admin2,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,Combined_Key
40,,,California,US,2020-03-10T19:13:28,36.116200,-119.681600,144,2,2,,"California, US"
39,,,California,US,2020-03-11T23:53:03,36.116200,-119.681600,177,3,2,,"California, US"
37,,,California,US,2020-03-12T23:44:33,36.116200,-119.681600,221,4,6,,"California, US"
175,,,California,US,2020-03-11T20:00:00,36.116203,-119.681564,282,4,6,,"California, US"
32,,,California,US,2020-03-14T22:13:32,36.116200,-119.681600,340,5,6,,"California, US"
...,...,...,...,...,...,...,...,...,...,...,...,...
2840,6109,Tuolumne,California,US,2020-03-30 22:52:45,38.026440,-119.952509,0,0,0,0,"Tuolumne, California, US"
2858,,Unassigned,California,US,2020-03-30 22:52:45,0.000000,0.000000,0,0,0,0,"Unassigned, California, US"
2925,6111,Ventura,California,US,2020-03-30 22:52:45,34.444657,-119.091061,109,4,0,0,"Ventura, California, US"
3164,6113,Yolo,California,US,2020-03-30 22:52:45,38.682789,-121.901829,16,1,0,0,"Yolo, California, US"
