In [1]:
import pandas as pd
import numpy as np
import math

# Plotly in offline mode: the cells below render their figures with py.iplot.
import plotly.offline as py
import plotly.graph_objs as go
py.offline.init_notebook_mode(connected=True)

# Show the value of every top-level expression in a cell, not only the last
# one; several cells below rely on this to display more than one table.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
def PercentFormatter(x):
    """Format a fraction as a whole-number percentage string.

    Parameters
    ----------
    x : number
        Fraction to format (``0.44`` renders as ``"44%"``).

    Returns
    -------
    str
        ``x`` rendered with ``"{:.0%}"``, or ``"-"`` when ``x`` is NaN,
        infinite, or not a real number at all (``None``, ``pd.NA``, text).
    """
    try:
        is_finite = math.isfinite(x)
    except TypeError:
        # Missing-value markers and other non-numeric cells cannot be tested
        # for finiteness; show the placeholder instead of raising.
        return "-"
    return "{:.0%}".format(x) if is_finite else "-"

In [3]:
# Read all the files
# Global cumulative time series published by JHU CSSE.
dfConfirmedGlobal = pd.read_csv(r'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv')
dfDeathsGlobal = pd.read_csv(r'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv')

# Daily report files are named MM-DD-YYYY.csv; build one URL per calendar day
# from 2020-03-23 up to today.
baseURLFormat = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/{}.csv'
filePaths = [baseURLFormat.format(d.strftime('%m-%d-%Y')) for d in pd.date_range('2020-03-23', pd.to_datetime('today'))]

df_from_each_file = []
for f in filePaths:
    try:
        df_from_each_file.append(pd.read_csv(f))
    except Exception as err:
        # Stop at the first report that cannot be read. Catching Exception
        # (not a bare except) lets KeyboardInterrupt / SystemExit propagate,
        # and the message makes a truncated data set visible.
        print('Stopped loading daily reports at {}: {!r}'.format(f, err))
        break
if not df_from_each_file:
    raise ValueError('No daily report could be loaded; check network access and the report URL.')
dfByDay = pd.concat(df_from_each_file, ignore_index=True)

# Archived time series that still carry per-state US rows. The last column of
# each is dropped -- presumably because that date is also covered by the daily
# reports loaded above (TODO confirm).
dfConfirmedUS = pd.read_csv(r'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/archived_data/archived_time_series/time_series_19-covid-Confirmed_archived_0325.csv')
dfConfirmedUS = dfConfirmedUS[dfConfirmedUS.columns[:-1]]
dfDeathsUS = pd.read_csv(r'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/archived_data/archived_time_series/time_series_19-covid-Deaths_archived_0325.csv')
dfDeathsUS = dfDeathsUS[dfDeathsUS.columns[:-1]]

In [4]:
# Narrow both archived series to rows whose Country/Region is 'US', leaving
# out any row whose Province/State is itself the literal 'US'.
dfConfirmedUS = dfConfirmedUS.loc[
    lambda frame: frame['Country/Region'].eq('US') & frame['Province/State'].ne('US')
]
dfDeathsUS = dfDeathsUS.loc[
    lambda frame: frame['Country/Region'].eq('US') & frame['Province/State'].ne('US')
]

In [6]:
# Merge columns after 3/23 when the timeseries broke
def _jhu_date_label(value):
    """Render a date as 'M/DD/YY' (month unpadded), e.g. '3/23/20'.

    Built by hand instead of strftime('%#m/%d/%y'): the '#' flag is only
    honoured by the Windows C runtime, so the label would differ on other
    platforms and no longer line up with the archived date columns.
    """
    stamp = pd.to_datetime(value)
    return '{}/{:02d}/{:02d}'.format(stamp.month, stamp.day, stamp.year % 100)


def _append_daily_reports(dfArchived, dfDailyByState):
    """Left-join per-state daily-report columns onto an archived time series.

    Date columns (everything after the first four columns) are filled with 0
    where missing and cast to int64; rows are returned sorted by the latest
    date column, largest first.
    """
    merged = dfArchived.merge(dfDailyByState, how='left', left_on='Province/State', right_on='Province_State')
    dateCols = merged.columns[4:]
    merged[dateCols] = merged[dateCols].fillna(0).astype(np.int64)
    return merged.sort_values(merged.columns[-1], ascending=False)


# One row per state, one column per (metric, Last_Update) pair.
# NOTE(review): this relies on every row of a daily file sharing one
# Last_Update value; otherwise several columns map to the same date label.
dfByDayState = (
    dfByDay[dfByDay.Country_Region == 'US']
    .groupby(['Last_Update', 'Province_State'])
    .agg({'Confirmed': 'sum', 'Deaths': 'sum'})
    .unstack(level=0)
)

dfByDayStateConfirmed = dfByDayState[['Confirmed']].copy()
dfByDayStateConfirmed.columns = dfByDayStateConfirmed.columns.droplevel().map(_jhu_date_label)
dfConfirmedUSStates = _append_daily_reports(dfConfirmedUS, dfByDayStateConfirmed)

dfByDayStateDeaths = dfByDayState[['Deaths']].copy()
dfByDayStateDeaths.columns = dfByDayStateDeaths.columns.droplevel().map(_jhu_date_label)
dfDeathsUSStates = _append_daily_reports(dfDeathsUS, dfByDayStateDeaths)

In [6]:
# Washington state: cumulative confirmed cases and deaths.
dfWAConfirmed = dfConfirmedUSStates[dfConfirmedUSStates['Province/State'] == 'Washington']
dfWADeaths = dfDeathsUSStates[dfDeathsUSStates['Province/State'] == 'Washington']

# Only columns from position 50 onward are plotted and displayed. Each frame
# is sliced with its OWN columns so the deaths table cannot raise KeyError if
# the two frames ever carry different date columns.
waConfirmedDates = dfWAConfirmed.columns[50:]
waDeathsDates = dfWADeaths.columns[50:]

trace1 = go.Scatter(
    x=waConfirmedDates,
    y=dfWAConfirmed.iloc[0].iloc[50:], name='Confirmed Cases')

trace2 = go.Scatter(
    x=waDeathsDates,
    y=dfWADeaths.iloc[0].iloc[50:], name='Deaths')

data = [trace1, trace2]
layout = dict(title='WA state - Cumulative Cases and Deaths')

py.iplot(dict(data=data, layout=layout))

dfWAConfirmed[waConfirmedDates]
dfWADeaths[waDeathsDates]


Unnamed: 0,3/8/20,3/9/20,3/10/20,3/11/20,3/12/20,3/13/20,3/14/20,3/15/20,3/16/20,3/17/20,3/18/20,3/19/20,3/20/20,3/21/20,3/22/20,3/23/20,3/24/20,3/25/20,3/26/20,3/27/20
0,0,0,267,366,442,568,572,643,904,1076,1014,1376,1524,1793,1996,2221,2328,2591,3207,3477


Unnamed: 0,3/8/20,3/9/20,3/10/20,3/11/20,3/12/20,3/13/20,3/14/20,3/15/20,3/16/20,3/17/20,3/18/20,3/19/20,3/20/20,3/21/20,3/22/20,3/23/20,3/24/20,3/25/20,3/26/20,3/27/20
0,0,0,23,29,31,37,37,40,48,55,55,74,83,94,95,109,116,133,150,157


In [7]:
# Day-over-day change for Washington: difference between adjacent date
# columns, taken over the columns from position 50 onward (the first of those
# columns therefore comes out as NaN).
dfWAConfirmedDailyNew = dfWAConfirmed.loc[:, dfWAConfirmed.columns[50:]].diff(axis=1)
dfWADeathsDailyNew = dfWADeaths.loc[:, dfWADeaths.columns[50:]].diff(axis=1)

# One bar series per metric, read from the single Washington row.
trace1, trace2 = (
    go.Bar(x=frame.columns, y=frame.iloc[0], name=label)
    for frame, label in (
        (dfWAConfirmedDailyNew, 'New Confirmed Cases'),
        (dfWADeathsDailyNew, 'New Deaths'),
    )
)

data = [trace1, trace2]
layout = {'title': 'WA state - Daily Cases and Deaths'}

py.iplot({'data': data, 'layout': layout})

In [8]:
# Washington by county: one row per county (Admin2), one column per
# Last_Update value of the daily reports.
dfWAByCounty = (
    dfByDay[dfByDay.Province_State == 'Washington']
    .groupby(['Last_Update', 'Admin2'])
    .agg({'Confirmed': 'sum', 'Deaths': 'sum'})
    .unstack(level=0)
)

# Copy before relabelling so dfWAByCounty itself is left untouched.
dfWAByCountyConfirmed = dfWAByCounty['Confirmed'].copy()
# Labels are built by hand as 'M/DD/YY'; strftime('%#m/...') only works with
# the Windows C runtime.
dfWAByCountyConfirmed.columns = dfWAByCountyConfirmed.columns.map(
    lambda x: '{d.month}/{d.day:02d}/{yy:02d}'.format(d=pd.to_datetime(x), yy=pd.to_datetime(x).year % 100)
)
dfWAByCountyConfirmed = dfWAByCountyConfirmed.sort_values(dfWAByCountyConfirmed.columns[-1], ascending=False)

# Top 20 counties by the latest column, cumulative ...
dfWAByCountyConfirmedTop = dfWAByCountyConfirmed.head(20)
dfWAByCountyConfirmedTop

# ... and their day-over-day change (first column is NaN by construction).
dfWAByCountyConfirmedTopNew = dfWAByCountyConfirmedTop.diff(axis=1)
dfWAByCountyConfirmedTopNew

Last_Update,3/23/20,3/24/20,3/25/20,3/26/20,3/27/20
Admin2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
King,1170.0,1170.0,1359.0,1577.0,1577.0
Snohomish,519.0,614.0,634.0,778.0,912.0
Pierce,126.0,138.0,155.0,186.0,231.0
Whatcom,48.0,48.0,64.0,86.0,92.0
Spokane,20.0,29.0,33.0,67.0,83.0
Skagit,33.0,45.0,48.0,78.0,78.0
Clark,13.0,13.0,16.0,48.0,76.0
Unassigned,145.0,112.0,51.0,69.0,67.0
Yakima,16.0,25.0,47.0,61.0,66.0
Island,25.0,25.0,38.0,64.0,64.0


Last_Update,3/23/20,3/24/20,3/25/20,3/26/20,3/27/20
Admin2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
King,,0.0,189.0,218.0,0.0
Snohomish,,95.0,20.0,144.0,134.0
Pierce,,12.0,17.0,31.0,45.0
Whatcom,,0.0,16.0,22.0,6.0
Spokane,,9.0,4.0,34.0,16.0
Skagit,,12.0,3.0,30.0,0.0
Clark,,0.0,3.0,32.0,28.0
Unassigned,,-33.0,-61.0,18.0,-2.0
Yakima,,9.0,22.0,14.0,5.0
Island,,0.0,13.0,26.0,0.0


In [9]:
# New York: pick the state's row from each merged frame, then take the
# day-over-day difference across the date columns from position 50 onward.
nyConfirmedMask = dfConfirmedUSStates['Province/State'] == 'New York'
nyDeathsMask = dfDeathsUSStates['Province/State'] == 'New York'
dfNYConfirmed = dfConfirmedUSStates[nyConfirmedMask]
dfNYDeaths = dfDeathsUSStates[nyDeathsMask]
dfNYConfirmedDailyNew = dfNYConfirmed.loc[:, dfNYConfirmed.columns[50:]].diff(axis=1)
dfNYDeathsDailyNew = dfNYDeaths.loc[:, dfNYDeaths.columns[50:]].diff(axis=1)

# Bars for new confirmed cases.
trace1 = go.Bar(
    name='New Confirmed Cases',
    x=dfNYConfirmedDailyNew.columns,
    y=dfNYConfirmedDailyNew.iloc[0],
)

# Bars for new deaths.
trace2 = go.Bar(
    name='New Deaths',
    x=dfNYDeathsDailyNew.columns,
    y=dfNYDeathsDailyNew.iloc[0],
)

data = [trace1, trace2]
layout = {'title': 'NY - Daily Cases and Deaths'}

py.iplot({'data': data, 'layout': layout})

In [10]:
# Ten states with the most confirmed cases (dfConfirmedUSStates is already
# sorted by its latest date column, largest first).
dfUSConfirmedTop = dfConfirmedUSStates.head(10)

# One line per state: dates from column position 50 onward, labelled with the
# value of the row's first column.
data = [{
    'x': state.index[50:],
    'y': state.values[50:],
    'name': state.values[0]
} for _, state in dfUSConfirmedTop.iterrows()]

layout = dict(title='US Confirmed Cases')
py.iplot(dict(data=data, layout=layout))

# Table: first column plus the last 15 date columns, with each state's share
# of the total across all rows on the latest date. .assign() builds a new
# frame, so no SettingWithCopyWarning is raised, and Series.sum() replaces
# the element-by-element builtin sum().
latestConfirmed = dfConfirmedUSStates[dfConfirmedUSStates.columns[-1]]
dfUSConfirmedTop = (
    dfUSConfirmedTop[[dfUSConfirmedTop.columns[0], *dfUSConfirmedTop.columns[-15:]]]
    .assign(percent_total=latestConfirmed / latestConfirmed.sum())
)
dfUSConfirmedTop.style.format({'percent_total': PercentFormatter})

Unnamed: 0,Province/State,3/13/20,3/14/20,3/15/20,3/16/20,3/17/20,3/18/20,3/19/20,3/20/20,3/21/20,3/22/20,3/23/20,3/24/20,3/25/20,3/26/20,3/27/20,percent_total
1,New York,421,525,732,967,1706,2495,5365,8310,11710,15793,20884,25681,30841,37877,44876,44%
9,New Jersey,29,69,98,178,267,267,742,890,1327,1914,2844,3675,4402,6876,8825,9%
2,California,282,340,426,557,698,751,952,1177,1364,1642,2108,2538,2998,3899,4657,5%
44,Michigan,16,25,33,53,65,83,334,552,788,1037,1329,1793,2296,2845,3634,4%
0,Washington,568,572,643,904,1076,1014,1376,1524,1793,1996,2221,2328,2591,3207,3477,3%
3,Massachusetts,123,138,164,197,218,218,328,413,525,646,777,1159,1838,2417,3240,3%
12,Illinois,46,64,93,105,161,162,422,585,753,1049,1285,1537,1865,2538,3024,3%
8,Florida,50,76,115,155,216,314,417,563,659,830,1227,1412,1682,2357,2900,3%
36,Louisiana,36,77,91,136,196,257,392,538,585,837,1172,1388,1795,2304,2744,3%
13,Pennsylvania,41,47,66,77,112,152,206,303,396,509,698,946,1260,1795,2345,2%


In [11]:
# Ten states with the most deaths (dfDeathsUSStates is already sorted by its
# latest date column, largest first).
dfUSDeathsTop = dfDeathsUSStates.head(10)

# One line per state: dates from column position 50 onward, labelled with the
# value of the row's first column.
data = [{
    'x': state.index[50:],
    'y': state.values[50:],
    'name': state.values[0]
} for _, state in dfUSDeathsTop.iterrows()]

layout = dict(title='US Deaths')
py.iplot(dict(data=data, layout=layout))

# Table: first column plus the last 15 date columns, with each state's share
# of the total across all rows on the latest date. .assign() builds a new
# frame, so no SettingWithCopyWarning is raised, and Series.sum() replaces
# the element-by-element builtin sum().
latestDeaths = dfDeathsUSStates[dfDeathsUSStates.columns[-1]]
dfUSDeathsTop = (
    dfUSDeathsTop[[dfUSDeathsTop.columns[0], *dfUSDeathsTop.columns[-15:]]]
    .assign(percent_total=latestDeaths / latestDeaths.sum())
)
dfUSDeathsTop.style.format({'percent_total': PercentFormatter})

Unnamed: 0,Province/State,3/13/20,3/14/20,3/15/20,3/16/20,3/17/20,3/18/20,3/19/20,3/20/20,3/21/20,3/22/20,3/23/20,3/24/20,3/25/20,3/26/20,3/27/20,percent_total
1,New York,0,2,3,10,13,16,34,42,60,117,158,210,285,385,527,33%
0,Washington,37,37,40,48,55,55,74,83,94,95,109,116,133,150,157,10%
36,Louisiana,0,1,2,3,4,4,10,14,16,20,35,46,65,83,119,8%
9,New Jersey,1,1,2,2,3,3,9,11,16,20,27,44,62,81,108,7%
2,California,4,5,6,7,12,13,18,23,24,30,39,50,65,81,94,6%
44,Michigan,0,0,0,0,0,0,3,3,5,9,15,24,43,61,92,6%
6,Georgia,1,1,1,1,1,3,10,13,14,23,25,32,40,48,64,4%
3,Massachusetts,0,0,0,0,0,0,0,1,1,5,9,11,15,25,35,2%
8,Florida,2,3,4,5,6,7,9,10,13,13,18,18,23,29,35,2%
12,Illinois,0,0,0,0,1,1,4,5,6,9,12,16,19,26,34,2%


In [12]:
# China rows are collapsed into a single country-level row.
# NOTE: .sum() runs over every column, so Lat/Long in this row are sums of
# the individual rows' coordinates and do not describe a real location.
chinaDataConfirmed = dfConfirmedGlobal[dfConfirmedGlobal['Country/Region'] == 'China'].sum()
# Overwrite the two label fields explicitly by label list (np.NaN was removed
# in NumPy 2.0, and a tuple key on a flat index is ambiguous).
chinaDataConfirmed.loc[['Province/State', 'Country/Region']] = [np.nan, 'China']

# Top 10 non-China rows by the latest date column, plus the China aggregate.
dfConfirmedWorldTop = (
    dfConfirmedGlobal[dfConfirmedGlobal['Country/Region'] != 'China']
    .sort_values(dfConfirmedGlobal.columns[-1], ascending=False)
    .head(10)
)
# DataFrame.append was removed in pandas 2.0; concat the Series as a one-row
# frame, then infer_objects() so object columns holding only numbers become numeric.
dfConfirmedWorldTop = pd.concat(
    [dfConfirmedWorldTop, chinaDataConfirmed.to_frame().T],
    ignore_index=True,
).infer_objects()
dfConfirmedWorldTop = dfConfirmedWorldTop.sort_values(dfConfirmedGlobal.columns[-1], ascending=False)

# One line per row, from column position 35 onward; the label is the country
# alone when Province/State is missing, otherwise 'province country'.
data = [{
    'x': country.index[35:],
    'y': country.values[35:],
    'name': country.values[1] if pd.isna(country.values[0]) else '{} {}'.format(country.values[0], country.values[1])
} for _, country in dfConfirmedWorldTop.iterrows()]

layout = dict(title='World TOTAL Confirmed Cases')
py.iplot(dict(data=data, layout=layout))

dfConfirmedWorldTop

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/18/20,3/19/20,3/20/20,3/21/20,3/22/20,3/23/20,3/24/20,3/25/20,3/26/20,3/27/20
0,,US,37.0902,-95.7129,1,1,2,2,5,5,...,7783,13677,19100,25489,33276,43847,53740,65778,83836,101657
1,,Italy,43.0,12.0,0,0,0,0,0,0,...,35713,41035,47021,53578,59138,63927,69176,74386,80589,86498
10,,China,1083.3367,3684.4197,548,643,920,1406,2075,2877,...,81102,81156,81250,81305,81435,81498,81591,81661,81782,81897
2,,Spain,40.0,-4.0,0,0,0,0,0,0,...,13910,17963,20410,25374,28768,35136,39885,49515,57786,65719
3,,Germany,51.0,9.0,0,0,0,0,0,1,...,12327,15320,19848,22213,24873,29056,32986,37323,43938,50871
4,,France,46.2276,2.2137,0,0,2,3,3,3,...,9043,10871,12612,14282,16018,19856,22304,25233,29155,32964
5,,Iran,32.0,53.0,0,0,0,0,0,0,...,17361,18407,19644,20610,21638,23049,24811,27017,29406,32332
6,,United Kingdom,55.3781,-3.436,0,0,0,0,0,0,...,2626,2689,3983,5018,5683,6650,8077,9529,11658,14543
7,,Switzerland,46.8182,8.2275,0,0,0,0,0,0,...,3028,4075,5294,6575,7474,8795,9877,10897,11811,12928
8,,"Korea, South",36.0,128.0,1,1,2,2,3,4,...,8413,8565,8652,8799,8961,8961,9037,9137,9241,9332


In [13]:
# Daily new confirmed cases for the five leading rows of dfConfirmedWorldTop.
dfConfirmedWorldTop5 = dfConfirmedWorldTop.head(5)

# The first four columns are kept unchanged; the remaining (date) columns are
# differenced left to right, so the first date column becomes NaN.
dfConfirmedWorldTop5DailyDataCols = dfConfirmedWorldTop5.loc[:, dfConfirmedWorldTop5.columns[4:]]
dfConfirmedWorldTop5Daily = pd.concat(
    [
        dfConfirmedWorldTop5.loc[:, dfConfirmedWorldTop5.columns[:4]],
        dfConfirmedWorldTop5DailyDataCols.diff(axis=1),
    ],
    axis=1,
)

# One line per row from column position 35 onward; rows whose first field
# prints as 'nan' are labelled with the second field only.
data = [
    {
        'x': series.index[35:],
        'y': series.values[35:],
        'name': (
            '{} {}'.format(series.values[0], series.values[1])
            if str(series.values[0]) != 'nan'
            else series.values[1]
        ),
    }
    for _, series in dfConfirmedWorldTop5Daily.iterrows()
]

layout = {'title': 'World Daily NEW Confirmed Cases'}
py.iplot({'data': data, 'layout': layout})

dfConfirmedWorldTop5Daily

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/18/20,3/19/20,3/20/20,3/21/20,3/22/20,3/23/20,3/24/20,3/25/20,3/26/20,3/27/20
0,,US,37.0902,-95.7129,,0.0,1.0,0.0,3.0,0.0,...,1362.0,5894.0,5423.0,6389.0,7787.0,10571.0,9893.0,12038.0,18058.0,17821.0
1,,Italy,43.0,12.0,,0.0,0.0,0.0,0.0,0.0,...,4207.0,5322.0,5986.0,6557.0,5560.0,4789.0,5249.0,5210.0,6203.0,5909.0
10,,China,1083.3367,3684.4197,,95.0,277.0,486.0,669.0,802.0,...,44.0,54.0,94.0,55.0,130.0,63.0,93.0,70.0,121.0,115.0
2,,Spain,40.0,-4.0,,0.0,0.0,0.0,0.0,0.0,...,2162.0,4053.0,2447.0,4964.0,3394.0,6368.0,4749.0,9630.0,8271.0,7933.0
3,,Germany,51.0,9.0,,0.0,0.0,0.0,0.0,1.0,...,3070.0,2993.0,4528.0,2365.0,2660.0,4183.0,3930.0,4337.0,6615.0,6933.0


In [7]:
# China rows are collapsed into a single country-level row.
# NOTE: .sum() runs over every column, so Lat/Long in this row are sums of
# the individual rows' coordinates and do not describe a real location.
chinaDataDeaths = dfDeathsGlobal[dfDeathsGlobal['Country/Region'] == 'China'].sum()
# Overwrite the two label fields explicitly by label list (np.NaN was removed
# in NumPy 2.0, and a tuple key on a flat index is ambiguous).
chinaDataDeaths.loc[['Province/State', 'Country/Region']] = [np.nan, 'China']

# Top 10 non-China rows by the latest date column, plus the China aggregate.
dfDeathsWorldTop = (
    dfDeathsGlobal[dfDeathsGlobal['Country/Region'] != 'China']
    .sort_values(dfDeathsGlobal.columns[-1], ascending=False)
    .head(10)
)
# DataFrame.append was removed in pandas 2.0; concat the Series as a one-row
# frame, then infer_objects() so object columns holding only numbers become numeric.
dfDeathsWorldTop = pd.concat(
    [dfDeathsWorldTop, chinaDataDeaths.to_frame().T],
    ignore_index=True,
).infer_objects()
dfDeathsWorldTop = dfDeathsWorldTop.sort_values(dfDeathsGlobal.columns[-1], ascending=False)

# One line per row, from column position 35 onward; the label is the country
# alone when Province/State is missing, otherwise 'province country'.
data = [{
    'x': country.index[35:],
    'y': country.values[35:],
    'name': country.values[1] if pd.isna(country.values[0]) else '{} {}'.format(country.values[0], country.values[1])
} for _, country in dfDeathsWorldTop.iterrows()]

# Title fixed: the original read 'World Deaths Deaths' (duplicated word).
layout = dict(title='World TOTAL Deaths')
py.iplot(dict(data=data, layout=layout))
dfDeathsWorldTop

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/18/20,3/19/20,3/20/20,3/21/20,3/22/20,3/23/20,3/24/20,3/25/20,3/26/20,3/27/20
0,,Italy,43.0,12.0,0,0,0,0,0,0,...,2978,3405,4032,4825,5476,6077,6820,7503,8215,9134
1,,Spain,40.0,-4.0,0,0,0,0,0,0,...,623,830,1043,1375,1772,2311,2808,3647,4365,5138
10,,China,1083.3367,3684.4197,17,18,26,42,56,82,...,3241,3249,3253,3259,3274,3274,3281,3285,3291,3296
2,,Iran,32.0,53.0,0,0,0,0,0,0,...,1135,1284,1433,1556,1685,1812,1934,2077,2234,2378
3,,France,46.2276,2.2137,0,0,0,0,0,0,...,148,243,450,562,674,860,1100,1331,1696,1995
4,,US,37.0902,-95.7129,0,0,0,0,0,0,...,118,200,244,307,417,557,706,942,1209,1581
5,,United Kingdom,55.3781,-3.436,0,0,0,0,0,0,...,71,137,177,233,281,335,422,465,578,759
6,,Netherlands,52.1326,5.2913,0,0,0,0,0,0,...,58,76,106,136,179,213,276,356,434,546
7,,Germany,51.0,9.0,0,0,0,0,0,0,...,28,44,67,84,94,123,157,206,267,342
8,,Belgium,50.8333,4.0,0,0,0,0,0,0,...,14,21,37,67,75,88,122,178,220,289


In [9]:
# Print the directory this interpreter uses for pure-Python site-packages.
# sysconfig is used because distutils (and distutils.sysconfig.get_python_lib)
# was deprecated by PEP 632 and removed in Python 3.12.
import sysconfig

site_packages_dir = sysconfig.get_path('purelib')
print(site_packages_dir)


c:\python36\Lib\site-packages
