### Load the needed libraries

In [22]:
#%% load the libs
import pandas as pd 
import numpy as np 
import plotly.graph_objects as go

### Read the data and show the first rows

In [23]:
# Alternative source: the deaths time series (same layout as the confirmed one).
#url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Deaths.csv'
url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv'
# Malformed rows are skipped with a warning instead of aborting the load.
# `error_bad_lines` was deprecated in pandas 1.3 and removed in 2.0; its
# replacement is `on_bad_lines`. Try the current keyword first and fall back
# to the legacy one so the cell runs on both old and new pandas.
try:
    data = pd.read_csv(url, on_bad_lines='warn')
except TypeError:
    # pandas < 1.3 does not know `on_bad_lines`.
    data = pd.read_csv(url, error_bad_lines=False)
data.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/6/20,3/7/20,3/8/20,3/9/20,3/10/20,3/11/20,3/12/20,3/13/20,3/14/20,3/15/20
0,,Thailand,15.0,101.0,2,3,5,7,8,8,...,48,50,50,50,53,59,70,75,82,114
1,,Japan,36.0,138.0,2,1,2,2,4,4,...,420,461,502,511,581,639,639,701,773,839
2,,Singapore,1.2833,103.8333,0,1,3,3,4,5,...,130,138,150,150,160,178,178,200,212,226
3,,Nepal,28.1667,84.25,0,0,0,1,1,1,...,1,1,1,1,1,1,1,1,1,1
4,,Malaysia,2.5,112.5,0,0,0,3,4,4,...,83,93,99,117,129,149,149,197,238,428


### Clean up the data: use the country as the province when the province is empty, and label every Chinese province as 'China'

In [24]:
# Label used both to find China's rows and to merge them under one name.
chinaTag = 'China'

# Rows without a province fall back to their country name.
provinceMissing = data['Province/State'].isna()
data.loc[provinceMissing, 'Province/State'] = data.loc[provinceMissing, 'Country/Region']

# Give every row belonging to China the same province label, so that a later
# group-by on 'Province/State' adds them up into a single 'China' entry.
isChina = data['Country/Region'] == chinaTag
data.loc[isChina, 'Province/State'] = data.loc[isChina, 'Country/Region']

Group the rows by province/state, summing the counts of rows that share a label

In [25]:
# Column whose values identify one plotted region.
groupBy = 'Province/State'
# Rows sharing the same label are merged by summing their columns.
grpedData = (
    data
    .groupby(by=[groupBy], as_index=False)
    .sum()
)
grpedData.head()

Unnamed: 0,Province/State,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,...,3/6/20,3/7/20,3/8/20,3/9/20,3/10/20,3/11/20,3/12/20,3/13/20,3/14/20,3/15/20
0,"Adams, IN",39.8522,-77.2865,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Afghanistan,33.0,65.0,0,0,0,0,0,0,0,...,1,1,4,4,5,7,7,7,11,16
2,Alabama,32.3182,-86.9023,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,5,6,12
3,"Alachua, FL",29.7938,-82.4944,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,"Alameda County, CA",37.6017,-121.7195,0,0,0,0,0,0,0,...,1,2,2,2,0,0,0,0,0,0


Three functions to plot the case counts per day, the day-to-day increase in cases, and the case counts on a log10 scale

In [26]:
def plot_daily_cases(data,listOfCountries,minOutbreakThresh=30,groupBy='Province/State'):
    """Plot the case counts of each requested region as one line per region.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per region, with a label column named by ``groupBy`` and one
        numeric column per date.
    listOfCountries : iterable of str
        Labels (values of the ``groupBy`` column) to plot.
    minOutbreakThresh : number, default 30
        Only counts strictly greater than this value are plotted, so every
        curve starts once the region has passed the threshold.
    groupBy : str, default 'Province/State'
        Name of the label column.

    The x axis is the running index of the retained values, not a calendar
    date. The figure is shown; nothing is returned.
    """
    # Pick the date columns by name rather than by position. The previous
    # positional slice (columns[4:]) assumed four leading metadata columns,
    # which silently dropped the first date whenever the frame had only three
    # (label, Lat, Long), as the grouped frame in this notebook does.
    metaCols = {'Province/State', 'Country/Region', 'Lat', 'Long', groupBy}
    colNames = [col for col in data.columns if col not in metaCols]
    fig = go.Figure()
    for state in listOfCountries:
        counts = data.loc[data[groupBy] == state, colNames].to_numpy().ravel()
        # Kept 1-D on purpose: squeezing would turn a single point into a scalar.
        dailyCases = counts[counts > minOutbreakThresh]
        fig.add_trace(go.Scatter(x=np.arange(dailyCases.size),y=dailyCases,mode='lines',name=state))

    fig.update_layout(showlegend=True,xaxis_title='Days',yaxis_title='Number of daily cases')
    fig.show()
def plot_daily_increases(data,listOfCountries,minOutbreakThresh=30,groupBy='Province/State'):
    """Plot the day-to-day change in case counts, one line per region.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per region, with a label column named by ``groupBy`` and one
        numeric column per date.
    listOfCountries : iterable of str
        Labels (values of the ``groupBy`` column) to plot.
    minOutbreakThresh : number, default 30
        Only counts strictly greater than this value are kept before the
        differences are taken.
    groupBy : str, default 'Province/State'
        Name of the label column.

    Each plotted value is the difference between two consecutive retained
    counts, so a region with fewer than two retained counts yields an empty
    line. The figure is shown; nothing is returned.
    """
    # Pick the date columns by name rather than by position. The previous
    # positional slice (columns[4:]) assumed four leading metadata columns,
    # which silently dropped the first date whenever the frame had only three
    # (label, Lat, Long), as the grouped frame in this notebook does.
    metaCols = {'Province/State', 'Country/Region', 'Lat', 'Long', groupBy}
    colNames = [col for col in data.columns if col not in metaCols]
    fig = go.Figure()
    for state in listOfCountries:
        counts = data.loc[data[groupBy] == state, colNames].to_numpy().ravel()
        retained = counts[counts > minOutbreakThresh]
        # Same as retained[1:] - retained[:-1].
        inc = np.diff(retained)
        fig.add_trace(go.Scatter(x=np.arange(inc.size),y=inc,mode='lines',name=state))
    fig.update_layout(showlegend=True,xaxis_title='Days',yaxis_title='Daily increase')
    fig.show()
def plot_daily_cases_log(data,listOfCountries,minOutbreakThresh=10,groupBy='Province/State'):
    """Plot the base-10 logarithm of the case counts, one line per region.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per region, with a label column named by ``groupBy`` and one
        numeric column per date.
    listOfCountries : iterable of str
        Labels (values of the ``groupBy`` column) to plot.
    minOutbreakThresh : number, default 10
        Only counts strictly greater than this value are plotted. Keep it
        non-negative so that no zero count reaches the logarithm.
    groupBy : str, default 'Province/State'
        Name of the label column.

    The figure is shown; nothing is returned.
    """
    # Pick the date columns by name rather than by position. The previous
    # positional slice (columns[4:]) assumed four leading metadata columns,
    # which silently dropped the first date whenever the frame had only three
    # (label, Lat, Long), as the grouped frame in this notebook does.
    metaCols = {'Province/State', 'Country/Region', 'Lat', 'Long', groupBy}
    colNames = [col for col in data.columns if col not in metaCols]
    fig = go.Figure()
    for state in listOfCountries:
        counts = data.loc[data[groupBy] == state, colNames].to_numpy().ravel()
        dailyCases = counts[counts > minOutbreakThresh]
        fig.add_trace(go.Scatter(x=np.arange(dailyCases.size),y=np.log10(dailyCases),
        mode='lines',name=state))
    # The axis shows the log of the case counts, not of the daily increase
    # (the old label was copied from plot_daily_increases).
    fig.update_layout(showlegend=True,xaxis_title='Days',yaxis_title='Number of daily cases (log10 scale)')
    fig.show()

In [27]:
# Case curve for China on its own, with the default outbreak threshold.
plot_daily_cases(data=grpedData, listOfCountries=np.array([chinaTag]), groupBy=groupBy)

In [28]:
# Day-to-day increase for China on its own, with the default outbreak threshold.
plot_daily_increases(data=grpedData, listOfCountries=np.array([chinaTag]), groupBy=groupBy)

In [29]:
# Log10 case curve for China on its own, with the default outbreak threshold.
plot_daily_cases_log(data=grpedData, listOfCountries=np.array([chinaTag]), groupBy=groupBy)

### Get list of countries that have the covid-19 outbreak

Define an outbreak as more than 2000 cases on the most recent date

In [30]:
# A region counts as an outbreak once its latest count exceeds this value.
breakOutTh = 2000
# Labels that must never appear in the selection.
exludedList = [chinaTag, 'Others', 'Diamond Princess']
col_names = grpedData.columns
# The last column holds the most recent date.
latestCol = col_names[-1]
list_of_countries = grpedData.loc[grpedData[latestCol] > breakOutTh, groupBy].values
# China is plotted separately, so drop it (and the other excluded labels) here.
remInd = [idx for idx, label in enumerate(list_of_countries) if label in exludedList]
list_of_countries = np.delete(list_of_countries, remInd)
print(list_of_countries)

['France' 'Germany' 'Iran' 'Italy' 'Korea, South' 'Spain' 'Switzerland']


In [31]:
# Case curves of the selected regions, starting once a region exceeds 20 cases.
plot_daily_cases(data=grpedData, listOfCountries=list_of_countries, minOutbreakThresh=20, groupBy=groupBy)

In [32]:
# Day-to-day increases of the selected regions, counted once a region exceeds 30 cases.
plot_daily_increases(data=grpedData, listOfCountries=list_of_countries, minOutbreakThresh=30, groupBy=groupBy)

In [33]:
# Log10 case curves of the selected regions, starting once a region exceeds 30 cases.
plot_daily_cases_log(data=grpedData, listOfCountries=list_of_countries, minOutbreakThresh=30, groupBy=groupBy)

### Get countries at risk of outbreak
Defined as countries with more than 500 but fewer than 2000 cases on the most recent date

In [34]:
# Lower bound of the "at risk" band; the upper bound is the outbreak threshold.
breakOutRiskTh = 500
col_names = grpedData.columns
# The last column holds the most recent date.
latestCounts = grpedData[col_names[-1]]
inRiskBand = latestCounts.gt(breakOutRiskTh) & latestCounts.lt(breakOutTh)
list_of_countries = grpedData.loc[inRiskBand, groupBy].values
# China is plotted separately, so drop it (and the other excluded labels) here.
remInd = [idx for idx, label in enumerate(list_of_countries) if label in exludedList]
list_of_countries = np.delete(list_of_countries, remInd)
print(list_of_countries)

['Austria' 'Belgium' 'Denmark' 'Japan' 'Netherlands' 'New York' 'Norway'
 'Sweden' 'United Kingdom' 'Washington']


In [35]:
# Case curves of the at-risk regions, with the default outbreak threshold.
plot_daily_cases(data=grpedData, listOfCountries=list_of_countries, groupBy=groupBy)

In [36]:
# Day-to-day increases of the at-risk regions, with the default outbreak threshold.
plot_daily_increases(data=grpedData, listOfCountries=list_of_countries, groupBy=groupBy)

Plot a specific country of interest

In [37]:
# Region to inspect on its own; change the label to look at another one.
selectedCountry = 'Spain'
# A threshold of 1 keeps every day on which at least two cases were recorded.
plot_daily_cases(
    data=grpedData,
    listOfCountries=np.array([selectedCountry]),
    minOutbreakThresh=1,
    groupBy=groupBy,
)

In [38]:
# Day-to-day increase for the selected region, using a threshold of 1.
plot_daily_increases(
    data=grpedData,
    listOfCountries=np.array([selectedCountry]),
    minOutbreakThresh=1,
    groupBy=groupBy,
)

In [39]:
# Log10 case curve for the selected region, using a threshold of 1.
plot_daily_cases_log(
    data=grpedData,
    listOfCountries=np.array([selectedCountry]),
    minOutbreakThresh=1,
    groupBy=groupBy,
)