In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import plotly.express as px
import warnings
import ipywidgets as widgets
from ipywidgets import interact, interactive, interact_manual
import plotly.graph_objects as go
import folium
from pandas.plotting import autocorrelation_plot
from statsmodels.tsa.arima_model import ARIMA
from tqdm.notebook import trange, tqdm

# Suppress scientific notation in displayed floats and silence library warnings
def _fixed_five_decimals(value):
    """Format a number with exactly five digits after the decimal point."""
    return '%.5f' % value

pd.set_option('display.float_format', _fixed_five_decimals)
warnings.filterwarnings(action="ignore")

## Coronavirus Dashboard

In [3]:
# Download every CSV used by the dashboard in one pass. The targets are, in order:
# global confirmed, global deaths, global recovered, US confirmed, US deaths,
# and the country-specific summary table.
(
    confirmed_df,
    deaths_df,
    recovered_df,
    us_confirmed_df,
    us_death_df,
    country_df,
) = map(
    pd.read_csv,
    (
        "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv",
        "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv",
        "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv",
        "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv",
        "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv",
        "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/web-data/data/cases_country.csv",
    ),
)

In [4]:
# Lower-case the column labels of every loaded frame so later cells can rely on one spelling
for _frame in (confirmed_df, deaths_df, recovered_df, us_confirmed_df, us_death_df, country_df):
    _frame.columns = _frame.columns.str.lower()

In [5]:
def highlight_column(s, column_dict):
    """Build per-cell CSS for one column passed in by ``Styler.apply``.

    If the column's name (``s.name``) is a key of ``column_dict``, every cell
    gets ``background-color: <mapped value>``; otherwise every cell gets an
    empty style string. The returned list has one entry per element of ``s``.
    """
    css = 'background-color: {}'.format(column_dict[s.name]) if s.name in column_dict else ''
    return [css] * len(s)

## Number of Confirmed and Deaths by Country

In [6]:
@interact
def number_of_rows(row_max=len(country_df)):
    """Show the ``row_max`` countries with the most confirmed cases.

    Negative slider values are treated as zero. The confirmed, deaths and
    recovered columns are colour-coded through ``highlight_column``.
    """
    row_max = max(row_max, 0)
    ranked = country_df.sort_values(by="confirmed", ascending=False)
    palette = {'confirmed':'grey','deaths':'red','recovered':'green'}
    return ranked.head(row_max).style.apply(highlight_column, column_dict=palette)

interactive(children=(IntSlider(value=188, description='row_max', max=564, min=-188), Output()), _dom_classes=…

## Confirmed Cases

Select number of countries to display. 

In [7]:
@interact
def number_of_countries(number_of_countries=10):
    """Scatter confirmed vs. deaths for the N countries with the most confirmed cases.

    Negative slider values are treated as zero. Marker size follows the death
    count and the x axis is logarithmic.
    """
    number_of_countries = max(number_of_countries, 0)
    ranked = country_df.sort_values(by="confirmed", ascending=False)
    countries_to_show_df = ranked.head(number_of_countries)
    fig_num_countries = px.scatter(
        data_frame=countries_to_show_df,
        x="confirmed",
        y="deaths",
        size="deaths",
        color="country_region",
        hover_name="country_region",
        log_x=True,
        size_max=60,
    )
    fig_num_countries.show()

interactive(children=(IntSlider(value=10, description='number_of_countries', max=30, min=-10), Output()), _dom…

In [8]:
# Give the country column a short name in both global frames (mutates them in place)
_country_alias = {"country/region": "country"}
for _frame in (confirmed_df, deaths_df):
    _frame.rename(columns=_country_alias, inplace=True)

In [9]:
# Date columns (from 1/22/20 onward) of the US rows, filtered once per frame
_us_confirmed_dates = confirmed_df.query("country=='US'").loc[:, '1/22/20':]
_us_deaths_dates = deaths_df.query("country=='US'").loc[:, '1/22/20':]

# y: counts from the first matching row; x: the date column labels
confirmed_df_y = _us_confirmed_dates.values[0]
confirmed_df_x = _us_confirmed_dates.columns
deaths_df_y = _us_deaths_dates.values[0]
deaths_df_x = _us_deaths_dates.columns

In [10]:
# Collapse province-level rows into one row per country.
# numeric_only=True is explicit because these frames still carry a text
# 'province/state' column; older pandas dropped such columns silently, while
# pandas >= 2.0 tries to "sum" them instead.
confirmed_by_country = confirmed_df.groupby(['country']).sum(numeric_only=True)
deaths_by_country    = deaths_df.groupby(['country']).sum(numeric_only=True)

### Show confirmed by Country

In [11]:
@interact
def show_by_country(country=confirmed_by_country.index):
    """Plot the confirmed and death time series for one country.

    ``country`` is a label from the index of ``confirmed_by_country`` (and is
    expected to exist in ``deaths_by_country`` as well). Confirmed cases are
    drawn as a line with markers, deaths as markers only.
    """
    # Look the row up by label rather than building a query string: a name
    # containing a quote (e.g. "Cote d'Ivoire") makes the query expression invalid.
    confirmed_series = confirmed_by_country.loc[country, '1/22/20':]
    deaths_series = deaths_by_country.loc[country, '1/22/20':]

    fig_by_country = go.Figure()
    fig_by_country.add_trace(go.Scatter(x=confirmed_series.index, y=confirmed_series.values,
                                        mode='lines+markers', name='confirmed'))
    fig_by_country.add_trace(go.Scatter(x=deaths_series.index, y=deaths_series.values,
                                        mode='markers', name='deaths'))
    fig_by_country.show()

interactive(children=(Dropdown(description='country', options=('Afghanistan', 'Albania', 'Algeria', 'Andorra',…

### Top 10 worst hit countries

In [12]:
# Confirmed cases: the ten countries with the highest confirmed count
top_10_confirmed = country_df.sort_values(by="confirmed", ascending=False).head(10)
fig_top_10 = px.scatter(
    data_frame=top_10_confirmed,
    x="confirmed",
    y="deaths",
    size="confirmed",
    color="country_region",
    hover_name="country_region",
    log_x=True,
    size_max=60,
)
fig_top_10.show()

In [13]:
# Deaths: the ten countries with the highest death count
top_10_death = country_df.sort_values(by="deaths", ascending=False).head(10)
fig_top_10_death = px.scatter(
    data_frame=top_10_death,
    x="deaths",
    y="confirmed",
    size="deaths",
    color="country_region",
    hover_name="country_region",
    log_x=True,
    size_max=60,
)
fig_top_10_death.show()

In [14]:
# Active cases: the ten countries with the highest active count
top_10_active = country_df.sort_values(by="active", ascending=False).head(10)
fig_top_10_active = px.scatter(
    data_frame=top_10_active,
    x="active",
    y="confirmed",
    size="active",
    color="country_region",
    hover_name="country_region",
    log_x=True,
    size_max=60,
)
fig_top_10_active.show()

In [15]:
# Mortality rate: the ten countries with the highest rate, plotted against incident rate
top_10_mortality_rate = country_df.sort_values(by="mortality_rate", ascending=False).head(10)
fig_top_10_mortality = px.scatter(
    data_frame=top_10_mortality_rate,
    x="mortality_rate",
    y="incident_rate",
    size="mortality_rate",
    color="country_region",
    hover_name="country_region",
    log_x=True,
    size_max=60,
)
fig_top_10_mortality.show()

## World Map

In [16]:
# Keep only rows that can be placed on a map, i.e. both lat and long_ are present
_has_coordinates = country_df['lat'].notnull() & country_df['long_'].notnull()
country_df = country_df[_has_coordinates]

In [17]:
# Extremes of the confirmed counts, used to normalise marker radii on the world map
_confirmed_counts = country_df['confirmed']
min_confirmed = _confirmed_counts.min()
max_confirmed = _confirmed_counts.max()

In [18]:
# World map: one circle per country, with the radius scaled by where its
# confirmed count falls between the smallest and largest counts.
m = folium.Map(location=[0, 0], zoom_start=1)

# Loop-invariant range, computed once. Fall back to 1 so that identical counts
# produce radius 0 instead of a division by zero.
confirmed_span = (max_confirmed - min_confirmed) or 1

for country_region, lat, long_, confirmed, deaths, mortality_rate in zip(country_df['country_region'],
                                      country_df['lat'],
                                      country_df['long_'],
                                      country_df['confirmed'],
                                      country_df['deaths'],
                                      country_df['mortality_rate']
                                     ):

    folium.Circle(
        # Normalise to [0, 1], then scale up to a visible radius
        radius=((confirmed-min_confirmed)/confirmed_span)*1000000,
        location=[lat, long_],
        popup=country_region,
        color='crimson',
        fill=True,
        fill_color = 'red',
        tooltip = f"Country:{country_region}<br/>Confirmed:{confirmed}<br/>Death:{deaths}<br/>Death Rate:{mortality_rate}"
    ).add_to(m)

m

## Forecasting

In [19]:
# Columns that are not part of the time series and are not needed for forecasting
_non_series_columns = ['Province/State', 'Lat', 'Long']

# World confirmed cases (fresh read with the original column names), trimmed to
# the country column plus the date columns
confirmed_df = pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv").drop(columns=_non_series_columns)
# World death cases, trimmed the same way
death_df = pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv").drop(columns=_non_series_columns)

# One row per country
world_confirmed_grouped_df = confirmed_df.groupby('Country/Region').sum()
world_death_grouped_df     = death_df.groupby('Country/Region').sum()

# Column-wise totals across all countries, stored as a single "World" column
all_confirmed_df = pd.DataFrame(world_confirmed_grouped_df.sum(), columns=["World"])
all_death_df     = pd.DataFrame(world_death_grouped_df.sum(), columns=["World"])

# Transpose so the totals become a single "World" row, and name its index to
# match the grouped frames. rename_axis is applied to the frame that is actually
# concatenated; renaming the index of a throwaway `.T` copy only takes effect
# when pandas happens to share the underlying Index object.
world_confirmed_row = all_confirmed_df.T.rename_axis('Country/Region')
world_death_row     = all_death_df.T.rename_axis('Country/Region')

# "World" first, followed by every country
world_confirmed_df = pd.concat([world_confirmed_row, world_confirmed_grouped_df])
world_death_df     = pd.concat([world_death_row, world_death_grouped_df])

In [21]:
def get_country_data(country_name, df):
    """Return the row of ``df`` labelled ``country_name`` with a datetime index.

    The row's index (the column labels of ``df``) is converted with
    ``pd.to_datetime``. Returns ``None`` when ``country_name`` is not in
    ``df.index``.
    """
    if country_name not in df.index:
        return None
    country_series = df.loc[country_name]
    # Column labels are date strings; parse them so the series can be sliced by date
    country_series.index = pd.to_datetime(country_series.index)
    return country_series

# Fit an ARIMA(10,1,0) model to one country's series and plot a forecast window
def plot_country_forecast(country_name, df=world_confirmed_df, start='2020-09-26', end='2020-10-26'):
    """Plot a country's series together with an ARIMA forecast.

    Parameters
    ----------
    country_name : label of the row to forecast; must be present in ``df.index``.
    df : frame with one row per country and one column per date.
    start, end : forecast window passed to ``plot_predict``. The defaults keep
        the original fixed window of 2020-09-26 to 2020-10-26 (about 30 days).

    Raises
    ------
    ValueError
        If ``country_name`` is not found in ``df``.
    """
    series = get_country_data(country_name, df)
    if series is None:
        # Fail with a clear message instead of an AttributeError on None below
        raise ValueError(f"No data found for country {country_name!r}")
    # NOTE(review): this uses the legacy statsmodels ARIMA interface
    # (fit(disp=...), plot_predict) - confirm it matches the installed statsmodels.
    model = ARIMA(series.values, order=(10,1,0), dates=series.index, freq='D')
    fig2, ax = plt.subplots(figsize=(12,8))
    ax = series.loc['2020-01-22':].plot(ax=ax, label="Confirmed")
    model_fit = model.fit(disp=0)
    model_fit.plot_predict(start, end, dynamic=True, ax=ax, plot_insample=False)
    plt.show()

## Confirmed Cases

In [22]:
@interact
def forecast_by_country(country=world_confirmed_df.index):
    """Interactive dropdown: plot the confirmed-cases forecast for the chosen entry."""
    plot_country_forecast(country_name=country, df=world_confirmed_df)

interactive(children=(Dropdown(description='country', options=('World', 'Afghanistan', 'Albania', 'Algeria', '…

## Deaths

In [23]:
@interact
def forecast_by_country(country=world_death_df.index):
    """Interactive dropdown: plot the deaths forecast for the chosen entry."""
    plot_country_forecast(country_name=country, df=world_death_df)

interactive(children=(Dropdown(description='country', options=('World', 'Afghanistan', 'Albania', 'Algeria', '…

## The United States

In [24]:
# US confirmed cases
us_confirmed_df = pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv")
# US death cases
us_death_df = pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv")

# Name of the last column in each frame (the most recent date)
last_confirmed_column = us_confirmed_df.columns[-1]
last_dead_column = us_death_df.columns[-1]

# Keep only the state and the latest count, under a stable column name.
# Select and rename in one chain so the rename never runs in place on a slice.
us_confirmed_df = (
    us_confirmed_df[['Province_State', last_confirmed_column]]
    .rename(columns={last_confirmed_column: 'Confirmed'})
)
# The deaths frame is renamed with its OWN last column; using the confirmed
# frame's label only works while both files end on the same date.
us_death_df = (
    us_death_df[['Province_State', last_dead_column]]
    .rename(columns={last_dead_column: 'Death'})
)

# Sum the rows of each state
us_confirmed_grouped_df = us_confirmed_df.groupby('Province_State').sum()
us_death_grouped_df = us_death_df.groupby('Province_State').sum()

In [25]:
# Combine per-state confirmed and death totals, keeping states present in both frames
us_confirmed_and_death = us_confirmed_grouped_df.merge(
    us_death_grouped_df,
    on="Province_State",
    how="inner",
)

In [26]:
# Fifteen worst-affected states, ordered by confirmed cases with deaths breaking ties
(
    us_confirmed_and_death
    .sort_values(by=["Confirmed","Death"], ascending=False)
    .head(15)
)

Unnamed: 0_level_0,Confirmed,Death
Province_State,Unnamed: 1_level_1,Unnamed: 2_level_1
California,815654,15782
Texas,767264,15902
Florida,704568,14143
New York,457649,33144
Georgia,316306,6994
Illinois,293448,8881
Arizona,218184,5632
North Carolina,209136,3494
New Jersey,204563,16117
Tennessee,194611,2420


### Top 10 states confirmed

In [27]:
# Bar chart of the ten states with the most confirmed cases (deaths break ties)
us_confirmed_and_death_df = (
    us_confirmed_and_death
    .sort_values(by=["Confirmed","Death"], ascending=False)
    .head(10)
)
fig_confirmed_and_death = px.bar(
    data_frame=us_confirmed_and_death_df,
    x=us_confirmed_and_death_df.index,
    y='Confirmed',
)
fig_confirmed_and_death.show()

### Top 10 states with the most deaths

In [28]:
# Bar chart of the ten states with the most deaths
us_confirmed_and_death_df = (
    us_confirmed_and_death
    .sort_values(by=["Death"], ascending=False)
    .head(10)
)
fig_confirmed_and_death_states = px.bar(
    data_frame=us_confirmed_and_death_df,
    x=us_confirmed_and_death_df.index,
    y='Death',
)
fig_confirmed_and_death_states.show()

In [29]:
# Re-read the full US frames (confirmed first, then deaths), replacing the
# versions held under these names
us_confirmed_df, us_death_df = map(
    pd.read_csv,
    (
        "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv",
        "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv",
    ),
)

In [30]:
# Scaling inputs for the US map: the last column of the frame and its value range
last_confirmed_column = us_confirmed_df.columns[-1]
_latest_counts = us_confirmed_df[last_confirmed_column]
max_confirmed = _latest_counts.max()
min_confirmed = _latest_counts.min()
# Mean latitude/longitude over all rows, used to centre the map
lat_long_mean = us_confirmed_df[['Lat','Long_']].mean().tolist()

## Map of US confirmed cases

In [31]:
# US map: one circle per row of the confirmed frame, sized by its latest count
m = folium.Map(location=lat_long_mean, width=750, height=500, zoom_start=4)

_us_rows = zip(
    us_confirmed_df['Combined_Key'],
    us_confirmed_df['Lat'],
    us_confirmed_df['Long_'],
    us_confirmed_df[last_confirmed_column],
)

for combined_key, lat, long_, confirmed in _us_rows:
    # Normalise the count to [0, 1], then scale up to a visible radius
    scaled_radius = ((confirmed-min_confirmed)/(max_confirmed-min_confirmed))*100000
    marker = folium.Circle(
        radius=scaled_radius,
        location=[lat, long_],
        popup=combined_key,
        color='crimson',
        fill=True,
        fill_color='red',
        tooltip=f"County:{combined_key}<br/>Confirmed:{confirmed:,}<br/>",
    )
    marker.add_to(m)

m

In [32]:
# Copy the index into a regular 'State' column so it can be referenced by column name
us_confirmed_and_death = us_confirmed_and_death.assign(State=lambda frame: frame.index)

## Map of US confirmed cases - Choropleth

In [62]:
# Initialize the map:
m = folium.Map(location=[37, -102], zoom_start=4)

# GeoJSON file with the US state outlines; the relative path is resolved
# against the notebook's working directory
state_geo = "us-states.json"

# Add the colour layer for the choropleth. The folium.Choropleth class is used
# instead of the deprecated Map.choropleth method. Rows are matched to GeoJSON
# features by comparing the 'State' column with feature.properties.name.
folium.Choropleth(
    geo_data=state_geo,
    name='choropleth',
    data=us_confirmed_and_death,
    columns=['State', 'Confirmed'],
    key_on='feature.properties.name',
    fill_color='OrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Confirmed Cases'
).add_to(m)
folium.LayerControl().add_to(m)

m
