In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import plotly.express as px
import warnings
import ipywidgets as widgets
from ipywidgets import interact, interactive, interact_manual
import plotly.graph_objects as go

# Supress scientific notation/warning
pd.set_option('display.float_format', lambda x: '%.5f' % x)
warnings.filterwarnings("ignore")

## Load Data

In [2]:
# Confirmed Global Cases
confirmed_df = pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv")
# Number of Death Cases - Global
deaths_df = pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv")
# Number of recovered cases - Global
recovered_df = pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv")
# US Confirmed Cases
us_confirmed_df = pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv")
# US Death Cases
us_death_df = pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv")
# Country-specific Data
country_df = pd.read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/web-data/data/cases_country.csv")

### Rename column names to lower case

In [3]:
confirmed_df.columns = confirmed_df.columns.str.lower()
deaths_df.columns = deaths_df.columns.str.lower()
recovered_df.columns = recovered_df.columns.str.lower()
us_confirmed_df.columns = us_confirmed_df.columns.str.lower()
us_death_df.columns = us_death_df.columns.str.lower()
country_df.columns = country_df.columns.str.lower()

### Explore the aggregate numbers

In [4]:
# Number of global confirmed cases. Use last column's value as they are the latest
print(f"Number of global confirmed cases:{confirmed_df.iloc[:,-1:].sum().values[0]:,}")

Number of global confirmed cases:31,517,087


In [5]:
# Number of global deads. Use last column's value as they are the latest
print(f"Number of global deads:{deaths_df.iloc[:,-1:].sum().values[0]:,}")

Number of global deads:969,541


In [6]:
# Number of global recovered cases. Use last column's value as they are the latest
print(f"Number of recovered cases:{recovered_df.iloc[:,-1:].sum().values[0]:,}")

Number of recovered cases:21,624,434


### Countries Sorted by total number of confirmed cases

In [7]:
def highlight_column(s, column_dict):
    """Change the color in columns if passed on column_dict"""
    if (s.name in column_dict.keys()):
        return ['background-color: {}'.format(column_dict[s.name])] * len(s)
    return [''] * len(s)

In [8]:
@interact
def number_of_rows(row_max=len(country_df)): 
    if(row_max<0):
        row_max=0
    return country_df.sort_values(by="confirmed", ascending=False).head(row_max).style.apply(highlight_column,column_dict={'confirmed':'grey','deaths':'red','recovered':'green'})

interactive(children=(IntSlider(value=188, description='row_max', max=564, min=-188), Output()), _dom_classes=…

In [9]:
@interact
def number_of_countries(row_max=len(country_df)): 
    if(row_max<0):
        row_max=0
    countries_to_show_df = country_df.sort_values(by="confirmed", ascending=False).head(row_max)
    fig = px.scatter(countries_to_show_df, x="confirmed", y="deaths",size="deaths", color="country_region",
                 hover_name="country_region", log_x=True, size_max=60)
    fig.show()

interactive(children=(IntSlider(value=188, description='row_max', max=564, min=-188), Output()), _dom_classes=…

In [10]:
confirmed_df.rename(columns={"country/region": "country"}, inplace=True)

In [11]:
deaths_df.rename(columns={"country/region": "country"}, inplace=True)

In [12]:
# Number of confirmed cases
confirmed_df_y=confirmed_df.query("country=='US'").loc[:,'1/22/20'::].values[0]
# X values - dates
confirmed_df_x=confirmed_df.query("country=='US'").loc[:,'1/22/20'::].columns

In [13]:
# Number of deads
deaths_df_y=deaths_df.query("country=='US'").loc[:,'1/22/20'::].values[0]
# X values - dates
deaths_df_x=deaths_df.query("country=='US'").loc[:,'1/22/20'::].columns

In [16]:
confirmed_by_country = confirmed_df.groupby(['country']).sum()
deaths_by_country = deaths_df.groupby(['country']).sum()

In [17]:
confirmed_by_country.index

Index(['Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola',
       'Antigua and Barbuda', 'Argentina', 'Armenia', 'Australia', 'Austria',
       ...
       'United Kingdom', 'Uruguay', 'Uzbekistan', 'Venezuela', 'Vietnam',
       'West Bank and Gaza', 'Western Sahara', 'Yemen', 'Zambia', 'Zimbabwe'],
      dtype='object', name='country', length=188)

In [18]:
@interact
def show_by_country(country=confirmed_by_country.index): 
    """Show confirmed and deaths per country"""
    # Number of confirmed cases
    confirmed_df_y=confirmed_by_country.query(f"country=='{country}'").loc[:,'1/22/20'::].values[0]
    # X values - dates
    confirmed_df_x=confirmed_by_country.query(f"country=='{country}'").loc[:,'1/22/20'::].columns
    # Number of deads
    deaths_df_y=deaths_by_country.query(f"country=='{country}'").loc[:,'1/22/20'::].values[0]
    # X values - dates
    deaths_df_x=deaths_by_country.query(f"country=='{country}'").loc[:,'1/22/20'::].columns
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=confirmed_df_x,y=confirmed_df_y,mode='lines+markers', name='confirmed'))
    fig.add_trace(go.Scatter(x=deaths_df_x,y=deaths_df_y,mode='markers',name='deaths'))
    fig.show()

interactive(children=(Dropdown(description='country', options=('Afghanistan', 'Albania', 'Algeria', 'Andorra',…

In [23]:
# Plot top 10 worst hit countries

In [21]:
country_df.columns

Index(['country_region', 'last_update', 'lat', 'long_', 'confirmed', 'deaths',
       'recovered', 'active', 'incident_rate', 'people_tested',
       'people_hospitalized', 'mortality_rate', 'uid', 'iso3'],
      dtype='object')

In [25]:
#Confirmed Cases
top_10_confirmed = country_df.sort_values(by="confirmed", ascending=False).head(10)
fig = px.scatter(top_10_confirmed, 
                 x="confirmed", y="deaths",size="confirmed", color="country_region",
                 hover_name="country_region", log_x=True, size_max=60)
fig.show()

In [31]:
#Dead Cases
top_10_death = country_df.sort_values(by="deaths", ascending=False).head(10)
fig = px.scatter(top_10_death, 
                 x="deaths", y="confirmed",size="deaths", color="country_region",
                 hover_name="country_region", log_x=True, size_max=60)
fig.show()

In [30]:
#Active Cases
top_10_active = country_df.sort_values(by="active", ascending=False).head(10)
fig = px.scatter(top_10_active, 
                 x="active", y="confirmed",size="active", color="country_region",
                 hover_name="country_region", log_x=True, size_max=60)
fig.show()

In [32]:
#Active Cases
top_10_recovered = country_df.sort_values(by="recovered", ascending=False).head(10)
fig = px.scatter(top_10_recovered, 
                 x="recovered", y="confirmed",size="recovered", color="country_region",
                 hover_name="country_region", log_x=True, size_max=60)
fig.show()

In [34]:
country_df.sort_values(by="mortality_rate", ascending=False).head()

Unnamed: 0,country_region,last_update,lat,long_,confirmed,deaths,recovered,active,incident_rate,people_tested,people_hospitalized,mortality_rate,uid,iso3
185,Yemen,2020-09-23 23:23:49,15.55273,48.51639,2029.0,586.0,1245.0,198.0,6.8028,,,28.88122,887,YEM
104,MS Zaandam,2020-09-23 23:23:49,,,9.0,2.0,,7.0,,,,22.22222,8888,
85,Italy,2020-09-23 23:23:49,41.8719,12.5674,302537.0,35758.0,220665.0,46114.0,500.37687,,,11.81938,380,ITA
113,Mexico,2020-09-23 23:23:49,23.6345,-102.5528,705263.0,74348.0,598953.0,31962.0,551.88229,,,10.54188,484,MEX
178,United Kingdom,2020-09-23 23:23:49,55.0,-3.0,412240.0,41951.0,2239.0,368050.0,607.2533,,,10.17635,826,GBR


In [35]:
#Mortality Rate
top_10_mortality_rate = country_df.sort_values(by="mortality_rate", ascending=False).head(10)
fig = px.scatter(top_10_mortality_rate, 
                 x="mortality_rate", y="incident_rate",size="mortality_rate", color="country_region",
                 hover_name="country_region", log_x=True, size_max=60)
fig.show()

In [40]:
country_df.head()

Unnamed: 0,country_region,last_update,lat,long_,confirmed,deaths,recovered,active,incident_rate,people_tested,people_hospitalized,mortality_rate,uid,iso3
0,Afghanistan,2020-09-23 23:23:49,33.93911,67.70995,39145.0,1446.0,32610.0,5089.0,100.55656,,,3.69396,4,AFG
1,Albania,2020-09-23 23:23:49,41.1533,20.1683,12787.0,370.0,7139.0,5278.0,444.33248,,,2.89356,8,ALB
2,Algeria,2020-09-23 23:23:49,28.0339,1.6596,50400.0,1698.0,35428.0,13274.0,114.93455,,,3.36905,12,DZA
3,Andorra,2020-09-23 23:23:49,42.5063,1.5218,1753.0,53.0,1203.0,497.0,2268.81512,,,3.02339,20,AND
4,Angola,2020-09-23 23:23:49,-11.2027,17.8739,4363.0,159.0,1473.0,2731.0,13.27501,,,3.64428,24,AGO


In [105]:
country_df.columns

Index(['country_region', 'last_update', 'lat', 'long_', 'confirmed', 'deaths',
       'recovered', 'active', 'incident_rate', 'people_tested',
       'people_hospitalized', 'mortality_rate', 'uid', 'iso3'],
      dtype='object')

In [37]:
import folium

In [79]:
# Filter only those that have lat and long_ values
country_df = country_df[~country_df['lat'].isnull()]
country_df = country_df[~country_df['long_'].isnull()]

In [92]:
max_confirmed = country_df['confirmed'].max()
min_confirmed = country_df['confirmed'].min()
print(max_confirmed)
print(min_confirmed)
print(max_confirmed-min_confirmed)
normalized_radius = (39145-min_confirmed)/(max_confirmed-min_confirmed)
print(normalized_radius,39145-min_confirmed/(max_confirmed-min_confirmed))

6940721.0
10.0
6940711.0
0.005638471332403841 39144.99999855922


In [110]:
m = folium.Map()

for country_region, lat, long_, confirmed, deaths, recovered,mortality_rate in zip(country_df['country_region'],
                                      country_df['lat'],
                                      country_df['long_'],
                                      country_df['confirmed'],
                                      country_df['deaths'],
                                      country_df['recovered'],
                                      country_df['mortality_rate']
                                     ):

    folium.Circle(
        #Normalize and augment 
        radius=((confirmed-min_confirmed)/(max_confirmed-min_confirmed))*1000000,
        location=[lat, long_],
        popup='The Waterfront',
        color='crimson',
        fill=True,
        fill_color = 'red',
        tooltip = f"Country:{country_region}<br/>Confirmed:{confirmed}<br/>Death:{deaths}<br/>Death Rate:{mortality_rate}"
    ).add_to(m)

m