# Coronavirus Latest
> Updates on the respiratory illness that has infected more than one million people and killed tens of thousands.

- toc: false
- branch: master
- badges: false
- hide_colab_badge: true
- comments: false
- permalink: /covid-overview/


In [1]:
#hide
#@title Import modules and load datasets from source
import pandas as pd
import numpy as np
import altair as alt
from IPython.display import HTML
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Root of the JHU CSSE COVID-19 data folder (raw GitHub content);
# get_time_series() appends the per-metric CSV path to it.
base_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data'

# confirmed, deaths, recovered
def get_time_series(type):
  """Download one global time series and return it in long form.

  Parameters
  ----------
  type : str
      Metric name spliced into the CSV file name; the notebook calls this
      with 'confirmed', 'deaths' and 'recovered'.

  Returns
  -------
  pandas.DataFrame
      Columns 'Country', 'Date' (datetime64) and 'Cases', one row per
      country and date. Province-level rows are summed per country. Rows are
      grouped country by country, with dates in the CSV's column order.
  """
  source = f'{base_url}/csse_covid_19_time_series/time_series_covid19_{type}_global.csv'
  wide = pd.read_csv(source)

  # Everything that is not location metadata is a per-date count column.
  location_cols = ['Province/State', 'Country/Region', 'Lat', 'Long']
  date_cols = wide.columns[~wide.columns.isin(location_cols)]

  per_country = wide.groupby('Country/Region')[date_cols].sum()
  # stack() turns the date columns into an unnamed second index level,
  # which reset_index() exposes as 'level_1'.
  long = per_country.stack().reset_index(name = 'Cases')
  long = long.rename(columns = {'level_1': 'Date', 'Country/Region': 'Country'})
  long['Date'] = pd.to_datetime(long['Date'], format='%m/%d/%y')
  return long

# Load the three global series once; the cells below read from these frames.
df_confirmed = get_time_series('confirmed')
df_recovered = get_time_series('recovered')
df_deaths = get_time_series('deaths')
date_latest = df_confirmed['Date'].max()
s_date_latest = date_latest.strftime("%B %d, %Y")


def df_last(name, _df, n):
  """Return the rows for the ``n`` most recent dates of a long-format series.

  Parameters
  ----------
  name : str
      Column name that replaces 'Cases' in the result.
  _df : pandas.DataFrame
      Frame with 'Country', 'Date' and 'Cases' columns.
  n : int
      Number of most recent distinct dates to keep.

  Returns
  -------
  pandas.DataFrame
      Indexed by ('Country', 'Date') with a single column called ``name``.
  """
  # Pick the dates by value rather than by row position, so the result does
  # not depend on how the rows of `_df` happen to be ordered.
  _recent_dates = _df['Date'].drop_duplicates().sort_values().tail(n)
  return (_df[_df['Date'].isin(_recent_dates)]
          .set_index(['Country', 'Date'])
          .rename(columns={'Cases': name}))


def _global_totals(last_n):
  """Worldwide Confirmed/Deaths/Recovered totals for the ``last_n`` latest dates.

  Deliberately not named ``df_summary``: a later cell defines an unrelated
  function under that name, and sharing it made re-running this cell break
  the cells that call the later function.

  Returns a frame with one row per date, oldest first, and the columns
  'Date', 'Confirmed', 'Deaths' and 'Recovered'.
  """
  _per_country = pd.concat(
      [
          df_last('Confirmed', df_confirmed, last_n),
          df_last('Deaths', df_deaths, last_n),
          df_last('Recovered', df_recovered, last_n),
      ],
      axis=1)
  return _per_country.groupby(['Date']).sum().reset_index()


# Two rows: the previous day followed by the latest day.
df_global = _global_totals(2)
n_confirmed = df_global['Confirmed'].iloc[-1]
n_deaths = df_global['Deaths'].iloc[-1]
n_recovered = df_global['Recovered'].iloc[-1]

# Day-over-day change; the last row is latest minus previous.
df_global_diff = df_global.diff()
n_confirmed_diff = df_global_diff['Confirmed'].iloc[-1]
n_deaths_diff = df_global_diff['Deaths'].iloc[-1]
n_recovered_diff = df_global_diff['Recovered'].iloc[-1]
countries = df_confirmed['Country'].unique()

## Overview

In [2]:
#hide_input
# Four stacked "number + delta" indicators. Each delta compares the latest
# value with the previous day's value, rebuilt as (latest total - latest change).
n_active = n_confirmed - n_deaths - n_recovered
n_active_previous = (n_confirmed -  n_confirmed_diff) - (n_deaths - n_deaths_diff) - (n_recovered - n_recovered_diff)

# (value, title text, previous-day reference, vertical slot), top to bottom.
_indicator_rows = [
    (n_confirmed,
     f'Total cases reported worldwide in {len(countries) - 1}, {s_date_latest}',
     n_confirmed - n_confirmed_diff,
     [.9, 1]),
    (n_recovered,
     f'total of {n_recovered/n_confirmed*100:.2f}% has recovered',
     n_recovered - n_recovered_diff,
     [.6, .7]),
    (n_deaths,
     f'total of {n_deaths/n_confirmed*100:.2f}% deaths reported',
     n_deaths - n_deaths_diff,
     [.3, .4]),
    (n_active,
     f'total of {n_active/n_confirmed*100:.2f}% remain active',
     n_active_previous,
     [0, .1]),
]

fig = go.Figure()
for _value, _title_text, _previous, _y_slot in _indicator_rows:
    fig.add_trace(go.Indicator(
        mode="number+delta",
        value=_value,
        align="left",
        title={"text": _title_text, "align": "left", "font": {"family": "sans-serif"}},
        delta={'reference': _previous, 'position': 'right', 'valueformat': ','},
        domain={'x': [0, .6], 'y': _y_slot}))

fig.show()

### Confirmed Cases

In [3]:
#hide_input
# Worldwide cumulative confirmed cases per day.
# 'Cases' is selected explicitly before summing: df_confirmed also carries the
# string column 'Country', and summing the whole frame only worked while pandas
# silently dropped non-numeric columns (it no longer does in pandas 2.x).
_df = (df_confirmed.groupby('Date')['Cases'].sum()
       .reset_index(name='Cummulative Confirmed'))
# Day-over-day increase; the first date has no predecessor and stays NaN.
_df['New Confirm'] = _df['Cummulative Confirmed'].diff()

_tooltip_fields = list(_df)

_df_line = alt.Chart(_df).mark_line(color='#D32F2F').encode(
    x=alt.X('Date:T'),
    y=alt.Y('Cummulative Confirmed:Q', axis=alt.Axis(titleColor='#D32F2F'), scale=alt.Scale(type='log')),
    tooltip=_tooltip_fields,
)

_df_new_bar = alt.Chart(_df).mark_bar(opacity=0.3, color='#303F9F').encode(
    x=alt.X('Date:T'),
    y=alt.Y('New Confirm:Q', axis=alt.Axis(titleColor='#303F9F'), scale=alt.Scale(type='log')),
    tooltip=_tooltip_fields,
)

# Independent y scales: the line and the bars each get their own log axis.
alt.layer(_df_line, _df_new_bar).resolve_scale(
    y='independent'
).properties(
    title = 'Confirmed Cases across the world',
    width = 600
)

In [4]:
#hide
def df_summary(_df):
  """Summarise the latest day of a long-format series, one row per country.

  Parameters
  ----------
  _df : pandas.DataFrame
      Frame with 'Date', 'Country' and 'Cases' columns.

  Returns
  -------
  pandas.DataFrame
      Indexed by 'Country' with the columns
      'Total'          latest cumulative count,
      'New'            latest count minus the previous one,
      'New_Mean_7D'    7-row rolling mean of the daily difference (rounded to 0 decimals),
      'Change'         relative change from the previous count to the latest,
      'Change_Mean_7D' 7-row rolling mean of the daily relative change,
      'Trending'       'Change' minus 'Change_Mean_7D',
      'Weight'         'New' divided by the sum of 'New' over all countries.
      Rows are sorted by 'New', 'Total', 'Trending' (descending) and values
      are rounded to 4 decimals.
  """
  # Dates down the rows, one column of cumulative counts per country.
  wide = _df.groupby(['Date', 'Country']).sum().unstack().xs('Cases', axis=1)

  # After the transpose, columns 0 and 1 hold the previous and latest counts.
  snapshot = wide.tail(2).reset_index(drop=True).transpose()
  snapshot['New'] = snapshot.diff(axis=1)[1]

  # Last row of the 7-row rolling mean of the daily increase.
  new_mean = wide.diff().rolling(window=7).mean()[-1:]
  new_mean = new_mean.reset_index(drop=True).transpose().round(0)
  new_mean = new_mean.sort_values(by=0, ascending=False).rename(columns={0: 'New_Mean_7D'})

  # Last row of the 7-row rolling mean of the daily relative change.
  pct_mean = wide.pct_change().rolling(window=7).mean()[-1:]
  pct_mean = pct_mean.reset_index(drop=True).transpose()
  pct_mean = pct_mean.sort_values(by=0, ascending=False).rename(columns={0: 'Change_Mean_7D'})

  snapshot = pd.merge(snapshot, new_mean, on = 'Country')

  # Column-wise pct_change: entry 1 is (latest - previous) / previous.
  snapshot['Change'] = snapshot.pct_change(axis=1)[1]
  snapshot = pd.merge(snapshot, pct_mean, on = 'Country')
  snapshot['Trending'] = snapshot['Change'] - snapshot['Change_Mean_7D']
  snapshot['Weight'] = snapshot['New'] / snapshot['New'].sum()

  snapshot = snapshot.drop(columns=0).rename(columns={1: 'Total'})
  snapshot = snapshot.sort_values(by=['New', 'Total', 'Trending'], ascending=False)
  return snapshot.round(4)

# Per-country summary of confirmed cases; reused by the confirmed-case charts
# and by the death-rate table further down.
df_summary_confirmed = df_summary(df_confirmed)

In [5]:
#hide_input
# First 50 rows of the confirmed summary, with Country as a regular column.
_data = df_summary_confirmed.reset_index().head(50)
_tooltip_fields = list(df_summary_confirmed)

# Charts are built without data; the frame is attached once in alt.layer().
_new_bar = alt.Chart().mark_bar().encode(
    x=alt.X('Country:N', sort='-y'),
    y=alt.Y('New:Q', scale=alt.Scale(type='log')),
    tooltip=_tooltip_fields,
)

# 7-day mean ticks: defined here but not part of the layer rendered below.
_mean_bar = alt.Chart().mark_tick(color='red').encode(
    x=alt.X('Country:N'),
    y=alt.Y('New_Mean_7D:Q', scale=alt.Scale(type='log')),
    tooltip=_tooltip_fields,
)

# Rotated value labels; dx/dy nudge the text so it doesn't sit on top of the bar.
_label_style = dict(angle=270, align='left', baseline='middle', dy=3, dx=3)
_new_text = _new_bar.mark_text(**_label_style).encode(text='New:Q')

alt.layer(_new_bar + _new_text, data=_data).properties(
    title=f'New Confirmed Cases On {s_date_latest}',
    width=600,
)

In [6]:
#hide
# Reference table: the 50 countries with the largest 'New' value in the confirmed summary.
df_summary_confirmed.sort_values(by='New', ascending=False).head(50)

Unnamed: 0_level_0,Total,New,New_Mean_7D,Change,Change_Mean_7D,Trending,Weight
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Brazil,17210969,88092.0,58214.0,0.0051,0.0034,0.0017,0.2375
Colombia,3665137,31656.0,25299.0,0.0087,0.0071,0.0016,0.0853
Argentina,4066156,27628.0,25958.0,0.0068,0.0066,0.0003,0.0745
US,33426310,14417.0,14268.0,0.0004,0.0004,0.0,0.0389
India,29196912,14380.0,88937.0,0.0005,0.0031,-0.0026,0.0388
Spain,3729458,14004.0,5207.0,0.0038,0.0014,0.0024,0.0377
Iran,3003112,12398.0,8287.0,0.0041,0.0028,0.0014,0.0334
Russia,5108217,11560.0,9690.0,0.0023,0.0019,0.0004,0.0312
South Africa,1722086,9147.0,5959.0,0.0053,0.0035,0.0018,0.0247
Indonesia,1885942,8892.0,6974.0,0.0047,0.0038,0.001,0.024


In [7]:
#hide_input
# Only countries whose cumulative total exceeds 1000 are charted.
_over_thousand = df_summary_confirmed['Total'] > 1000
_data = df_summary_confirmed[_over_thousand].reset_index()
_tooltip_fields = list(df_summary_confirmed)
_percent_axis = alt.Axis(format='%')

# Charts are built without data; the frame is attached once in alt.layer().
_new_bar = alt.Chart().mark_bar().encode(
    x=alt.X('Country:N', sort='-y'),
    y=alt.Y('Change:Q', axis=_percent_axis),
    tooltip=_tooltip_fields,
)

# 7-day mean ticks: defined here but not part of the layer rendered below.
_mean_bar = alt.Chart().mark_tick(color='red').encode(
    x=alt.X('Country:N'),
    y=alt.Y('Change_Mean_7D:Q', axis=alt.Axis(format='%')),
    tooltip=_tooltip_fields,
)

# Rotated value labels; dx/dy nudge the text so it doesn't sit on top of the bar.
_label_style = dict(size=10, angle=270, align='left', baseline='middle', dy=3, dx=3)
_new_text = _new_bar.mark_text(**_label_style).encode(text='Change:Q')

alt.layer(_new_bar + _new_text, data=_data).properties(
    title=f'Change of New Confirmed Cases (and 7 Days Average) On {s_date_latest}',
    width=600,
)

#alt.layer(_mean_bar, data=_data)

## Weight of Cases by Countries

In [8]:
#hide_input
# Daily new confirmed cases per country: pivot to wide, difference along dates,
# then back to long form.
_confirmed_wide = df_confirmed.set_index(['Date', 'Country']).unstack()
df_confirmed_new = _confirmed_wide.diff().stack().reset_index()

# Each country's share of that day's new cases (row-wise division by the row sum).
_weight_wide = (
    df_confirmed_new
    .set_index(['Date', 'Country'])
    .rename(columns={'Cases': 'New Confirm Weight'})
    .unstack()
    .apply(lambda x: x/sum(x), axis=1)
    .round(3)
)
_weight_long = _weight_wide.stack().reset_index()

# Clicking a legend entry highlights that country.
selection = alt.selection_multi(fields=['Country'], bind='legend')

# Keep only (date, country) pairs that account for more than 3% of the day.
df_confirmed_new_pct = _weight_long[_weight_long['New Confirm Weight'] > 0.03]

_weight_bar = (
    alt.Chart(df_confirmed_new_pct)
    .mark_bar()
    .encode(
        x='Date:T',
        y=alt.Y('New Confirm Weight:Q', axis=alt.Axis(format='%'), title='Daily Confirm Weight'),
        color='Country',
        tooltip=list(df_confirmed_new_pct),
        opacity=alt.condition(selection, alt.value(1), alt.value(0.2)),
    )
    .add_selection(selection)
)
_weight_bar.properties(
    title='Weight of New Cases by Country (Country with weight more than 3%)',
    width=600,
)

In [9]:
#hide_input
# Per-country summary of deaths, then the subset with more than 10 new deaths
# on the latest day and more than 100 deaths in total.
df_summary_deaths = df_summary(df_deaths)
_is_major = (df_summary_deaths['New'] > 10) & (df_summary_deaths['Total'] > 100)
df_deaths_major = df_summary_deaths.loc[_is_major].reset_index()

df_deaths_major.sort_values(by='New', ascending=False)


Unnamed: 0,Country,Total,New,New_Mean_7D,Change,Change_Mean_7D,Trending,Weight
0,India,359838,4133.0,2734.0,0.0116,0.0078,0.0038,0.3358
1,Brazil,482019,2504.0,1804.0,0.0052,0.0038,0.0014,0.2035
2,Peru,187847,690.0,415.0,0.0037,0.0022,0.0015,0.0561
3,Argentina,83941,669.0,581.0,0.008,0.0071,0.0009,0.0544
4,Colombia,94046,652.0,528.0,0.007,0.0057,0.0012,0.053
5,Russia,123178,376.0,368.0,0.0031,0.003,0.0,0.0306
6,Mexico,229580,227.0,174.0,0.001,0.0008,0.0002,0.0184
7,Indonesia,52373,211.0,183.0,0.004,0.0035,0.0005,0.0171
8,Chile,30339,198.0,106.0,0.0066,0.0035,0.003,0.0161
9,Iran,81672,153.0,145.0,0.0019,0.0018,0.0001,0.0124


In [10]:
#hide_input
# Bars of the 7-day mean relative change in deaths, tallest first.
_deaths_change_bars = alt.Chart(df_deaths_major).mark_bar().encode(
    x=alt.X('Country:N', sort='-y'),
    y=alt.Y('Change_Mean_7D:Q', axis=alt.Axis(format='%'), title='Mean Change'),
    tooltip=list(df_deaths_major),
)
_deaths_change_bars.properties(
    width=600,
    title='Percentage Change (Mean of 7 Days) of Deaths Case',
)

In [11]:
#hide
# Latest cumulative totals per country, one frame per metric.
_confirmed_totals = (df_summary_confirmed.reset_index()[['Country', 'Total']]
                     .rename(columns={'Total': 'Confirmed'}))
_death_totals = (df_summary_deaths.reset_index()[['Country', 'Total']]
                 .rename(columns={'Total': 'Deaths'}))

df_confirmed_deaths = pd.merge(_confirmed_totals, _death_totals, on='Country')
df_confirmed_deaths['Deaths Rate'] = df_confirmed_deaths['Deaths'] / df_confirmed_deaths['Confirmed']

# Top 50 death rates among countries with more than 1000 confirmed cases.
_over_thousand = df_confirmed_deaths[df_confirmed_deaths['Confirmed'] > 1000]
_over_thousand.sort_values(by='Deaths Rate', ascending=False).reset_index(drop=True).head(50)


Unnamed: 0,Country,Confirmed,Deaths,Deaths Rate
0,Yemen,6836,1338,0.195728
1,Peru,1995257,187847,0.094147
2,Mexico,2445538,229580,0.093877
3,Sudan,36203,2719,0.075104
4,Syria,24743,1804,0.07291
5,Egypt,271047,15510,0.057223
6,Somalia,14779,774,0.052372
7,Ecuador,436023,20949,0.048046
8,China,103285,4846,0.046919
9,Bosnia and Herzegovina,204567,9473,0.046308


## Cases in US

In [12]:
#hide 
def get_country(code):
  """Assemble the daily history of one country from the three global series.

  Parameters
  ----------
  code : str
      Value matched against the 'Country' column of df_confirmed, df_deaths
      and df_recovered.

  Returns
  -------
  pandas.DataFrame
      One row per date with the columns 'Date', 'Cases', 'Deaths',
      'Recovered', 'Active' (Cases - Deaths - Recovered), their day-over-day
      differences ('New Cases', 'New Deaths', 'New Recovered', 'New Active'),
      and the ratios 'Deaths_Rate' and 'Recovered_Rate' (each divided by 'Cases').
  """
  def _one_series(frame, label):
    # Rows of the requested country, indexed by date, with the count column relabelled.
    rows = frame[frame['Country'] == code][['Date', 'Cases']].set_index('Date')
    return rows.rename(columns={'Cases': label})

  # Inner join keeps only the dates present in all three series.
  totals = pd.concat(
      [
          _one_series(df_confirmed, 'Cases'),
          _one_series(df_deaths, 'Deaths'),
          _one_series(df_recovered, 'Recovered'),
      ],
      axis=1, join='inner')
  totals['Active'] = totals['Cases'] - totals['Deaths'] - totals['Recovered']

  daily = totals.diff().rename(columns={
      'Cases': 'New Cases',
      'Deaths': 'New Deaths',
      'Recovered': 'New Recovered',
      'Active': 'New Active',
  })
  history = pd.concat([totals, daily], axis=1, join='inner')
  history['Deaths_Rate'] = history['Deaths'] / history['Cases']
  history['Recovered_Rate'] = history['Recovered'] / history['Cases']
  return history.reset_index()

# Country charted in the cells below; the value is also interpolated into their titles.
country = 'US'
df_country_cases = get_country(country)
df_country_cases

Unnamed: 0,Date,Cases,Deaths,Recovered,Active,New Cases,New Deaths,New Recovered,New Active,Deaths_Rate,Recovered_Rate
0,2020-01-22,1,0,0,1,,,,,0.000000,0.0
1,2020-01-23,1,0,0,1,0.0,0.0,0.0,0.0,0.000000,0.0
2,2020-01-24,2,0,0,2,1.0,0.0,0.0,1.0,0.000000,0.0
3,2020-01-25,2,0,0,2,0.0,0.0,0.0,0.0,0.000000,0.0
4,2020-01-26,5,0,0,5,3.0,0.0,0.0,3.0,0.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...
501,2021-06-06,33362600,597628,0,32764972,5395.0,251.0,0.0,5144.0,0.017913,0.0
502,2021-06-07,33378096,597952,0,32780144,15496.0,324.0,0.0,15172.0,0.017915,0.0
503,2021-06-08,33393246,598332,0,32794914,15150.0,380.0,0.0,14770.0,0.017918,0.0
504,2021-06-09,33411893,598764,0,32813129,18647.0,432.0,0.0,18215.0,0.017921,0.0


In [13]:
#hide_input
_tooltip_fields = list(df_country_cases)
# The cumulative line is drawn only for rows where the count exceeds 10.
_cases_over_ten = df_country_cases[df_country_cases['Cases'] > 10]

_new_bar = alt.Chart(df_country_cases).mark_bar().encode(
    x=alt.X('Date:T'),
    y=alt.Y('New Cases:Q'),
    tooltip=_tooltip_fields,
)

_total_line = alt.Chart(_cases_over_ten).mark_line(color='#57A44C').encode(
    x=alt.X('Date:T'),
    y=alt.Y('Cases:Q', axis=alt.Axis(titleColor='#57A44C'), scale=alt.Scale(type='log')),
    tooltip=_tooltip_fields,
)

# Independent y scales: log axis for the cumulative line, linear axis for the daily bars.
alt.layer(_total_line, _new_bar).properties(
    width=600,
    title=f'Number of Total Cases (Daily New Cases) for {country}',
).resolve_scale(y='independent')

In [14]:
#hide_input
_tooltip_fields = list(df_country_cases)
# The cumulative line is drawn only for rows where the death count exceeds 10.
_deaths_over_ten = df_country_cases[df_country_cases['Deaths'] > 10]

_new_bar = alt.Chart(df_country_cases).mark_bar().encode(
    x=alt.X('Date:T'),
    y=alt.Y('New Deaths:Q'),
    tooltip=_tooltip_fields,
)

_total_line = alt.Chart(_deaths_over_ten).mark_line(color='#57A44C').encode(
    x=alt.X('Date:T'),
    y=alt.Y('Deaths:Q', axis=alt.Axis(titleColor='#57A44C'), scale=alt.Scale(type='log')),
    tooltip=_tooltip_fields,
)

# Independent y scales: log axis for the cumulative line, linear axis for the daily bars.
alt.layer(_total_line, _new_bar).properties(
    width=600,
    title=f'Number of Total Deaths Cases (Daily Deaths Cases) for {country}',
).resolve_scale(y='independent')

In [15]:
#hide_input
# New cases against cumulative cases, both on log axes.
# Each comparison is evaluated on its own before combining: `&` binds tighter
# than `>`, so writing `(a) > 0 & (b)` computes `a > (0 & b)` and the
# `Cases > 100` condition is silently lost.
_has_new_cases = df_country_cases['New Cases'] > 0
_over_hundred_cases = df_country_cases['Cases'] > 100
_data = df_country_cases[_has_new_cases & _over_hundred_cases]
alt.Chart(_data).mark_line().encode(
    alt.X('Cases:Q', scale=alt.Scale(type='log')),
    alt.Y('New Cases:Q', scale=alt.Scale(type='log', domain=[0.1, 100000]))
).properties(width = 600)

In [16]:
#hide_input
_tooltip_fields = list(df_country_cases)

# Death rate (red) and recovered rate (green) over time, each on its own y axis.
_rate1_line = alt.Chart(df_country_cases).mark_line(color='red').encode(
    x=alt.X('Date:T'),
    y=alt.Y('Deaths_Rate:Q', axis=alt.Axis(format='%', titleColor='red')),
    tooltip=_tooltip_fields,
)

_rate2_line = alt.Chart(df_country_cases).mark_line(color='green').encode(
    x=alt.X('Date:T'),
    y=alt.Y('Recovered_Rate:Q', axis=alt.Axis(format='%', titleColor='green')),
    tooltip=_tooltip_fields,
)

(_rate1_line + _rate2_line).properties(
    width=600,
    title=f'Rate of Deaths and Recovered for {country}',
).resolve_scale(y='independent')

## Global Death Cases


In [19]:
#hide_input
# Countries compared in this section.
# NOTE: this rebinds `countries`, which the first cell sets to every country
# name and the overview title reads; re-running the overview cell after this
# one changes the count it shows.
# Other candidates: 'Korea, South', 'Italy', 'Spain', 'US', 'Belgium'
countries = ['Taiwan*', 'Malaysia', 'China']

# Daily new deaths: countries as rows, dates as columns, differenced along the dates.
_new_deaths_wide = df_deaths.pivot(index='Country', columns='Date').diff(axis=1)['Cases']
_df = _new_deaths_wide.stack().reset_index().rename(columns={0: 'New Deaths'})
_data = _df[_df['Country'].isin(countries)]

# Clicking a legend entry highlights that country.
selection = alt.selection_multi(fields=['Country'], bind='legend')
alt.Chart(_data).mark_bar(opacity=0.8).encode(
    x=alt.X('Date:T'),
    y=alt.Y('New Deaths:Q'),
    color='Country:N',
    tooltip=list(_data),
    opacity=alt.condition(selection, alt.value(1), alt.value(0.2)),
).add_selection(selection).properties(
    width=600,
)

In [20]:
#hide_input
# Deaths and confirmed counts of the selected countries, aligned on (Country, Date).
_deaths_selected = (df_deaths[df_deaths['Country'].isin(countries)]
                    .set_index(['Country', 'Date'])
                    .rename(columns={'Cases': 'Deaths'}))
_confirmed_selected = (df_confirmed[df_confirmed['Country'].isin(countries)]
                       .set_index(['Country', 'Date'])
                       .rename(columns={'Cases': 'Confirmed'}))
_df_objs = [_deaths_selected, _confirmed_selected]

_df_countries = pd.concat(_df_objs, join='inner', axis=1).reset_index()
_df_countries['Deaths Rate'] = _df_countries['Deaths'] / _df_countries['Confirmed']

# Missing rates are plotted as 0; `selection` is the legend selection
# created in the previous cell.
alt.Chart(_df_countries.fillna(0)).mark_line().encode(
    x=alt.X('Date:T'),
    y=alt.Y('Deaths Rate:Q', axis=alt.Axis(format='%')),
    color='Country',
    tooltip=list(_df_countries),
    opacity=alt.condition(selection, alt.value(1), alt.value(0.3)),
).add_selection(selection).properties(
    width=600,
)

In [22]:
# Show the merged per-country frame behind the death-rate chart for inspection.
_df_countries

Unnamed: 0,Country,Date,Deaths,Confirmed,Deaths Rate
0,China,2020-01-22,17,548,0.031022
1,China,2020-01-23,18,643,0.027994
2,China,2020-01-24,26,920,0.028261
3,China,2020-01-25,42,1406,0.029872
4,China,2020-01-26,56,2075,0.026988
...,...,...,...,...,...
1513,Taiwan*,2021-06-06,260,11298,0.023013
1514,Taiwan*,2021-06-07,286,11491,0.024889
1515,Taiwan*,2021-06-08,308,11694,0.026338
1516,Taiwan*,2021-06-09,333,11968,0.027824
