# Coronavirus Latest
> Updates on the respiratory illness that has infected more than one million people and killed tens of thousands.

- toc: false
- branch: master
- badges: false
- hide_colab_badge: true
- comments: false


In [0]:
#hide
import pandas as pd
import numpy as np
import altair as alt

In [2]:
#hide
# COVID-19 Data from https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_time_series
# Common URL prefix of the raw CSV files; get_time_series and get_df_daily
# append their own sub-path and file name to it.
base_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data'

# confirmed, deaths, recovered
def get_time_series(type):
  """Download one global time-series CSV and reshape it to long format.

  `type` is spliced into the file name (`time_series_covid19_{type}_global.csv`);
  the notebook calls this with 'confirmed', 'deaths' and 'recovered'.

  Returns a DataFrame with one row per country and day and the columns
  `Country`, `Date` (parsed as datetime) and `Cases` (sum over all rows that
  share the same Country/Region).
  """
  source_csv = f'{base_url}/csse_covid_19_time_series/time_series_covid19_{type}_global.csv'
  wide = pd.read_csv(source_csv)

  # Every column that is not location metadata holds the counts of one day.
  meta_cols = ['Province/State', 'Country/Region', 'Lat', 'Long']
  day_cols = [col for col in wide.columns if col not in meta_cols]

  per_country = wide.groupby('Country/Region')[day_cols].sum()

  # Wide (one column per day) -> long (one row per country and day).
  long = per_country.stack().reset_index(name='Cases')
  long = long.rename(columns={'level_1': 'Date', 'Country/Region': 'Country'})

  # Column headers look like 1/22/20.
  long['Date'] = pd.to_datetime(long['Date'], format='%m/%d/%y')
  return long

# eg. 03-27-2020
def get_df_daily(date):
  """Read the daily report CSV named `{date}.csv` (e.g. date='03-27-2020')."""
  daily_csv = f'{base_url}/csse_covid_19_daily_reports/{date}.csv'
  df_daily = pd.read_csv(daily_csv)
  return df_daily

# One download per series: confirmed, then recovered, then deaths.
df_confirmed, df_recovered, df_deaths = (
    get_time_series(kind) for kind in ('confirmed', 'recovered', 'deaths')
)

# Most recent date in the confirmed series; used as the default "as of" date.
date_latest = df_confirmed['Date'].max()

def countries_cases(df, n=5000, date_as_of=date_latest):
  """Rows of `df` dated `date_as_of` whose `Cases` is strictly greater than `n`."""
  on_date = df['Date'].eq(date_as_of)
  above_n = df['Cases'] > n
  return df[on_date & above_n]

# Last n days of type e.g. confirmed, recovered, deaths
def df_last(name, _df, n):
  """Return the rows of `_df` that fall on its `n` most recent distinct dates.

  Parameters:
    name: new name for the `Cases` column (e.g. 'Confirmed').
    _df:  long-format frame with `Country`, `Date` and `Cases` columns.
    n:    number of most recent distinct dates to keep; if fewer than `n`
          dates exist, all rows are kept.

  Returns a DataFrame indexed by (`Country`, `Date`) with the single value
  column `name`.
  """
  # Rank the distinct dates explicitly. `_df['Date'].tail(n)` would take the
  # last n *rows*, which are the last n dates only if the frame is sorted by
  # country then date and every country covers the full date range.
  last_dates = _df['Date'].drop_duplicates().sort_values().tail(n)
  return (_df[_df['Date'].isin(last_dates)]
          .set_index(['Country', 'Date'])
          .rename(columns={'Cases': name}))

# Confirmed / Deaths / Recovered side by side for the last `last_n` days, all countries
def df_summary(last_n):
  """Column-wise concatenation of the last `last_n` days of the three series.

  Each series goes through `df_last`, so the result is indexed by
  (`Country`, `Date`) with the columns `Confirmed`, `Deaths` and `Recovered`.
  """
  series = (
      ('Confirmed', df_confirmed),
      ('Deaths', df_deaths),
      ('Recovered', df_recovered),
  )
  return pd.concat([df_last(label, frame, last_n) for label, frame in series], axis=1)

#df_summary.xs('Malaysia', level = 0)

# Worldwide totals for the two most recent days: one row per date.
df_global = df_summary(2).groupby(['Date']).sum().reset_index()

# Long format with the columns Date, Type and Cases. Date is moved into the
# index before stacking so that only the count columns are stacked; stacking
# df_global directly would also turn the Date column into "Cases" rows.
df_global_summary = (df_global.set_index('Date')
                     .stack()
                     .reset_index()
                     .rename(columns={'level_1': 'Type', 0: 'Cases'}))

print(date_latest)

2020-04-04 00:00:00


In [3]:
#hide
# List of Countries
# Distinct values of the Country column of the confirmed series.
countries = df_confirmed['Country'].unique()
# NOTE(review): `df_counties` looks like a typo for `df_countries`; the name is
# left unchanged in case other cells reference it.
df_counties = pd.DataFrame(countries)
# Last expression of the cell, so the array is displayed.
countries

array(['Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola',
       'Antigua and Barbuda', 'Argentina', 'Armenia', 'Australia',
       'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh',
       'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin', 'Bhutan',
       'Bolivia', 'Bosnia and Herzegovina', 'Botswana', 'Brazil',
       'Brunei', 'Bulgaria', 'Burkina Faso', 'Burma', 'Burundi',
       'Cabo Verde', 'Cambodia', 'Cameroon', 'Canada',
       'Central African Republic', 'Chad', 'Chile', 'China', 'Colombia',
       'Congo (Brazzaville)', 'Congo (Kinshasa)', 'Costa Rica',
       "Cote d'Ivoire", 'Croatia', 'Cuba', 'Cyprus', 'Czechia', 'Denmark',
       'Diamond Princess', 'Djibouti', 'Dominica', 'Dominican Republic',
       'Ecuador', 'Egypt', 'El Salvador', 'Equatorial Guinea', 'Eritrea',
       'Estonia', 'Eswatini', 'Ethiopia', 'Fiji', 'Finland', 'France',
       'Gabon', 'Gambia', 'Georgia', 'Germany', 'Ghana', 'Greece',
       'Grenada', 'Guatemala', 'Guinea', 'Guine

In [0]:
#hide
def chart_countries_timeline(_df, countries, since_date='1/23/2020'):
  """Line chart of `Cases` over `Date`, one line per country.

  Keeps the rows of `_df` whose `Country` is in `countries` and whose `Date`
  is on or after `since_date`. Lines are coloured by country, every column is
  exposed in the tooltip, and a legend-bound selection fades unselected
  countries to 0.2 opacity. The chart is 1000 wide.
  """
  in_scope = _df['Country'].isin(countries) & _df['Date'].ge(since_date)
  data = _df[in_scope]

  legend_pick = alt.selection_multi(fields=['Country'], bind='legend')
  highlight = alt.condition(legend_pick, alt.value(1), alt.value(0.2))

  lines = alt.Chart(data).mark_line().encode(
      x='Date:T',
      y='Cases:Q',
      color='Country',
      tooltip=list(data),
      opacity=highlight,
  )
  return lines.properties(width=1000).add_selection(legend_pick)

def chart_countries_timeline_diff(df, country):
  """Bar chart of the row-to-row change in `Cases` for a single `country`."""
  daily_new = (
      df.loc[df['Country'] == country, ['Date', 'Cases']]
      .set_index('Date')
      .diff()          # first row has no predecessor and becomes NaN
      .reset_index()
  )
  return alt.Chart(daily_new).mark_bar().encode(x='Date:T', y='Cases:Q')

def chart_countries_cases(df, n, date_as_of=date_latest):
    """Bar chart of countries with more than `n` cases on `date_as_of`.

    Rows are selected with `countries_cases`; bars are sorted by descending
    value and each bar carries its case count as a text label. The layered
    chart is 500 wide.
    """
    source = countries_cases(df, n, date_as_of)

    bars = alt.Chart(source).mark_bar().encode(
        x=alt.X('Country:O', sort='-y'),
        y='Cases:Q',
    )
    # Labels reuse the bar encoding and sit on top of each bar.
    labels = bars.mark_text(align='center', baseline='bottom').encode(text='Cases:Q')

    layered = bars + labels
    return layered.properties(width=500)

s_date_latest = date_latest.strftime('%m/%d/%Y')

# Global totals, read from the row labelled 1 of df_global.
n_confirmed, n_deaths, n_recovered = (
    df_global.loc[1][column] for column in ('Confirmed', 'Deaths', 'Recovered')
)

# Change between the two rows of df_global.
df_global_diff = df_global.diff()
n_confirmed_diff, n_deaths_diff, n_recovered_diff = (
    df_global_diff.loc[1][column] for column in ('Confirmed', 'Deaths', 'Recovered')
)

# Looked up with date.weekday(), where Monday is 0.
weekDays = (
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
    "Saturday",
    "Sunday",
)

# Global

In [5]:
#hide
# Headline text: totals, day-over-day change and share of confirmed cases.
print(
    f'According to the official count, the coronavirus pandemic has sickened more than\n\t{n_confirmed:,} (+{n_confirmed_diff:,.0f}) people.\n'
    f'As of {weekDays[date_latest.weekday()]} evening, at least'
    f'\n\t{n_recovered:,} (+{n_recovered_diff:,.0f}) or {n_recovered/n_confirmed*100:.2f}% people have recovered and'
    f'\n\t{n_deaths:,} (+{n_deaths_diff:,.0f}) or {n_deaths/n_confirmed*100:.2f}% people have died. '
)

print('\n\n')

# Day-over-day change of the global totals over the last 90 days of data.
df_summary90 = (
    df_summary(90)
    .groupby('Date')
    .sum()
    .diff()
    .reset_index()
)
# "Closed" cases are those that ended in death or recovery.
df_summary90['Closed'] = df_summary90['Deaths'] + df_summary90['Recovered']

# Two bar layers over a shared date axis, both on a log scale.
_base = alt.Chart(df_summary90).encode(x='Date:T')
_confirmed = _base.mark_bar(color='red').encode(
    y=alt.Y('Confirmed:Q', scale=alt.Scale(type='log'))
)
_closed = _base.mark_bar(color='green').encode(
    y=alt.Y('Closed:Q', scale=alt.Scale(type='log'))
)

(_confirmed + _closed).properties(
    title='Global Daily New Confirmed and Closed Cases',
    width=1000,
)

According to the official count, the coronavirus pandemic has sickened more than
	1,197,405 (+101,488) people.
As of Saturday evening, at least
	246,152 (+20,356) or 20.56% people have recovered and
	64,606 (+5,819) or 5.40% people have died. 






### Overview

In [0]:
# Bar chart of every country with more than 2000 confirmed cases on the latest date.
chart_confirmed_2000 = chart_countries_cases(df_confirmed, 2000).properties(
    title = 'Countries with over 2000 confirmed cases',
    width = 1500
)
# Make the chart the cell's last expression so it is rendered; the bare
# assignment alone produces no output.
chart_confirmed_2000


In [7]:
chart_deaths_500 = chart_countries_cases(df_deaths, 500).properties(
    title='Countries with over 500 deaths cases'
)

# The chart's data as a table, ordered from most to fewest deaths.
df_deaths_500 = (
    chart_deaths_500.data
    .sort_values('Cases', ascending=False)
    .loc[:, ['Country', 'Cases']]
    .reset_index(drop=True)
)
print(df_deaths_500)
print('\n')
chart_deaths_500

           Country  Cases
0            Italy  15362
1            Spain  11947
2               US   8407
3           France   7574
4   United Kingdom   4320
5             Iran   3452
6            China   3330
7      Netherlands   1656
8          Germany   1444
9          Belgium   1283
10     Switzerland    666
11          Turkey    501




In [8]:
# Countries with more than 5000 confirmed cases on the latest date.
countries_5000 = countries_cases(df_confirmed, 5000)['Country'].unique()

chart_countries_timeline(df_confirmed, countries_5000).properties(
    title='Confirmed Cases around the world (>5000 cases)'
)

In [9]:
# Deaths over time for the countries selected by confirmed cases (countries_5000).
chart_countries_timeline(df_deaths, countries_5000).properties(
    title='Deaths Cases around the world (with >5000 confirmed cases)'
)

### United States

In [10]:
# Confirmed / Deaths / Recovered from df_summary(60), selected by the 'US'
# label of the row index.
df_US = df_summary(60).loc['US']
# Last expression of the cell, so the frame is displayed.
df_US

Unnamed: 0_level_0,Confirmed,Deaths,Recovered
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-02-05,11,0,0
2020-02-06,11,0,0
2020-02-07,11,0,0
2020-02-08,11,0,0
2020-02-09,11,0,3
2020-02-10,11,0,3
2020-02-11,12,0,3
2020-02-12,12,0,3
2020-02-13,13,0,3
2020-02-14,13,0,3


In [11]:
# Bars: day-over-day change; line: cumulative confirmed cases.
_diff = chart_countries_timeline_diff(df_confirmed, 'US')
_chart = chart_countries_timeline(df_confirmed, ['US']).properties(
    title='Confirmed cases in United States'
)

_chart + _diff

### Japan

In [12]:
# Bars: day-over-day change; line: cumulative confirmed cases.
_diff = chart_countries_timeline_diff(df_confirmed, 'Japan')
_chart = chart_countries_timeline(df_confirmed, ['Japan']).properties(
    title='Confirmed cases in Japan'
)

_chart + _diff

In [13]:
# The data set labels South Korea as 'Korea, South'.
# Bars: day-over-day change; line: cumulative confirmed cases.
_diff = chart_countries_timeline_diff(df_confirmed, 'Korea, South')
_chart = chart_countries_timeline(df_confirmed, ['Korea, South']).properties(
    title='Confirmed cases in South Korea'
)

_chart + _diff

# South East Asia

In [14]:
sea_countries = [
    'Malaysia', 'Indonesia', 'Singapore', 'Thailand', 'Vietnam',
    'Cambodia', 'Brunei', 'Burma', 'Philippines', 'Laos',
]

def df_rate(df):
  """`Cases` on `date_latest` as a Series indexed by `Country`."""
  latest = df[df['Date'] == date_latest]
  return latest.set_index('Country')['Cases']

df_confirmed_sea = df_confirmed[df_confirmed['Country'].isin(sea_countries)]

# Threshold 0 keeps every listed country that has at least one case.
chart_countries_cases(df_confirmed_sea, 0).properties(
    title='Confirmed Cases in South East Asia'
)

In [15]:
df_deaths_sea = df_deaths[df_deaths['Country'].isin(sea_countries)]

# Deaths as a percentage of confirmed cases per country, highest first.
print(
    (df_rate(df_deaths_sea) / df_rate(df_confirmed_sea) * 100.0)
    .to_frame()
    .rename(columns={'Cases': 'Death_Rate'})
    .sort_values(by='Death_Rate', ascending=False)
)

chart_countries_cases(df_deaths_sea, 0).properties(
    title='Death Cases in South East Asia'
)


             Death_Rate
Country                
Indonesia      9.130019
Burma          4.761905
Philippines    4.654169
Malaysia       1.636520
Thailand       0.967586
Brunei         0.740741
Singapore      0.504626
Cambodia       0.000000
Laos           0.000000
Vietnam        0.000000


In [16]:
df_recoverd_sea = df_recovered[df_recovered['Country'].isin(sea_countries)]

# Recoveries as a percentage of confirmed cases per country, highest first.
print(
    (df_rate(df_recoverd_sea) / df_rate(df_confirmed_sea) * 100.0)
    .to_frame()
    .rename(columns={'Cases': 'Recovered_Rate'})
    .sort_values(by='Recovered_Rate', ascending=False)
)

chart_countries_cases(df_recoverd_sea, 0).properties(
    title='Recovered Cases in South East Asia'
)

             Recovered_Rate
Country                    
Brunei            48.888889
Cambodia          43.859649
Vietnam           37.500000
Thailand          32.607644
Malaysia          26.270457
Singapore         24.978974
Indonesia          7.170172
Philippines        1.842275
Burma              0.000000
Laos               0.000000


In [17]:
# Use the pre-formatted date string in the title: interpolating the Timestamp
# itself renders a time component (e.g. "2020-04-04 00:00:00").
chart_countries_timeline(df_confirmed, sea_countries, '3/1/2020').properties(
    title=f'Confirmed Cases in South East Asia as of {s_date_latest}'
)

In [18]:
# Deaths over time for the South East Asian countries, from 1 March 2020 on.
chart_countries_timeline(df_deaths, sea_countries, '3/1/2020').properties(
    title='Deaths Cases in South East Asia'
)

### Malaysia

In [19]:
_diff = chart_countries_timeline_diff(df_confirmed, 'Malaysia')
_chart = chart_countries_timeline(df_confirmed, ['Malaysia']).properties(
    title='Confirmed cases in Malaysia'
)

# Frames behind the two charts: cumulative counts and day-over-day changes.
df_confirmed_my = _chart.data
df_confirmed_my_new = _diff.data

# Cumulative curve on a log scale, restricted to rows with more than 100 cases.
alt.Chart(df_confirmed_my.loc[df_confirmed_my['Cases'] > 100]).mark_line().encode(
    x='Date:T',
    y=alt.Y('Cases:Q', scale=alt.Scale(type='log')),
).properties(title='Confirmed Cases in Malaysia (> 100)')

In [20]:
# Day-over-day change in recovered cases for Malaysia.
_diff = chart_countries_timeline_diff(df_recovered, 'Malaysia')
df_recovered_my_new = _diff.data

# Daily new recovered vs. confirmed, side by side. The result is kept in a
# variable; as a bare expression in the middle of the cell it was computed and
# then discarded.
df_my_daily_new = (
    pd.merge(df_recovered_my_new, df_confirmed_my_new, on='Date')
    .rename(columns={'Cases_x': 'Recovered', 'Cases_y': 'Confirmed'})
)

# Closed = recovered + deaths. The three series are downloaded separately, so
# they are matched on (Country, Date) rather than on row position; adding the
# columns by row index silently pairs the wrong rows if the frames ever differ
# in their set of countries or dates.
_keys = ['Country', 'Date']
_closed = pd.merge(df_recovered, df_deaths, on=_keys, suffixes=('_recovered', '_deaths'))
_closed['Closed'] = _closed['Cases_recovered'] + _closed['Cases_deaths']

# Left merge keeps every confirmed row; Closed is NaN where no match exists.
dff1 = pd.merge(
    df_confirmed.rename(columns={'Cases': 'Confirmed'}),
    _closed[_keys + ['Closed']],
    on=_keys,
    how='left',
)

dff1_my = dff1[dff1['Country'] == 'Malaysia']
dff1_my = dff1_my[['Date', 'Confirmed', 'Closed']].set_index('Date').reset_index()

_base = alt.Chart(dff1_my).encode(x='Date:T')
_confirmed = _base.mark_line(color='red').encode(y='Confirmed:Q')
_closed = _base.mark_line(color='green').encode(y='Closed:Q')

(_confirmed + _closed).properties(title='Confirmed and Closed (Recovered or Deaths) case in Malaysia', width = 1000)

In [21]:
# Bars: day-over-day change; line: cumulative deaths.
_diff = chart_countries_timeline_diff(df_deaths, 'Malaysia')
_chart = chart_countries_timeline(df_deaths, ['Malaysia']).properties(
    title='Deaths cases in Malaysia'
)

_chart + _diff