# Cases in Malaysia
> Updates on the respiratory illness that has infected more than one million people and killed tens of thousands.

- toc:false
- branch: master
- badges: false
- hide: false
- comments: false
- permalink:/covid-my-overview/

In [0]:
#hide
import pandas as pd
import numpy as np
import altair as alt
import ipywidgets as widgets
#from jinja2 import Template
from IPython.display import HTML

#hide
# COVID-19 Data from https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_time_series
base_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data'

# confirmed, deaths, recovered
def get_time_series(type):
  _url = f'{base_url}/csse_covid_19_time_series/time_series_covid19_{type}_global.csv'
  _dff = pd.read_csv(_url)
  _cols = _dff.columns[~_dff.columns.isin(['Province/State', 'Country/Region', 'Lat', 'Long'])]
  _dff = (_dff.groupby('Country/Region')[_cols].sum().stack().reset_index(name = 'Cases')
        .rename(columns = {'level_1': 'Date', 'Country/Region': 'Country'}))
  _dff['Date'] = pd.to_datetime(_dff['Date'], format='%m/%d/%y')
  return _dff

# eg. 03-27-2020
def get_df_daily(date):
  _url = f'{base_url}/csse_covid_19_daily_reports/{date}.csv'
  return pd.read_csv(_url)

df_confirmed = get_time_series('confirmed')
df_recovered = get_time_series('recovered')
df_deaths = get_time_series('deaths')

date_latest = df_confirmed['Date'].max()

def countries_cases(df, n=5000, date_as_of=date_latest):
  df_latest = df[df['Date'].eq(date_as_of)]
  df_latest_n = df_latest[df_latest['Cases'] > n]
  return df_latest_n

# Last n days of type e.g. confirmed, recovered, deaths
df_last = lambda name, _df, n: _df[_df['Date'].isin(_df['Date'].tail(n))].set_index(['Country', 'Date']).rename(columns={'Cases': name})

# Last 2 days all global
df_summary = lambda last_n: pd.concat([df_last('Confirmed', df_confirmed, last_n), df_last('Deaths', df_deaths, last_n), df_last('Recovered', df_recovered, last_n)], axis=1)

df_global = df_summary(2).groupby(['Date']).sum().reset_index()

df_global_summary = df_global.stack().reset_index().rename(columns={'level_1': 'Type', 0: 'Cases'})

#print(date_latest)

s_date_latest = date_latest.strftime('%m/%d/%Y')
n_confirmed = df_global.loc[1]['Confirmed']
n_deaths = df_global.loc[1]['Deaths']
n_recovered = df_global.loc[1]['Recovered']

df_global_diff = df_global.diff()
n_confirmed_diff = df_global_diff.loc[1]['Confirmed']
n_deaths_diff = df_global_diff.loc[1]['Deaths']
n_recovered_diff = df_global_diff.loc[1]['Recovered']

weekDays = ("Monday","Tuesday","Wednesday","Thursday","Friday","Saturday","Sunday") 

df_my = lambda _df: _df[_df['Country'] == 'Malaysia'][['Date', 'Cases']]

df_confirmed_my = df_my(df_confirmed).rename(columns={'Cases': 'Total number of Cases', 'Date': 'Reported Date'})
df_summary_my = df_summary(2).loc['Malaysia'].reset_index(drop=True)
n_confirmed_my = df_summary_my.loc[1]['Confirmed']
n_deaths_my = df_summary_my.loc[1]['Deaths']
n_recovered_my = df_summary_my.loc[1]['Recovered']

df_summary_my_diff = df_summary_my.diff()
n_confirmed_my_diff = df_summary_my_diff.loc[1]['Confirmed']
n_deaths_my_diff = df_summary_my_diff.loc[1]['Deaths']
n_recovered_my_diff = df_summary_my_diff.loc[1]['Recovered']



## COVID-19: Malaysia at a Glance

In [2]:
#hide_input
HTML(
    f'<div style="height: 30px; width: 600px; margin: 0 auto;"><span style="font-size:0.8em;">as of {str(date_latest)}</span></div>'
    f'<div style="width: 600px; margin: 0 auto;">'
    f'<div style="float: left; width: 200px">Confirmed Cases<br/><h1>{n_confirmed_my:,}</h1>(+{n_confirmed_my_diff:,.0f})</div>'
    f'<div style="float: right; width: 200px">Deaths<br/><h1>{n_deaths_my:,}</h1>{n_deaths_my/n_confirmed_my:.2%} (+{n_deaths_my_diff:,.0f})</div>'
    f'<div style="float: right; width: 200px">Recovered<br/><h1>{n_recovered_my:,}</h1>{n_recovered_my/n_confirmed_my:.2%} (+{n_recovered_my_diff:,.0f})</div>'
    f'</div>'
)

In [3]:
#hide_input
alt.Chart(df_confirmed_my).mark_bar().encode(
    x='Reported Date:T',
    y='Total number of Cases:Q'
).properties(
    width=600,
    title='Cummulative total number of COVID-19 cases in Malaysia'
)

In [4]:
#hide_input
df_confirmed_my[['Reported Date', 'Total number of Cases']].set_index('Reported Date').rename(columns={'Total number of Cases': 'Total'}).transpose()

Reported Date,2020-01-22,2020-01-23,2020-01-24,2020-01-25,2020-01-26,2020-01-27,2020-01-28,2020-01-29,2020-01-30,2020-01-31,2020-02-01,2020-02-02,2020-02-03,2020-02-04,2020-02-05,2020-02-06,2020-02-07,2020-02-08,2020-02-09,2020-02-10,2020-02-11,2020-02-12,2020-02-13,2020-02-14,2020-02-15,2020-02-16,2020-02-17,2020-02-18,2020-02-19,2020-02-20,2020-02-21,2020-02-22,2020-02-23,2020-02-24,2020-02-25,2020-02-26,2020-02-27,2020-02-28,2020-02-29,2020-03-01,2020-03-02,2020-03-03,2020-03-04,2020-03-05,2020-03-06,2020-03-07,2020-03-08,2020-03-09,2020-03-10,2020-03-11,2020-03-12,2020-03-13,2020-03-14,2020-03-15,2020-03-16,2020-03-17,2020-03-18,2020-03-19,2020-03-20,2020-03-21,2020-03-22,2020-03-23,2020-03-24,2020-03-25,2020-03-26,2020-03-27,2020-03-28,2020-03-29,2020-03-30,2020-03-31,2020-04-01,2020-04-02,2020-04-03,2020-04-04,2020-04-05,2020-04-06,2020-04-07,2020-04-08,2020-04-09
Total,0,0,0,3,4,4,4,7,8,8,8,8,8,10,12,12,12,16,16,18,18,18,19,19,22,22,22,22,22,22,22,22,22,22,22,22,23,23,25,29,29,36,50,50,83,93,99,117,129,149,149,197,238,428,566,673,790,900,1030,1183,1306,1518,1624,1796,2031,2161,2320,2470,2626,2766,2908,3116,3333,3483,3662,3793,3963,4119,4228


In [5]:
#hide_input
df_reported_my = df_confirmed_my[['Reported Date', 'Total number of Cases']].set_index('Reported Date').diff().rename(columns={'Total number of Cases': 'Reported Cases'})
alt.Chart(df_reported_my.reset_index()).mark_bar().encode(
    x='Reported Date:T',
    y='Reported Cases:Q'
).properties(
    title = 'Number of Reported Cases in Malaysia each day',
    width = 600
)

In [6]:
#hide_input
df_reported_my.transpose()

Reported Date,2020-01-22,2020-01-23,2020-01-24,2020-01-25,2020-01-26,2020-01-27,2020-01-28,2020-01-29,2020-01-30,2020-01-31,2020-02-01,2020-02-02,2020-02-03,2020-02-04,2020-02-05,2020-02-06,2020-02-07,2020-02-08,2020-02-09,2020-02-10,2020-02-11,2020-02-12,2020-02-13,2020-02-14,2020-02-15,2020-02-16,2020-02-17,2020-02-18,2020-02-19,2020-02-20,2020-02-21,2020-02-22,2020-02-23,2020-02-24,2020-02-25,2020-02-26,2020-02-27,2020-02-28,2020-02-29,2020-03-01,2020-03-02,2020-03-03,2020-03-04,2020-03-05,2020-03-06,2020-03-07,2020-03-08,2020-03-09,2020-03-10,2020-03-11,2020-03-12,2020-03-13,2020-03-14,2020-03-15,2020-03-16,2020-03-17,2020-03-18,2020-03-19,2020-03-20,2020-03-21,2020-03-22,2020-03-23,2020-03-24,2020-03-25,2020-03-26,2020-03-27,2020-03-28,2020-03-29,2020-03-30,2020-03-31,2020-04-01,2020-04-02,2020-04-03,2020-04-04,2020-04-05,2020-04-06,2020-04-07,2020-04-08,2020-04-09
Reported Cases,,0.0,0.0,3.0,1.0,0.0,0.0,3.0,1.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0,0.0,4.0,0.0,2.0,0.0,0.0,1.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2.0,4.0,0.0,7.0,14.0,0.0,33.0,10.0,6.0,18.0,12.0,20.0,0.0,48.0,41.0,190.0,138.0,107.0,117.0,110.0,130.0,153.0,123.0,212.0,106.0,172.0,235.0,130.0,159.0,150.0,156.0,140.0,142.0,208.0,217.0,150.0,179.0,131.0,170.0,156.0,109.0


In [12]:
#hide_input
df_closed_my = pd.merge(df_my(df_recovered), df_my(df_deaths), on='Date').rename(columns={'Cases_x': 'Recovered', 'Cases_y': 'Deaths'})
df_closed_my['Total'] = df_closed_my['Recovered'] + df_closed_my['Deaths']
df_closed_my['Recovered_Rate'] = df_closed_my['Recovered']/ df_closed_my['Total']
df_closed_my['Deaths_Rate'] = df_closed_my['Deaths']/ df_closed_my['Total']
_base = alt.Chart(df_closed_my.round(6)[['Date', 'Recovered_Rate', 'Deaths_Rate']]).encode(x='Date:T')
_recovered = _base.mark_line(color='green').encode(y=alt.Y('Recovered_Rate:Q', axis=alt.Axis(format='%')))
_deaths = _base.mark_line(color='red').encode(y=alt.Y('Deaths_Rate:Q', axis=alt.Axis(format='%')))

(_recovered + _deaths).properties(title='Outcome of Total Closed Case (Recovery Rate vs Death Rate)', width=600)

In [8]:
#hide_input
df_closed_my[['Date', 'Recovered_Rate', 'Deaths_Rate']].transpose()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78
Date,2020-01-22,2020-01-23,2020-01-24,2020-01-25,2020-01-26,2020-01-27,2020-01-28,2020-01-29,2020-01-30,2020-01-31,2020-02-01,2020-02-02,2020-02-03,2020-02-04,2020-02-05,2020-02-06,2020-02-07 00:00:00,2020-02-08 00:00:00,2020-02-09 00:00:00,2020-02-10 00:00:00,2020-02-11 00:00:00,2020-02-12 00:00:00,2020-02-13 00:00:00,2020-02-14 00:00:00,2020-02-15 00:00:00,2020-02-16 00:00:00,2020-02-17 00:00:00,2020-02-18 00:00:00,2020-02-19 00:00:00,2020-02-20 00:00:00,2020-02-21 00:00:00,2020-02-22 00:00:00,2020-02-23 00:00:00,2020-02-24 00:00:00,2020-02-25 00:00:00,2020-02-26 00:00:00,2020-02-27 00:00:00,2020-02-28 00:00:00,2020-02-29 00:00:00,2020-03-01 00:00:00,2020-03-02 00:00:00,2020-03-03 00:00:00,2020-03-04 00:00:00,2020-03-05 00:00:00,2020-03-06 00:00:00,2020-03-07 00:00:00,2020-03-08 00:00:00,2020-03-09 00:00:00,2020-03-10 00:00:00,2020-03-11 00:00:00,2020-03-12 00:00:00,2020-03-13 00:00:00,2020-03-14 00:00:00,2020-03-15 00:00:00,2020-03-16 00:00:00,2020-03-17 00:00:00,2020-03-18 00:00:00,2020-03-19 00:00:00,2020-03-20 00:00:00,2020-03-21 00:00:00,2020-03-22 00:00:00,2020-03-23 00:00:00,2020-03-24 00:00:00,2020-03-25 00:00:00,2020-03-26 00:00:00,2020-03-27 00:00:00,2020-03-28 00:00:00,2020-03-29 00:00:00,2020-03-30 00:00:00,2020-03-31 00:00:00,2020-04-01 00:00:00,2020-04-02 00:00:00,2020-04-03 00:00:00,2020-04-04 00:00:00,2020-04-05 00:00:00,2020-04-06 00:00:00,2020-04-07 00:00:00,2020-04-08 00:00:00,2020-04-09 00:00:00
Recovered_Rate,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0.960784,0.967742,0.974026,0.966667,0.966102,0.932886,0.908497,0.919598,0.908676,0.903361,0.908772,0.92219,0.917258,0.928295,0.925862,0.934783,0.9388,0.939773,0.941358,0.942777,0.952417,0.95448,0.958119,0.96
Deaths_Rate,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0392157,0.0322581,0.025974,0.0333333,0.0338983,0.0671141,0.0915033,0.080402,0.0913242,0.0966387,0.0912281,0.0778098,0.0827423,0.0717054,0.0741379,0.0652174,0.0611995,0.0602273,0.058642,0.0572233,0.0475825,0.0455202,0.0418814,0.04


In [9]:
#hide
df_states = pd.read_csv("https://docs.google.com/spreadsheets/d/e/2PACX-1vTzT9vUJNiKV2yN4sb_VvxKcq-B2triWGPE74rfUT4XOsF-5qsB1tM6OfMPVKiRHX95tE9tPubdTbxY/pub?gid=1726267961&single=true&output=csv", parse_dates=['Date'])
df_states.set_index('Date').sum(axis=1)

Date
3/27    130
3/28    159
3/29    150
3/30    156
3/31    140
4/1     142
4/2     208
4/3     217
4/4     150
4/5     179
4/6     131
4/7     170
4/8     156
dtype: int64

In [40]:
#hide
df_districts = pd.read_csv("https://docs.google.com/spreadsheets/d/e/2PACX-1vTzT9vUJNiKV2yN4sb_VvxKcq-B2triWGPE74rfUT4XOsF-5qsB1tM6OfMPVKiRHX95tE9tPubdTbxY/pub?gid=1667946793&single=true&output=csv")
df_districts = df_districts.set_index(['Districts', 'State']).transpose().tail(2)
df_districts_new = df_districts.diff().tail(1).transpose()
df_districts_new = df_districts_new.sort_values(by=df_districts_new.columns[0], ascending=False).head(10)
df_districts_new[df_districts_new[df_districts_new.columns[0]] > 0]

Unnamed: 0_level_0,Unnamed: 1_level_0,4/8
Districts,State,Unnamed: 2_level_1
Hulu Langat,SGR,16.0
Petaling,SGR,12.0
Johor Bahru,JHR,12.0
Jasin,MLK,11.0
Klang,SGR,11.0
Kuching,SRW,10.0
Hulu Selangor,SGR,7.0
Seremban,NSN,6.0
Gombak,SGR,6.0
Lembah Pantai,KUL,5.0
