In [83]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import plotly.graph_objects as go
from datetime import datetime
from plotly.subplots import make_subplots
from sklearn import linear_model

# Introduction

In this notebook, we aim to observe and analyze the trend of Covid-19 cases, deaths, vaccinations and other related information by cleaning, processing and visualizing the datasets.
The analysis begins at the worldwide level, then narrows the scope to ASEAN countries and finally to Malaysia.
The notebook ends with a prediction of Malaysia's vaccination rate.

# [Worldwide]

## [Worldwide] Importing datasets
Dataset used is from [JHU CSSE COVID-19 Dataset](https://github.com/CSSEGISandData/COVID-19)

Dataset used and imported are:
1. confirmed_global.csv
2. deaths_global.csv
3. recovered_global.csv

In [84]:
# Load the JHU CSSE global time series of confirmed cases
confirmed_csv_url = (
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
)
df_confirmed = pd.read_csv(confirmed_csv_url)
# Preview five randomly selected rows
df_confirmed.sample(n=5)

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,12/19/21,12/20/21,12/21/21,12/22/21,12/23/21,12/24/21,12/25/21,12/26/21,12/27/21,12/28/21
60,Chongqing,China,30.0572,107.874,6,9,27,57,75,110,...,610,610,610,610,610,610,610,610,611,611
166,,Lebanon,33.8547,35.8623,0,0,0,0,0,0,...,700943,701749,703555,705477,707123,709242,711259,712570,713670,715950
77,Liaoning,China,41.2956,122.6085,2,3,4,17,21,27,...,792,792,792,792,793,793,793,793,793,793
157,,Kazakhstan,48.0196,66.9237,0,0,0,0,0,0,...,1066934,1067309,1067811,1068363,1068899,1069409,1069864,1070215,1070569,1071035
161,,Kosovo,42.602636,20.902977,0,0,0,0,0,0,...,161262,161265,161274,161294,161298,161311,161327,161339,161343,161356


In [85]:
# Load the JHU CSSE global time series of deaths
deaths_csv_url = (
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv"
)
df_deaths = pd.read_csv(deaths_csv_url)
# Preview five randomly selected rows
df_deaths.sample(n=5)

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,12/19/21,12/20/21,12/21/21,12/22/21,12/23/21,12/24/21,12/25/21,12/26/21,12/27/21,12/28/21
106,,Djibouti,11.8251,42.5903,0,0,0,0,0,0,...,189,189,189,189,189,189,189,189,189,189
243,,Switzerland,46.8182,8.2275,0,0,0,0,0,0,...,11913,11953,11981,12002,12057,12065,12064,12065,12114,12152
155,,Japan,36.204824,138.252924,0,0,0,0,0,0,...,18375,18376,18378,18380,18382,18382,18383,18383,18383,18385
197,,Netherlands,52.1326,5.2913,0,0,0,0,0,0,...,20468,20504,20534,20581,20621,20661,20705,20716,20728,20803
93,,Comoros,-11.6455,43.3333,0,0,0,0,0,0,...,151,152,152,152,152,153,153,153,153,154


In [86]:
# Load the JHU CSSE global time series of recovered cases
recovered_csv_url = (
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv"
)
df_recovered = pd.read_csv(recovered_csv_url)
# Preview five randomly selected rows
df_recovered.sample(n=5)

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,12/19/21,12/20/21,12/21/21,12/22/21,12/23/21,12/24/21,12/25/21,12/26/21,12/27/21,12/28/21
59,Jiangsu,China,32.9711,119.455,0,0,0,1,1,1,...,0,0,0,0,0,0,0,0,0,0
30,,Brazil,-14.235,-51.9253,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
71,Tianjin,China,39.3054,117.323,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
18,,Bahamas,25.025885,-78.035889,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
141,,Jordan,31.24,36.51,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## [Worldwide] Data Cleaning
We will perform data cleaning on each of the dataframes.

To ease the process, we will write a function that does the following:

1. Replace missing values (NA/NAN)
2. Drop columns that are not required
3. Group Province/State into single country
4. Adding a status column

In [87]:
# Report the per-column count of missing values for each raw dataframe
print("\n\n".join(
    f"Missing values for {label}: \n{frame.isna().sum()}"
    for label, frame in (
        ("df_confirmed", df_confirmed),
        ("df_deaths", df_deaths),
        ("df_recovered", df_recovered),
    )
))

Missing values for df_confirmed: 
Province/State    193
Country/Region      0
Lat                 2
Long                2
1/22/20             0
                 ... 
12/24/21            0
12/25/21            0
12/26/21            0
12/27/21            0
12/28/21            0
Length: 711, dtype: int64

Missing values for df_deaths: 
Province/State    193
Country/Region      0
Lat                 2
Long                2
1/22/20             0
                 ... 
12/24/21            0
12/25/21            0
12/26/21            0
12/27/21            0
12/28/21            0
Length: 711, dtype: int64

Missing values for df_recovered: 
Province/State    194
Country/Region      0
Lat                 1
Long                1
1/22/20             0
                 ... 
12/24/21            0
12/25/21            0
12/26/21            0
12/27/21            0
12/28/21            0
Length: 711, dtype: int64


In [88]:
#function to clean data and return a cleaned df
def clean_data(df, status):
  """Collapse a JHU time-series frame to one row per country, tagged with a status.

  Parameters
  ----------
  df : pandas.DataFrame
      Frame with "Province/State", "Country/Region", "Lat" and "Long" columns;
      every remaining column is treated as a count column.
  status : str
      Label stored in the "Status" index level (e.g. "Confirmed").

  Returns
  -------
  pandas.DataFrame
      Count columns summed per country, indexed by the ("Country", "Status") pair.
      The input frame is not modified.
  """
  df = df.drop(columns = ["Province/State", "Lat", "Long"]) #drop unnecessary columns
  df = df.rename(columns = {"Country/Region": "Country"}) #rename column name to just country
  # A missing country keeps the previous behaviour: it is grouped under the empty string.
  df["Country"] = df["Country"].fillna("")
  # Missing counts become 0 (not ""), so the count columns stay numeric and can be summed.
  count_columns = df.columns.drop("Country")
  df[count_columns] = df[count_columns].fillna(0)
  df = df.groupby(["Country"], sort = True).sum() #merge countries together (previously separated by province/state)
  df.insert(0, "Status", status, allow_duplicates = True) #add a Status column
  df = df.reset_index().set_index(["Country", "Status"]) #make country and status column a pair index
  return df

In [89]:
# Clean each raw frame and tag its rows with the matching case status
df_confirmed, df_deaths, df_recovered = (
    clean_data(df_confirmed, "Confirmed"),
    clean_data(df_deaths, "Deaths"),
    clean_data(df_recovered, "Recovered"),
)

# Re-check the per-column missing-value counts after cleaning
print(
    "Missing values for df_confirmed: \n" + str(df_confirmed.isna().sum()),
    "Missing values for df_deaths: \n" + str(df_deaths.isna().sum()),
    "Missing values for df_recovered: \n" + str(df_recovered.isna().sum()),
    sep="\n\n",
)

Missing values for df_confirmed: 
1/22/20     0
1/23/20     0
1/24/20     0
1/25/20     0
1/26/20     0
           ..
12/24/21    0
12/25/21    0
12/26/21    0
12/27/21    0
12/28/21    0
Length: 707, dtype: int64

Missing values for df_deaths: 
1/22/20     0
1/23/20     0
1/24/20     0
1/25/20     0
1/26/20     0
           ..
12/24/21    0
12/25/21    0
12/26/21    0
12/27/21    0
12/28/21    0
Length: 707, dtype: int64

Missing values for df_recovered: 
1/22/20     0
1/23/20     0
1/24/20     0
1/25/20     0
1/26/20     0
           ..
12/24/21    0
12/25/21    0
12/26/21    0
12/27/21    0
12/28/21    0
Length: 707, dtype: int64


## [Worldwide] Data Processing

We can now use the cleaned data to calculate total cases/deaths/recovery worldwide using sum.

Then, we can combine all the dataframes together grouping by the countries, and transform the columns into datetime format

In [90]:
def _with_worldwide_row(df, status):
  """Return `df` plus one ("Worldwide", status) row holding the column-wise sum.

  Uses pd.concat because DataFrame.append was removed in pandas 2.0. The new
  row gets the same index level names as `df`, so later operations that refer
  to the index levels by name keep working.
  """
  totals = df.sum(numeric_only=True)
  worldwide_row = pd.DataFrame(
      [totals.to_numpy()],
      columns=totals.index,
      index=pd.MultiIndex.from_tuples([("Worldwide", status)], names=df.index.names),
  )
  return pd.concat([df, worldwide_row])

#add a row for worldwide by summing each column to show global confirmed case
df_confirmed = _with_worldwide_row(df_confirmed, "Confirmed")

#add a row for worldwide by summing each column to show global death case
df_deaths = _with_worldwide_row(df_deaths, "Deaths")

#add a row for worldwide by summing each column to show global recovered case
df_recovered = _with_worldwide_row(df_recovered, "Recovered")

In [91]:
# Stack the three status frames; rows remain keyed by the (Country, Status) index pair
stacked = pd.concat([df_confirmed, df_deaths, df_recovered])
df_combined = stacked.sort_values(by=["Country", "Status"])

# Parse the date-string column labels into datetimes
df_combined.columns = pd.to_datetime(df_combined.columns)
df_combined.tail(n=21)

Unnamed: 0_level_0,Unnamed: 1_level_0,2020-01-22,2020-01-23,2020-01-24,2020-01-25,2020-01-26,2020-01-27,2020-01-28,2020-01-29,2020-01-30,2020-01-31,...,2021-12-19,2021-12-20,2021-12-21,2021-12-22,2021-12-23,2021-12-24,2021-12-25,2021-12-26,2021-12-27,2021-12-28
Country,Status,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Venezuela,Confirmed,0,0,0,0,0,0,0,0,0,0,...,441562,441562,441937,442178,442431,442999,442999,443332,443332,443983
Venezuela,Deaths,0,0,0,0,0,0,0,0,0,0,...,5285,5285,5291,5299,5306,5312,5312,5314,5314,5319
Venezuela,Recovered,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Vietnam,Confirmed,0,2,2,2,2,2,2,2,2,2,...,1540478,1555455,1571780,1588335,1604712,1620869,1636455,1651673,1666545,1680985
Vietnam,Deaths,0,0,0,0,0,0,0,0,0,0,...,29566,29791,30041,30251,30531,30766,31007,31214,31418,31632
Vietnam,Recovered,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
West Bank and Gaza,Confirmed,0,0,0,0,0,0,0,0,0,0,...,465094,466669,467058,467391,467682,467682,467682,467682,468619,469452
West Bank and Gaza,Deaths,0,0,0,0,0,0,0,0,0,0,...,4855,4870,4875,4882,4884,4884,4884,4884,4907,4912
West Bank and Gaza,Recovered,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Worldwide,Confirmed,557,655,941,1434,2118,2927,5578,6167,8235,9927,...,274751745,275504086,276293870,277194688,278177141,279011244,279523785,279996000,281486304,282800017


## [Worldwide] Visualizing Dataset on Worldwide Trend

Here, we will plot necessary graphs to be used in the insights section later

There are three graphs that are plotted:
1. Worldwide confirmed, deaths, and recovered cases.
2. Worldwide Confirmed vs Death Cases.
3. Worldwide Confirmed vs Recovered Cases

In [92]:
def _worldwide_confirmed_vs(status, title, secondary_axis_title):
    """Build a dual-axis chart of worldwide confirmed cases against another status.

    Confirmed cases are drawn as bars on the primary y-axis; the `status` row of
    df_combined is drawn as a line on the secondary y-axis.
    """
    figure = make_subplots(specs=[[{"secondary_y" : True}]])
    figure.add_trace(
        go.Bar(x = df_combined.columns, y = df_combined.loc["Worldwide", "Confirmed"], name = "Confirmed"),
        secondary_y = False,
    )
    figure.add_trace(
        go.Scatter(x = df_combined.columns, y = df_combined.loc["Worldwide", status], name = status),
        secondary_y = True,
    )
    figure.update_layout(title_text = title)
    figure.update_yaxes(title_text="Confirmed Cases", secondary_y=False)
    figure.update_yaxes(title_text=secondary_axis_title, secondary_y=True)
    return figure


# Stacked bars of the three worldwide series
fig = go.FigureWidget(
    data = [
        go.Bar(name = status, x = df_combined.columns, y = df_combined.loc["Worldwide", status], marker_color = colour)
        for status, colour in (("Confirmed", "blue"), ("Recovered", "green"), ("Deaths", "red"))
    ],
    layout = go.Layout(plot_bgcolor = "#EEEEEE"),
)
fig.update_yaxes(title='Cases')
fig.update_layout(title_text = "Worldwide Confirmed, Death and Recovered Cases", barmode='stack')

# Dual-axis comparisons against the confirmed series
fig_confirmed_deaths = _worldwide_confirmed_vs("Deaths", "Worldwide Confirmed Cases vs Death Cases", "Death Cases")
fig_confirmed_recovered = _worldwide_confirmed_vs("Recovered", "Worldwide Confirmed Cases vs Recovered Cases", "Recovered Cases")

# Last expression of the cell: the recovered comparison is what the cell displays
fig_confirmed_recovered

## [Worldwide] Insights for Worldwide Trend

In [93]:
# Render the stacked worldwide confirmed / recovered / deaths bar chart
fig.show()

From the graph, we can observe the global trend of Covid-19 confirmed, death and recovered cases. The global confirmed, death and recovered cases all grow linearly.
It can also be seen that recovered cases stopped being tracked around August 2021.

In [94]:
# Render the worldwide confirmed (bars) vs deaths (line, secondary axis) chart
fig_confirmed_deaths.show()

The patterns of confirmed and death cases are almost identical. However, around December 2021, the death rate per confirmed case starts to show signs of slowing down, as we observe the gap between the confirmed and death curves closing.

In [95]:
# Render the worldwide confirmed (bars) vs recovered (line, secondary axis) chart
fig_confirmed_recovered.show()

The same can be said of the recovered cases, where the trends of confirmed and recovered cases are identical. However, since the dataset stopped tracking recovered cases around early August, we cannot draw conclusions about the trend after that date.

## [Worldwide] Visualizing Extended Observations on Worldwide Confirmed versus Death Rate 

We can observe the confirmed versus death rates for a few notable countries that have either a very low or a very high death rate.

We will plot some graphs to observe confirmed versus death rates
1. United Kingdom Confirmed vs Death Cases
2. Singapore Confirmed vs Death Cases
3. Malaysia Confirmed vs Death Cases 

In [96]:
def _country_confirmed_vs_deaths(country):
    """Build a dual-axis chart for one country: confirmed bars against a deaths line.

    `country` is the first-level row key of df_combined; it is also used as the
    prefix of the figure title.
    """
    figure = make_subplots(specs=[[{"secondary_y" : True}]])
    figure.add_trace(
        go.Bar(x = df_combined.columns, y = df_combined.loc[country, "Confirmed"], name = "Confirmed"),
        secondary_y = False,
    )
    figure.add_trace(
        go.Scatter(x = df_combined.columns, y = df_combined.loc[country, "Deaths"], name = "Deaths"),
        secondary_y = True,
    )
    figure.update_layout(title_text = f"{country} Confirmed Cases vs Death Cases")
    figure.update_yaxes(title_text="Confirmed Cases", secondary_y=False)
    figure.update_yaxes(title_text="Death Cases", secondary_y=True)
    return figure


fig_confirmed_deaths_uk = _country_confirmed_vs_deaths("United Kingdom")
fig_confirmed_deaths_sg = _country_confirmed_vs_deaths("Singapore")
fig_confirmed_deaths_my = _country_confirmed_vs_deaths("Malaysia")

# Last expression of the cell: the Malaysia figure is what the cell displays
fig_confirmed_deaths_my

## [Worldwide] Insights on Extended Observations on Worldwide Confirmed versus Death Rate

In [97]:
# Render the United Kingdom confirmed vs deaths chart
fig_confirmed_deaths_uk.show()

From the graphs, we can identify countries with a very high fatality rate per case, like the United Kingdom, because the line representing deaths sits well above the confirmed cases.

In [98]:
# Render the Singapore confirmed vs deaths chart
fig_confirmed_deaths_sg.show()

Countries with a very low fatality rate per case, like Singapore, have the deaths line below the confirmed cases.
However, we observe that the death rate per confirmed case has been rising since the surge of cases around October 2021.

In [99]:
# Render the Malaysia confirmed vs deaths chart
fig_confirmed_deaths_my.show()

For Malaysia, the fatality rate per case was relatively low up until around September 2021. The rate has risen since September, as confirmed cases surged suddenly, and has recently begun to slow down as of December 2021.

## [Worldwide] Importing Vaccinations Dataset

The dataset of worldwide vaccination is from [Owid](https://github.com/owid/covid-19-data/tree/master/public/data)

The dataset used is vaccinations.csv

In [100]:
# Load the OWID worldwide vaccination dataset
vaccinations_csv_url = (
    "https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/vaccinations.csv"
)
df_vac = pd.read_csv(vaccinations_csv_url)
# Preview the first five rows
df_vac.head(n=5)

Unnamed: 0,location,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,total_boosters,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,total_boosters_per_hundred,daily_vaccinations_per_million,daily_people_vaccinated,daily_people_vaccinated_per_hundred
0,Afghanistan,AFG,2021-02-22,0.0,0.0,,,,,0.0,0.0,,,,,
1,Afghanistan,AFG,2021-02-23,,,,,,1367.0,,,,,34.0,1367.0,0.003
2,Afghanistan,AFG,2021-02-24,,,,,,1367.0,,,,,34.0,1367.0,0.003
3,Afghanistan,AFG,2021-02-25,,,,,,1367.0,,,,,34.0,1367.0,0.003
4,Afghanistan,AFG,2021-02-26,,,,,,1367.0,,,,,34.0,1367.0,0.003


## [Worldwide] Cleaning Vaccination Dataset

Here, we will clean the dataset by performing the following steps to keep only the information we need:
1. Dropping unnecessary columns
2. Replacing null values
3. Pivoting the table to match the time-series format of the other DataFrames.

In [101]:
df_vac = (
    df_vac
    # Discard the columns that the pivot below does not use
    .drop(columns = [
        'iso_code',
        'people_vaccinated', 'people_fully_vaccinated', 'total_boosters',
        'daily_vaccinations_raw', 'daily_vaccinations',
        'total_vaccinations_per_hundred', 'people_vaccinated_per_hundred',
        'people_fully_vaccinated_per_hundred', 'total_boosters_per_hundred',
        'daily_vaccinations_per_million', 'daily_people_vaccinated',
        'daily_people_vaccinated_per_hundred',
    ])
    # One row per location, one column per date, holding total_vaccinations
    .pivot(index="location", columns='date', values='total_vaccinations')
    # Blank out dates without a reported total.
    # NOTE(review): filling with "" presumably leaves object-dtype columns mixing
    # floats and strings; confirm that later cells do not need numeric columns.
    .fillna("")
)
df_vac.head()

date,2020-12-01,2020-12-02,2020-12-03,2020-12-04,2020-12-05,2020-12-06,2020-12-07,2020-12-08,2020-12-09,2020-12-10,...,2021-12-19,2021-12-20,2021-12-21,2021-12-22,2021-12-23,2021-12-24,2021-12-25,2021-12-26,2021-12-27,2021-12-28
location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Afghanistan,,,,,,,,,,,...,,,,,,,,,,
Africa,,,,,,,,,,,...,282887918.0,288271279.0,288850288.0,290505698.0,296343489.0,298821526.0,299044187.0,300575076.0,301777747.0,302585007.0
Albania,,,,,,,,,,,...,,,2270522.0,,2286235.0,,,,,
Algeria,,,,,,,,,,,...,,,12545356.0,,,,,,,
Andorra,,,,,,,,,,,...,,,,,,,,,,


## [Worldwide] Visualizing Worldwide Vaccinations

We can now compare the vaccination trend to the confirmed and death cases globally by plotting the following graphs: 

1. Overall worldwide vaccinations.
2. Worldwide vaccination vs confirmed cases.
3. Worldwide vaccination vs death cases

In [102]:
def _world_vaccinations_vs(status, title):
    """Build a dual-axis chart of world vaccinations against a worldwide case series.

    Vaccinations (the "World" row of df_vac) go on the primary y-axis; the
    ("Worldwide", status) row of df_combined goes on the secondary y-axis.
    `status` is also used as the trace name and the secondary-axis title.
    """
    figure = make_subplots(specs=[[{"secondary_y" : True}]])
    figure.add_trace(
        go.Scatter(x = df_vac.columns, y = df_vac.loc["World"], name = "Vaccinations"),
        secondary_y = False,
    )
    figure.add_trace(
        go.Scatter(x = df_combined.columns, y = df_combined.loc["Worldwide", status], name = status),
        secondary_y = True,
    )
    figure.update_layout(title_text = title)
    figure.update_yaxes(title_text="Vaccinations", secondary_y=False)
    figure.update_yaxes(title_text=status, secondary_y=True)
    return figure


# Single-series line of total vaccinations worldwide
world_vaccination_line = go.Scatter(name = "Vaccination", x = df_vac.columns, y = df_vac.loc["World"], marker_color = "blue")
fig_world_vac = go.FigureWidget(data = [world_vaccination_line], layout = go.Layout(plot_bgcolor = "#EEEEEE"))
fig_world_vac.update_layout(title_text = "Worldwide Vaccinations")
fig_world_vac.update_yaxes(title_text = "Vaccination Count")

fig_vac_confirm = _world_vaccinations_vs("Confirmed", "Worldwide Vaccinations vs Confirmed Cases")
fig_vac_death = _world_vaccinations_vs("Deaths", "Worldwide Vaccinations vs Death Cases")

# Last expression of the cell: the deaths comparison is what the cell displays
fig_vac_death

## [Worldwide] Insights on Worldwide Vaccinations


In [103]:
# Render the worldwide total-vaccinations line chart
fig_world_vac.show()

Generally, the global vaccinations began to speed up and grow linearly since around Mar 2021.


In [104]:
# Render the worldwide vaccinations vs confirmed cases chart (two y-axes)
fig_vac_confirm.show()

However, since the vaccination numbers began to grow, the growth of confirmed cases has slowed only very slightly.


In [105]:
# Render the worldwide vaccinations vs death cases chart (two y-axes)
fig_vac_death.show()

Nonetheless, the death cases did show a relatively subtle slowing trend when vaccinations reached 5B globally. 

# [ASEAN]

## [ASEAN] Comparison among ASEAN Countries

The confirmed cases, death cases, and vaccinations among ASEAN countries are compared by visualizing the following graphs:

1. Confirmed Cases among ASEAN countries.
2. Death Cases among ASEAN countries.
3. Vaccinations among ASEAN countries

In [106]:
def _asean_case_lines(status):
    """Return one line trace per ASEAN country for the given status rows of df_combined."""
    countries = (
        "Malaysia", "Singapore", "Thailand", "Indonesia", "Philippines",
        "Laos", "Cambodia", "Brunei", "Vietnam", "Burma",
    )
    return [
        go.Scatter(name = country, x = df_combined.columns, y = df_combined.loc[country, status])
        for country in countries
    ]


# Confirmed cases, one line per ASEAN country
fig_confirmed = go.FigureWidget(data = _asean_case_lines("Confirmed"), layout = go.Layout(plot_bgcolor = "#EEEEEE"))
fig_confirmed.update_layout(title_text = "Confirmed Cases among ASEAN countries")

# Death cases, one line per ASEAN country
fig_deaths = go.FigureWidget(data = _asean_case_lines("Deaths"), layout = go.Layout(plot_bgcolor = "#EEEEEE"))
fig_deaths.update_layout(title_text = "Death Cases among ASEAN countries")

# Vaccinations: df_vac keys the row as "Myanmar", but the legend keeps the "Burma" label used above
fig_vac = go.FigureWidget(
    data = [
        go.Scatter(x = df_vac.columns, y = df_vac.loc[location], name = legend_name)
        for location, legend_name in (
            ("Malaysia", "Malaysia"),
            ("Singapore", "Singapore"),
            ("Thailand", "Thailand"),
            ("Indonesia", "Indonesia"),
            ("Philippines", "Philippines"),
            ("Laos", "Laos"),
            ("Cambodia", "Cambodia"),
            ("Brunei", "Brunei"),
            ("Vietnam", "Vietnam"),
            ("Myanmar", "Burma"),
        )
    ],
    layout = go.Layout(plot_bgcolor = "#EEEEEE"),
)
fig_vac.update_layout(title_text = "Vaccinations among ASEAN countries")

FigureWidget({
    'data': [{'name': 'Malaysia',
              'type': 'scatter',
              'uid': '0454ce…

## [ASEAN] Insights on Comparison among ASEAN countries.

In [107]:
# Render the ASEAN confirmed-cases line chart
fig_confirmed.show()

The trends of confirmed cases for ASEAN countries are similar: cases grow linearly before July 2021, and there is a surge of cases around July 2021. The increase in cases begins to slow down around October 2021.

In [108]:
# Render the ASEAN death-cases line chart
fig_deaths.show()

The death cases for most of the countries grow linearly. 
However, Indonesia has a relatively large surge in death cases, following the trend of its confirmed cases, around July 2021.
For Malaysia, Thailand, Vietnam and Burma (Myanmar), there is also a noticeable increase in cases around July 2021.

In [109]:
# Render the ASEAN vaccinations line chart
fig_vac.show()

Most of the countries began vaccinating around February 2021.
However, it was only after May that the countries began to speed up their vaccination rates.
As of December 2021, Indonesia, Vietnam, the Philippines and Thailand are still speeding up their vaccinations,
while other countries such as Malaysia, Cambodia, Singapore, Laos and Brunei have begun to slow down.

# [Malaysia]

## [Malaysia] Importing Datasets

Dataset used is from [MoH Malaysia](https://github.com/MoH-Malaysia/covid19-public)

Dataset extracted are
1. Malaysia Cases
2. Malaysia Vaccination rate
3. Malaysia Death Cases
4. Malaysia Covid Tests
5. Malaysia Population
6. Malaysia Covid Cluster details
7. Malaysia States Cases
8. Malaysia States Death Cases
9. Malaysia States Vaccination rate
10. Malaysia MySejahtera Checkins
11. Malaysia States MySejahtera Checkins
12. Malaysia Hospital Data

In [110]:
# Dataset locations: nationwide figures
url_cases_malaysia = "https://raw.githubusercontent.com/MoH-Malaysia/covid19-public/main/epidemic/cases_malaysia.csv"
url_vax_malaysia= "https://raw.githubusercontent.com/CITF-Malaysia/citf-public/main/vaccination/vax_malaysia.csv"
url_death_malaysia = "https://raw.githubusercontent.com/MoH-Malaysia/covid19-public/main/epidemic/deaths_malaysia.csv"
url_test_malaysia = "https://raw.githubusercontent.com/MoH-Malaysia/covid19-public/main/epidemic/tests_malaysia.csv"
url_liquidity_malaysia = "https://github.com/MoH-Malaysia/covid19-public/raw/main/mysejahtera/checkin_malaysia.csv"

# Dataset locations: static reference data and clusters
url_pop = "https://raw.githubusercontent.com/MoH-Malaysia/covid19-public/main/static/population.csv"
url_cluster = "https://raw.githubusercontent.com/MoH-Malaysia/covid19-public/main/epidemic/clusters.csv"

# Dataset locations: per-state figures
url_cases_states = "https://raw.githubusercontent.com/MoH-Malaysia/covid19-public/main/epidemic/cases_state.csv"
url_death_states = "https://raw.githubusercontent.com/MoH-Malaysia/covid19-public/main/epidemic/deaths_state.csv"
url_vax_states = "https://github.com/CITF-Malaysia/citf-public/raw/main/vaccination/vax_state.csv"
url_liquidity_states = "https://github.com/MoH-Malaysia/covid19-public/raw/main/mysejahtera/checkin_state.csv"
url_hosp_states = "https://raw.githubusercontent.com/MoH-Malaysia/covid19-public/main/epidemic/hospital.csv"

# Read each dataset; where `usecols` is given, only those columns are loaded
df_cases = pd.read_csv(
    url_cases_malaysia,
    usecols=[
        'date', 'cases_new', 'cases_import', 'cases_active',
        'cases_unvax', 'cases_pvax',
        'cases_child', 'cases_adult', 'cases_elderly',
    ],
)
df_vax = pd.read_csv(url_vax_malaysia)
df_death = pd.read_csv(url_death_malaysia, usecols=['date', 'deaths_new'])
df_pop = pd.read_csv(url_pop)
df_cluster = pd.read_csv(url_cluster)
df_cases_states = pd.read_csv(url_cases_states)
df_test = pd.read_csv(url_test_malaysia, usecols=['date', 'rtk-ag', 'pcr'])
df_death_states = pd.read_csv(url_death_states)
df_vax_states = pd.read_csv(url_vax_states, usecols=['date', 'state', 'cumul_full'])
df_liquidity_malaysia = pd.read_csv(url_liquidity_malaysia)
df_liquidity_states = pd.read_csv(url_liquidity_states, usecols=['date', 'state', 'checkins'])
df_hosp_states = pd.read_csv(url_hosp_states, usecols=['date', 'state', 'beds_covid', 'hosp_covid'])

In [111]:
# Total population: the 'pop' value of the first row whose state is "Malaysia"
is_national_row = df_pop['state'] == "Malaysia"
pop_total = df_pop.loc[is_national_row, 'pop'].values[0]

## [Malaysia] Overall Cases Trend



In [112]:
# Parse the date strings into datetimes, then sum every column per date
df_cases = (
    df_cases
    .assign(date=pd.to_datetime(df_cases['date']))
    .groupby(['date'])
    .sum()
)
df_cases

Unnamed: 0_level_0,cases_new,cases_import,cases_active,cases_unvax,cases_pvax,cases_child,cases_adult,cases_elderly
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-01-25,4,4,4,4,0,0,1,0
2020-01-26,0,0,4,0,0,0,0,0
2020-01-27,0,0,4,0,0,0,0,0
2020-01-28,0,0,4,0,0,0,0,0
2020-01-29,3,3,7,3,0,1,2,0
...,...,...,...,...,...,...,...,...
2021-12-24,3528,103,46808,771,27,531,2409,420
2021-12-25,3160,156,45532,673,13,447,2131,431
2021-12-26,2778,129,44764,639,12,432,1855,370
2021-12-27,2757,147,42894,590,15,411,1854,343


In [113]:
# Line chart of daily new cases over time
new_cases_line = go.Scatter(
    x=df_cases.index,
    y=df_cases['cases_new'],
    mode='lines',
    name='New Cases',
)
fig = go.Figure(data=[new_cases_line])
fig.update_layout(title="Daily New Confirmed Covid-19 Cases", xaxis_title="Time", yaxis_title="Count")
fig.show()

The figure above shows the daily new confirmed Covid-19 cases.

There is a major upward trend in the number of cases from July to August, until it reaches its peak on 26 August 2021. Since then, it has gone down.

In [114]:
# Line chart of active cases over time
active_cases_line = go.Scatter(
    x=df_cases.index,
    y=df_cases['cases_active'],
    mode='lines',
    name='Active Cases',
)
fig = go.Figure(data=[active_cases_line])
fig.update_layout(title="Active Confirmed Covid-19 Cases", xaxis_title="Time", yaxis_title="Count")
fig.show()

It can be observed that active cases follow the same trend as daily new cases, with less fluctuation.

In [115]:
fig = go.Figure()
# One line per column among the last three columns of df_cases
# (cases_child, cases_adult and cases_elderly in the frame displayed above)
for col in df_cases.columns[-3:]:
  fig.add_scatter(x=df_cases.index, y=df_cases[col], mode='lines', name=col)
fig.update_layout(title="Daily New Confirmed Covid-19 Cases for adult, child and elderly")
fig.show()

It can be observed that the majority of Covid-19 patients are adults, followed by children and the elderly.

## [Malaysia] Vaccination Rate

In [116]:
# Parse the dates and sum every vaccination column per calendar day
df_vax = (
    df_vax
    .assign(date=pd.to_datetime(df_vax['date']))
    .groupby('date')
    .sum()
)

# One stacked bar series per dose type: partial, full, booster
dose_series = [
    ("Partially Vaccinated", 'daily_partial'),
    ("Fully Vaccinated", 'daily_full'),
    ("Booster", 'daily_booster'),
]
fig = go.Figure(
    data=[
        go.Bar(name=label, x=df_vax.index, y=df_vax[column])
        for label, column in dose_series
    ]
)

fig.update_layout(
    barmode='stack',
    title="Daily Covid-19 Vaccinations in Malaysia",
    xaxis_title="Date",
    yaxis_title="Count",
)
fig.show()

It can be observed that before August most of the vaccinations are first doses, and after August most of them are second doses. Since October, most of the vaccinations are booster doses.

Therefore, we can infer that the majority of people have received their first and second doses, and most of them are now getting their booster dose.

In [117]:
# Daily doses per vaccine brand: sum of the dose-1, dose-2 and dose-3 columns.
for brand in ['pfizer', 'sinovac', 'astra', 'sinopharm']:
    df_vax[brand] = df_vax[brand + '1'] + df_vax[brand + '2'] + df_vax[brand + '3']
# NOTE: this overwrites the existing `cansino` column in place, so re-running this
# cell adds `cansino3` a second time. Restart the kernel and run all cells instead
# of re-executing this cell on its own.
df_vax['cansino'] = df_vax['cansino'] + df_vax['cansino3']

fig = go.Figure()
for col in ['pfizer', 'sinovac', 'astra', 'sinopharm', 'cansino']:
    # x must come from df_vax (the frame that supplies y). df_cases covers a
    # different date range, so using its index would pair each vaccination
    # value with the wrong date.
    fig.add_trace(go.Scatter(x=df_vax.index, y=df_vax[col],
                             mode='lines',
                             name=col))
fig.update_layout(
    title="Vaccine Brands vaccinated in Malaysia")
fig.show()

It can be observed that:

1.   Before mid-July, most of the vaccines administered are Sinovac
2.   After mid-July, most of the vaccines administered are Pfizer, and Pfizer has stayed dominant since then
3.   Since mid-July, the use of the Sinovac vaccine has been reduced; therefore, we can infer that Malaysia has reduced its quota of Sinovac vaccine
4.   Overall, most of the vaccines in Malaysia are Pfizer, followed by Sinovac, AstraZeneca, CanSino and Sinopharm

## [Malaysia] Vaccination vs Confirmed Cases

In [118]:
# Attach the cumulative vaccination count to the daily case table
# (column assignment aligns the two frames on their date index).
df_cases_vax = df_cases.copy()
df_cases_vax['cumul'] = df_vax['cumul']

# Plot chart: vaccinations on the primary y-axis, daily cases on the secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(go.Scatter(x=df_cases_vax.index, y=df_cases_vax['cumul'],
                         mode='lines',
                         name="Commulative Vaccination Count"),
              secondary_y=False,)
fig.add_trace(go.Scatter(x=df_cases_vax.index, y=df_cases_vax['cases_new'],
                         mode='lines',
                         name='Daily New Cases'),
              secondary_y=True,)
# Set y-axes titles. Each title must describe the trace drawn on that axis:
# the original labels were swapped (cases title on the vaccination axis and vice versa).
fig.update_yaxes(title_text="Vaccination Count", secondary_y=False)
fig.update_yaxes(title_text="Daily New Cases", secondary_y=True)
fig.update_layout(
    title="Daily New Cases vs Cummulative Vaccinated")
fig.show()

From the chart above, it can be observed that when the vaccination count increases, the number of daily new Covid-19 cases drops after a certain amount of time. Therefore, there is some lag before vaccination takes effect and reduces the number of Covid-19 cases by lowering the infection rate.

In [119]:
# Parse the dates and sum the death columns per calendar day
df_death = (
    df_death
    .assign(date=pd.to_datetime(df_death['date']))
    .groupby('date')
    .sum()
)

# Line chart of the daily new deaths
deaths_trace = go.Scatter(
    x=df_death.index,
    y=df_death['deaths_new'],
    mode='lines',
    name='Death Cases',
)
fig = go.Figure(data=[deaths_trace])
fig.update_layout(
    title="Covid-19 Death Cases in Malaysia",
    xaxis_title="Time",
    yaxis_title="Number of Death Cases",
)
fig.show()

From the chart above, it can be observed that there is a major uptrend in death cases from July to September 2021.

## [Malaysia] Death Cases vs Vaccinations

In [120]:
# Attach the cumulative vaccination count to the daily death table
# (column assignment aligns the two frames on their date index).
df_death_vax = df_death.copy()
df_death_vax['cumul'] = df_vax['cumul']

# Plot chart: vaccinations on the primary y-axis, daily deaths on the secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(go.Scatter(x=df_death_vax.index, y=df_death_vax['deaths_new'],
                         mode='lines',
                         name='Daily New Death Cases'),
              secondary_y=True,)
fig.add_trace(go.Scatter(x=df_death_vax.index, y=df_death_vax['cumul'],
                         mode='lines',
                         name="Commulative Vaccination Count"),
              secondary_y=False,)
# Set y-axes titles. Each title must describe the trace drawn on that axis:
# the original labels were swapped (deaths title on the vaccination axis and vice versa).
fig.update_yaxes(title_text="Vaccination Count", secondary_y=False)
fig.update_yaxes(title_text="Daily New Death Cases", secondary_y=True)
fig.update_layout(
    title="Daily New Death Cases vs Cummulative Vaccinated")
fig.show()

We can observe that when the cumulative vaccination count reaches a threshold of about 42 million in this case, there is a major downtrend in death cases. Therefore, this is consistent with, and lends support to, the statement by WHO that Covid-19 vaccination reduces the probability of death.

## [Malaysia] Confirmed vs Death Cases

In [121]:
# Attach the daily death count to the daily case table
# (column assignment aligns the two frames on their date index).
df_death_cases = df_cases.copy()
df_death_cases['deaths_new'] = df_death['deaths_new']

# Plot chart: daily cases on the primary y-axis, daily deaths on the secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(go.Scatter(x=df_death_cases.index, y=df_death_cases['deaths_new'],
                         mode='lines',
                         name='Daily New Death Cases'),
              secondary_y=True,)
fig.add_trace(go.Scatter(x=df_death_cases.index, y=df_death_cases['cases_new'],
                         mode='lines',
                         name="Daily New Cases"),
              secondary_y=False,)
# Set y-axes titles. Each title must describe the trace drawn on that axis:
# the original labels were swapped (deaths title on the cases axis and vice versa).
fig.update_yaxes(title_text="Daily New Cases", secondary_y=False)
fig.update_yaxes(title_text="Daily New Death Cases", secondary_y=True)
fig.update_layout(
    title="Daily New Death Cases vs Daily New Confirmed Cases")
fig.show()

It can be observed that Covid-19 cases have a strong correlation with Covid-19 death cases, as the trends are highly similar. Therefore, we can infer that when Covid-19 cases increase, death cases increase too.

## [Malaysia] Confirmed Cases vs Liquidity

In [122]:
# Index the national check-in table by parsed date
df_liquidity_malaysia = (
    df_liquidity_malaysia
    .assign(date=pd.to_datetime(df_liquidity_malaysia['date']))
    .set_index('date')
)

# Daily cases joined with check-ins (aligned on date); keep only days that have check-in data
df_case_liq = (
    df_cases
    .assign(checkins=df_liquidity_malaysia['checkins'])
    .dropna(subset=['checkins'])
)

# Check-ins on the secondary y-axis, daily cases on the primary y-axis
checkins_trace = go.Scatter(
    x=df_case_liq.index,
    y=df_case_liq['checkins'],
    mode='lines',
    name='Number of Checkins',
)
daily_cases_trace = go.Scatter(
    x=df_case_liq.index,
    y=df_case_liq['cases_new'],
    mode='lines',
    name="Daily New Cases",
)
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(checkins_trace, secondary_y=True)
fig.add_trace(daily_cases_trace, secondary_y=False)
# Set y-axes titles
fig.update_yaxes(title_text="Number of Checkins", secondary_y=True)
fig.update_yaxes(title_text="Daily New Cases", secondary_y=False)
fig.update_layout(title="Liquidity vs Daily New Confirmed Cases")
fig.show()

Liquidity is represented by the number of check-ins recorded by MySejahtera, which indicates how often people leave their houses to go to other places. It can be observed that there is a trend suggesting a correlation between liquidity and daily new Covid-19 cases, with some time lag. We can observe that there is a huge number of check-ins around April, and about a month later the number of cases increases, followed by another major increase in cases. Therefore, we can hypothesize that higher liquidity (ignoring vaccination) could lead to an increasing number of cases after a certain duration.

## [Malaysia] Positivity Rate vs Vaccination Rate

In [123]:
# Sum the test counts per calendar day and derive the total number of tests
# (RTK-Ag + PCR) carried out each day
df_test = (
    df_test
    .assign(date=pd.to_datetime(df_test['date']))
    .groupby('date')
    .sum()
    .assign(test_new=lambda daily: daily['rtk-ag'] + daily['pcr'])
)

# Daily cases joined (on the date index) with cumulative vaccinations and daily tests
df_test_cases = df_cases.assign(
    cumul=df_vax['cumul'],
    test_new=df_test['test_new'],
)
df_test_cases

Unnamed: 0_level_0,cases_new,cases_import,cases_active,cases_unvax,cases_pvax,cases_child,cases_adult,cases_elderly,cumul,test_new
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020-01-25,4,4,4,4,0,0,1,0,,5.0
2020-01-26,0,0,4,0,0,0,0,0,,14.0
2020-01-27,0,0,4,0,0,0,0,0,,24.0
2020-01-28,0,0,4,0,0,0,0,0,,53.0
2020-01-29,3,3,7,3,0,1,2,0,,71.0
...,...,...,...,...,...,...,...,...,...,...
2021-12-24,3528,103,46808,771,27,531,2409,420,56739472.0,100062.0
2021-12-25,3160,156,45532,673,13,447,2131,431,56765160.0,83398.0
2021-12-26,2778,129,44764,639,12,432,1855,370,56829830.0,86301.0
2021-12-27,2757,147,42894,590,15,411,1854,343,56958916.0,


In [124]:
# Positivity rate = new cases divided by tests carried out on the same day
# (NaN on days without a test count)
df_test_cases['positivity_rate'] = df_test_cases['cases_new'].div(df_test_cases['test_new'])
df_test_cases

Unnamed: 0_level_0,cases_new,cases_import,cases_active,cases_unvax,cases_pvax,cases_child,cases_adult,cases_elderly,cumul,test_new,positivity_rate
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2020-01-25,4,4,4,4,0,0,1,0,,5.0,0.800000
2020-01-26,0,0,4,0,0,0,0,0,,14.0,0.000000
2020-01-27,0,0,4,0,0,0,0,0,,24.0,0.000000
2020-01-28,0,0,4,0,0,0,0,0,,53.0,0.000000
2020-01-29,3,3,7,3,0,1,2,0,,71.0,0.042254
...,...,...,...,...,...,...,...,...,...,...,...
2021-12-24,3528,103,46808,771,27,531,2409,420,56739472.0,100062.0,0.035258
2021-12-25,3160,156,45532,673,13,447,2131,431,56765160.0,83398.0,0.037891
2021-12-26,2778,129,44764,639,12,432,1855,370,56829830.0,86301.0,0.032190
2021-12-27,2757,147,42894,590,15,411,1854,343,56958916.0,,


In [125]:
# Positivity rate (line, primary y-axis) against cumulative vaccinations
# (semi-transparent bars, secondary y-axis)
positivity_trace = go.Scatter(
    x=df_test_cases.index,
    y=df_test_cases['positivity_rate'],
    mode='lines',
    name="Positivity Rate",
)
vaccinated_trace = go.Bar(
    x=df_test_cases.index,
    y=df_test_cases['cumul'],
    opacity=0.4,
    name='Cummulative vaccinated',
)

fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(positivity_trace, secondary_y=False)
fig.add_trace(vaccinated_trace, secondary_y=True)
# Set y-axes titles
fig.update_yaxes(title_text="Commulative Vaccinated", secondary_y=True)
fig.update_yaxes(title_text="Positivity Rate", secondary_y=False)
fig.update_layout(title="Vaccination vs Positivity Rate")
fig.show()

It can be observed that when the number of vaccinations increases, the positivity rate decreases, as seen around October 2021.

## [Malaysia] Vaccination Rate

In [126]:
# Fully-vaccinated share of the total population, in percent
df_vax_rate = df_vax.assign(vax_rate=df_vax['cumul_full'] / pop_total * 100)

vax_rate_bars = go.Bar(
    x=df_vax_rate.index,
    y=df_vax_rate['vax_rate'],
    name='Vaccination Rate',
)
fig = go.Figure(data=[vax_rate_bars])
fig.update_layout(
    title="Covid-19 Vaccination Rate in Malaysia",
    xaxis_title="Time",
    yaxis_title="Percentage",
)
fig.show()

In August, the vaccination rate increased sharply as the government encouraged the public to get vaccinated. As of 24 December, the vaccination rate is approaching 80% in Malaysia.

## [Malaysia] Prediction on time taken to reach 80% Fully vaccinated rate

In [127]:
# Snapshot of the rate table taken before the regression feature column is added
df_vax_rate_predict = df_vax_rate.copy()

# Regression feature: number of days elapsed since the first date in the table
first_date = df_vax_rate.index[0]
df_vax_rate['days_from_start'] = (df_vax_rate.index - first_date).days

# scikit-learn expects a 2-D feature matrix (n_samples, 1) and a 1-D target
X = df_vax_rate[['days_from_start']].values
y = df_vax_rate['vax_rate'].values

# Ordinary least-squares fit of vaccination rate against elapsed days
model = linear_model.LinearRegression()
model.fit(X, y)

df_vax_rate

Unnamed: 0_level_0,daily_partial,daily_full,daily,daily_partial_child,daily_full_child,daily_booster,cumul_partial,cumul_full,cumul,cumul_partial_child,...,cansino3,pending1,pending2,pending3,pfizer,sinovac,astra,sinopharm,vax_rate,days_from_start
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-02-24,62,3,65,0,0,0,62,3,65,0,...,0,1,0,0,62,2,0,0,0.000009,0
2021-02-25,1149,2,1151,0,0,0,1211,5,1216,0,...,0,2,0,0,1147,2,0,0,0.000015,1
2021-02-26,4067,3,4070,0,0,0,5278,8,5286,0,...,0,7,0,0,4060,2,1,0,0.000024,2
2021-02-27,6711,6,6717,0,0,0,11989,14,12003,0,...,0,18,0,0,6694,5,0,0,0.000043,3
2021-02-28,6712,5,6717,0,0,0,18701,19,18720,0,...,0,4,0,0,6709,4,0,0,0.000058,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-12-24,2249,3976,141937,808,1479,135712,25977288,25584178,56739472,2839868,...,0,0,1,1852,120731,16560,2561,178,78.341136,303
2021-12-25,520,1281,25688,99,598,23887,25977812,25585459,56765160,2839967,...,1,0,0,148,21134,3140,1147,114,78.345058,304
2021-12-26,851,1426,64670,304,465,62393,25978678,25586885,56829830,2840271,...,0,0,3,253,53948,8259,2085,107,78.349425,305
2021-12-27,3248,5145,129086,1348,2061,120693,25982068,25592030,56958916,2841619,...,32,34,2,1578,107108,17120,2840,230,78.365179,306


In [128]:
# Forecast inputs: the 49 day offsets that follow the last observed day.
# `.iloc[-1]` is used for positional access. `series[-1]` on a date-indexed Series
# relies on the deprecated integer-as-position fallback of `Series.__getitem__`.
last_observed_day = df_vax_rate['days_from_start'].iloc[-1]

# One single-feature row per future day, matching the (n_samples, 1) shape used for fitting
predict_list = [[day] for day in np.arange(last_observed_day + 1, last_observed_day + 50)]
predictions = model.predict(predict_list)
predictions

array([ 85.5136366 ,  85.85438769,  86.19513877,  86.53588986,
        86.87664095,  87.21739204,  87.55814313,  87.89889422,
        88.2396453 ,  88.58039639,  88.92114748,  89.26189857,
        89.60264966,  89.94340075,  90.28415183,  90.62490292,
        90.96565401,  91.3064051 ,  91.64715619,  91.98790728,
        92.32865836,  92.66940945,  93.01016054,  93.35091163,
        93.69166272,  94.03241381,  94.37316489,  94.71391598,
        95.05466707,  95.39541816,  95.73616925,  96.07692034,
        96.41767142,  96.75842251,  97.0991736 ,  97.43992469,
        97.78067578,  98.12142687,  98.46217795,  98.80292904,
        99.14368013,  99.48443122,  99.82518231, 100.1659334 ,
       100.50668448, 100.84743557, 101.18818666, 101.52893775,
       101.86968884])

In [129]:
# Calendar dates for the forecast: the range starts at the last observed date,
# so its first element is dropped to keep only the days that follow it.
forecast_range = pd.date_range(df_vax_rate.index[-1], periods=50)
datelist = forecast_range[1:].values

# Pair every forecast date with its predicted rate and index the table by date
prediction_data = {'date': datelist, 'vax_rate': predictions}
df_prediction = pd.DataFrame(prediction_data).groupby(['date']).sum()
df_prediction

Unnamed: 0_level_0,vax_rate
date,Unnamed: 1_level_1
2021-12-29,85.513637
2021-12-30,85.854388
2021-12-31,86.195139
2022-01-01,86.53589
2022-01-02,86.876641
2022-01-03,87.217392
2022-01-04,87.558143
2022-01-05,87.898894
2022-01-06,88.239645
2022-01-07,88.580396


In [130]:
# Observed vaccination rate followed by the linear-regression forecast
observed_trace = go.Scatter(
    x=df_vax_rate.index,
    y=df_vax_rate['vax_rate'],
    name='Vaccination Rate',
)
forecast_trace = go.Scatter(
    x=df_prediction.index,
    y=df_prediction['vax_rate'],
    name='Prediction: Vaccination Rate',
)

fig = go.Figure(data=[observed_trace, forecast_trace])
fig.update_layout(
    title="Covid-19 Vaccination Rate in Malaysia",
    xaxis_title="Time",
    yaxis_title="Percentage",
)
fig.show()

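Based on the linear regression prediction, the vaccination rate might reach close to 100% around February (in the most ideal case). Note that a straight-line fit does not account for the rate levelling off, so this should be read as an optimistic estimate.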

## [Malaysia] Covid-19 Clusters

In [131]:
# Sum the cluster table per cluster category; the result is indexed by category
df_cluster = df_cluster.groupby(['category']).sum()

fig = go.Figure()

# One bar per cluster category, with height equal to the total cases in that category
fig.add_trace(go.Bar(x=df_cluster.index, y=df_cluster['cases_total'],
                     name='Number of Cases'))
# Axis titles describe what is actually plotted: categories on x, case totals on y
# (the original labelled x as "Time" and y as the category).
fig.update_layout(
    title="Covid-19 Clusters Categories in Malaysia",
    xaxis_title="Cluster Category",
    yaxis_title="Number of Cases")
fig.show()

It can be observed that most of the Covid-19 cluster cases come from workplaces, followed by the community, detention centres and others. Therefore, to prevent more clusters from happening, the workplace is the most important aspect to consider.

## [Malaysia] Confirmed Cases per State

In [132]:
# Reshape the per-state case table to wide form: one row per date, one column per state
df_cases_states = (
    df_cases_states
    .assign(date=pd.to_datetime(df_cases_states['date']))
    .set_index('date')
    .pivot_table(index='date', columns='state', values='cases_new')
)

In [133]:
# One line per state showing its daily new cases
fig = go.Figure(
    data=[
        go.Scatter(
            x=df_cases_states.index,
            y=df_cases_states[state_name],
            mode='lines',
            name=state_name,
        )
        for state_name in df_cases_states.columns
    ]
)
fig.update_layout(title="Number of Daily Covid-19 Cases in Malaysia")
fig.show()

It can be observed that Selangor has the highest number of daily Covid-19 cases around August, at double to triple the scale of the other states. This is due to the high population in the Selangor area.

## [Malaysia] Death Cases per State

In [134]:
# Reshape the per-state death table to wide form: one row per date, one column per state
df_death_states = (
    df_death_states
    .assign(date=pd.to_datetime(df_death_states['date']))
    .set_index('date')
    .pivot_table(index='date', columns='state', values='deaths_new')
)

In [135]:
# One line per state showing its daily new deaths.
fig = go.Figure()
# Iterate over the columns of the frame being plotted (df_death_states). The original
# looped over df_cases_states.columns, which only works while both frames happen to
# contain exactly the same set of states.
for col in df_death_states.columns:
    fig.add_trace(go.Scatter(x=df_death_states.index, y=df_death_states[col],
                             mode='lines',
                             name=col))
fig.update_layout(
    title="Number of Daily Covid-19 Death Cases in Malaysia")
fig.show()

As with the cases in each state, Selangor has a high number of deaths from August to September, during the period when there is a spike in Covid-19 cases and death cases in Malaysia. Therefore, we can infer that Selangor is the state with the most death cases during that period.

## [Malaysia] Hospital Admission Rate per State

In [136]:
# Per-state rate = Covid-19 hospital patients as a percentage of Covid-19 beds,
# reshaped to wide form: one row per date, one column per state
df_hosp_states = (
    df_hosp_states
    .assign(date=pd.to_datetime(df_hosp_states['date']))
    .set_index('date')
    .assign(hosp_admission_rate=lambda hosp: hosp['hosp_covid'] / hosp['beds_covid'] * 100)
    .pivot_table(index='date', columns='state', values='hosp_admission_rate')
)

In [137]:
# One line per state showing its hospital admission rate over time
fig = go.Figure(
    data=[
        go.Scatter(
            x=df_hosp_states.index,
            y=df_hosp_states[state_name],
            mode='lines',
            name=state_name,
        )
        for state_name in df_hosp_states.columns
    ]
)
fig.update_layout(title="Hospital Admission Rate of each state in Malaysia")
fig.show()