In [None]:
import plotly.express as px
import plotly.graph_objects as go

import pandas as pd

from io import StringIO
import requests

In [None]:
url_enhanced_sur_covid_19_eng = 'https://www.chp.gov.hk/files/misc/enhanced_sur_covid_19_eng.csv'
url_large_clusters_eng = 'https://www.chp.gov.hk/files/misc/large_clusters_eng.csv'

# Download both CSV files. A timeout keeps the cell from hanging forever, and
# raise_for_status() stops an HTTP error page from being parsed as CSV data.
r = requests.get(url_enhanced_sur_covid_19_eng, timeout=30)
r.raise_for_status()
df_covid = pd.read_csv(StringIO(r.text))

r = requests.get(url_large_clusters_eng, timeout=30)
r.raise_for_status()
df_cluster = pd.read_csv(StringIO(r.text))

# --- Cleaning: case table ---

# Normalise inconsistent capitalisation of the residency labels.
df_covid["HK/Non-HK resident"] = df_covid["HK/Non-HK resident"].str.upper()

# Parse the report date. dayfirst=True replaces the deprecated
# infer_datetime_format flag; the sibling "Date of onset" column is written
# day-first (e.g. 21/01/2020), and "Report date" is assumed to use the same
# layout — TODO confirm against the raw CSV.
df_covid["Report date"] = pd.to_datetime(df_covid["Report date"], dayfirst=True)

# "Date of onset" is intentionally left as text: it contains non-date entries
# such as "Asymptomatic", so a plain pd.to_datetime() call on it fails.

# --- Cleaning: cluster table ---

# "Involved case number" holds a comma-separated list of case numbers; use the
# first one as the join key into the case table.
df_cluster["Case no."] = (
    df_cluster["Involved case number"]
    .str.split(",", n=1)
    .str[0]
    .str.strip()
    .astype("int64")
)

# Inner join: keep only clusters whose first case number exists in df_covid.
df_cluster2 = pd.merge(df_covid, df_cluster, on="Case no.")[["Cluster", "Report date", "Number of cases", "Case no."]]

In [None]:
# Inspect the merged cluster/case table.
# NOTE(review): the saved output of this cell has no rows, so the merge on
# "Case no." apparently matched nothing when the notebook was last run —
# confirm the contents of the cluster CSV.
df_cluster2

Unnamed: 0,Cluster,Report date,Number of cases,Case no.


# Cleansing the dataset

In [None]:
# Column dtypes and non-null counts of the case table.
df_covid.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15414 entries, 0 to 15413
Data columns (total 10 columns):
 #   Column                            Non-Null Count  Dtype         
---  ------                            --------------  -----         
 0   Case no.                          15414 non-null  int64         
 1   Report date                       15414 non-null  datetime64[ns]
 2   Date of onset                     15395 non-null  object        
 3   Gender                            15408 non-null  object        
 4   Age                               15408 non-null  object        
 5   Name of hospital admitted         0 non-null      float64       
 6   Hospitalised/Discharged/Deceased  15408 non-null  object        
 7   HK/Non-HK resident                15408 non-null  object        
 8   Classification*                   15408 non-null  object        
 9   Case status*                      15414 non-null  object        
dtypes: datetime64[ns](1), float64(1), int64(1), ob

In [None]:
# Column dtypes and non-null counts of the merged cluster table.
df_cluster2.info()

<class 'pandas.core.frame.DataFrame'>
Index: 0 entries
Data columns (total 4 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   Cluster          0 non-null      object        
 1   Report date      0 non-null      datetime64[ns]
 2   Number of cases  0 non-null      object        
 3   Case no.         0 non-null      int64         
dtypes: datetime64[ns](1), int64(1), object(2)
memory usage: 0.0+ bytes


# Function Declaration

In [None]:
def df_column_draw_pie_chart(df, columnName, figTitle):
    """Build a pie chart of how often each value occurs in one column.

    Missing values in the column are ignored.

    Args:
        df: DataFrame that contains ``columnName``.
        columnName: Name of the column whose distinct values become sectors.
        figTitle: Title passed to the pie trace.

    Returns:
        A ``go.Figure`` holding a single ``go.Pie`` trace.
    """
    column = df[columnName].dropna()

    # Count every value once instead of re-running value_counts() per label.
    counts = column.value_counts()

    # Keep labels in order of first appearance and align the counts to them.
    labels = column.unique().tolist()
    values = counts.reindex(labels).tolist()

    fig = go.Figure(data=[go.Pie(labels=labels, values=values, title=figTitle)])
    return fig

def df_column_count_line_chart(df, columnName, figTitle, mode="count"):
    """Build a line chart of row counts per distinct value of one column.

    Args:
        df: DataFrame that contains ``columnName``.
        columnName: Column whose values are counted; the counts are ordered
            by value (sorted index).
        figTitle: Title of the figure.
        mode: ``"count"`` plots the count per value; ``"cumsum"`` plots the
            running total of those counts.

    Returns:
        The figure returned by ``px.line``.

    Raises:
        ValueError: If ``mode`` is neither ``"count"`` nor ``"cumsum"``.
    """
    if mode not in ("count", "cumsum"):
        raise ValueError(f"mode must be 'count' or 'cumsum', got {mode!r}")

    # Use the frame that was passed in, not the notebook-level df_covid.
    counts_by_value = df[columnName].value_counts().sort_index()
    if mode == "cumsum":
        counts_by_value = counts_by_value.cumsum()
    return px.line(counts_by_value, title=figTitle)

def df_column_histogram(df, columnName, figTitle):
    """Return a ``px.histogram`` of ``df`` over ``columnName``, titled ``figTitle``."""
    return px.histogram(df, x=columnName, title=figTitle)

def df_bubble_chart(df, dfx, dfy, dfsize, hoverName, titleName):
    """Return a ``px.scatter`` bubble chart of ``df``.

    Args:
        df: DataFrame to plot.
        dfx: Column used for the x axis.
        dfy: Column used for the y axis.
        dfsize: Column that drives the marker size (capped at ``size_max=60``).
        hoverName: Column shown as the hover name.
        titleName: Title of the figure.
    """
    scatter_options = dict(
        x=dfx,
        y=dfy,
        size=dfsize,
        hover_name=hoverName,
        size_max=60,
        title=titleName,
    )
    return px.scatter(df, **scatter_options)

In [None]:
def go_df_plot(df, dfx, dfy, lineName):
    """Return a ``go.Scatter`` line trace of column ``dfy`` against column ``dfx``.

    ``lineName`` is used as the trace name.
    """
    x_values = df[dfx].to_list()
    y_values = df[dfy].to_list()
    return go.Scatter(x=x_values, y=y_values, mode='lines', name=lineName)

def go_df_count_plot(df, dfx, lineName, mode="count"):
    """Return a ``go.Scatter`` line trace of row counts per value of ``dfx``.

    Counts are ordered by value. With ``mode='cumsum'`` the running total is
    plotted; any other ``mode`` plots the plain counts. ``lineName`` is used
    as the trace name.
    """
    counts = df[dfx].value_counts().sort_index()
    series = counts.cumsum() if mode == 'cumsum' else counts
    return go.Scatter(
        x=series.index.to_list(),
        y=series.to_list(),
        mode='lines',
        name=lineName,
    )

def go_df_bubble_chart(df,dfx,dfy,dfsize,dfhover, lineName):
    """Return a ``go.Scatter`` marker trace whose marker sizes come from a column.

    Args:
        df: DataFrame to plot.
        dfx: Column used for the x values.
        dfy: Column used for the y values.
        dfsize: Column supplying the marker sizes.
        dfhover: Column supplying the hover text.
        lineName: Name of the trace.
    """
    # Marker sizing is fixed here: sizemode='area', sizeref=0.2, sizemin=1.
    marker_style = dict(
        size=df[dfsize].to_list(),
        sizemode='area',
        sizeref=0.2,
        sizemin=1,
    )
    return go.Scatter(
        x=df[dfx].to_list(),
        y=df[dfy].to_list(),
        mode='markers',
        marker=marker_style,
        hovertext=df[dfhover].to_list(),
        name=lineName,
    )



# Application

## Total of the Infected

In [None]:
# Column dtypes and non-null counts; the entry count is presumably the total
# number of reported cases (one row per "Case no.") — verify.
df_covid.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15414 entries, 0 to 15413
Data columns (total 10 columns):
 #   Column                            Non-Null Count  Dtype         
---  ------                            --------------  -----         
 0   Case no.                          15414 non-null  int64         
 1   Report date                       15414 non-null  datetime64[ns]
 2   Date of onset                     15395 non-null  object        
 3   Gender                            15408 non-null  object        
 4   Age                               15408 non-null  object        
 5   Name of hospital admitted         0 non-null      float64       
 6   Hospitalised/Discharged/Deceased  15408 non-null  object        
 7   HK/Non-HK resident                15408 non-null  object        
 8   Classification*                   15408 non-null  object        
 9   Case status*                      15414 non-null  object        
dtypes: datetime64[ns](1), float64(1), int64(1), ob

## Ratio of Male and Female

In [None]:
# Keep only the Gender column, drop rows where it is missing, and preview it.
df_nona = df_covid[['Gender']].dropna()
df_nona

Unnamed: 0,Gender
0,M
1,M
2,F
3,F
4,M
...,...
15409,F
15410,M
15411,F
15412,M


In [None]:
# Pie chart of how often each Gender value occurs.
df_column_draw_pie_chart(df_covid,'Gender', 'Gender of the infected').show()


## Citizenship

In [None]:
# Pie chart of the residency labels (upper-cased during cleaning).
df_column_draw_pie_chart(df_covid, 'HK/Non-HK resident', 'Origin of the infected').show()

# Classification of the cases in Hong Kong

In [None]:
# Display the case table (pandas truncates the middle rows in the rendered output).
df_covid

Unnamed: 0,Case no.,Report date,Date of onset,Gender,Age,Name of hospital admitted,Hospitalised/Discharged/Deceased,HK/Non-HK resident,Classification*,Case status*
0,1,2020-01-23,21/01/2020,M,39,,Discharged,NON-HK RESIDENT,Imported case,Confirmed
1,2,2020-01-23,18/01/2020,M,56,,Discharged,HK RESIDENT,Imported case,Confirmed
2,3,2020-01-24,20/01/2020,F,62,,Discharged,NON-HK RESIDENT,Imported case,Confirmed
3,4,2020-01-24,23/01/2020,F,62,,Discharged,NON-HK RESIDENT,Imported case,Confirmed
4,5,2020-01-24,23/01/2020,M,63,,Discharged,NON-HK RESIDENT,Imported case,Confirmed
...,...,...,...,...,...,...,...,...,...,...
15409,15410,2022-02-06,04/02/2022,F,16,,Pending admission,HK RESIDENT,Locally acquired case,Confirmed
15410,15411,2022-02-06,04/02/2022,M,35,,Hospitalised,HK RESIDENT,Locally acquired case,Confirmed
15411,15412,2022-02-06,03/02/2022,F,14,,Hospitalised,HK RESIDENT,Locally acquired case,Confirmed
15412,15413,2022-02-06,31/01/2022,M,48,,Pending admission,HK RESIDENT,Locally acquired case,Confirmed


In [None]:
# Pie chart of how often each "Classification*" value occurs.
df_column_draw_pie_chart(df_covid, 'Classification*', 'Classification of the cases').show()

## Histogram of the infected age

In [None]:
# "Age" is loaded as text (object dtype), so plotting it directly would treat
# every distinct string as its own category instead of binning ages on a
# numeric axis. Coerce it to numbers on a copy of the frame first; entries
# that are not plain numbers become NaN.
df_column_histogram(
    df_covid.assign(Age=pd.to_numeric(df_covid["Age"], errors="coerce")),
    "Age",
    "Histogram of the infected age",
)

## Number of infected cases

In [None]:
# Line chart of the number of case rows per report date.
df_column_count_line_chart(df_covid, 'Report date', 'Infected People Daily')

In [None]:
# Same counts per report date, plotted as a running total ('cumsum' mode).
df_column_count_line_chart(df_covid, 'Report date', 'Infected People Daily', 'cumsum')

## Clustering of the cases

In [None]:
# Merged cluster table that feeds the bubble chart in the next cell.
df_cluster2

Unnamed: 0,Cluster,Report date,Number of cases,Case no.


In [None]:
# Bubble chart: x = report date, y = case number, bubble size = "Number of cases",
# hover name = cluster.
df_bubble_chart(df_cluster2, "Report date", "Case no.", "Number of cases", "Cluster", "Cluster of the infected cases").show()

# Summary

In [None]:
from plotly.subplots import make_subplots

# One panel with two y-axes: the running total goes on the primary axis and
# the daily count on the secondary axis. The cluster bubble trace is left out
# of this version of the chart.
fig = make_subplots(specs=[[dict(secondary_y=True)]])

total_trace = go_df_count_plot(df_covid, "Report date", lineName="Total Infected Case", mode="cumsum")
daily_trace = go_df_count_plot(df_covid, "Report date", lineName="Daily Infected Case", mode="count")
fig.add_trace(total_trace)
fig.add_trace(daily_trace, secondary_y=True)

# Figure title and axis titles.
fig.update_layout(title_text="Summary of Hong Kong COVID19 Cases")
fig.update_xaxes(title_text="Date")
fig.update_yaxes(title_text="<b>Total</b> Cases", secondary_y=False)
fig.update_yaxes(title_text="<b>Daily</b> Cases", secondary_y=True)

fig.show()

In [None]:
from plotly.subplots import make_subplots

# One panel with two y-axes: cluster bubbles and the running total share the
# primary axis, while the daily count goes on the secondary axis.
fig = make_subplots(specs=[[dict(secondary_y=True)]])

cluster_trace = go_df_bubble_chart(df_cluster2, "Report date", "Case no.", "Number of cases", "Cluster", lineName="Big Cluster")
total_trace = go_df_count_plot(df_covid, "Report date", lineName="Total Infected Case", mode="cumsum")
daily_trace = go_df_count_plot(df_covid, "Report date", lineName="Daily Infected Case", mode="count")
fig.add_trace(cluster_trace)
fig.add_trace(total_trace)
fig.add_trace(daily_trace, secondary_y=True)

# Figure title and axis titles.
fig.update_layout(title_text="Summary of Hong Kong COVID19 Cases")
fig.update_xaxes(title_text="Date")
fig.update_yaxes(title_text="<b>Total</b> Cases", secondary_y=False)
fig.update_yaxes(title_text="<b>Daily</b> Cases", secondary_y=True)

fig.show()


# Basic Machine Learning

In [None]:
# TODO

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=d780ad21-48be-4756-8c0d-30311a37d59d' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>