### COVID-19 - HOSPITALISATIONS BY DATE AND PROVINCE

###### IMPORTING LIBRARIES

In [327]:
import pandas as pd
import plotly
import plotly.express as px

###### READING DATA

In [328]:
# Load the hospitalisation records from the CSV file into a DataFrame.
data = pd.read_csv(filepath_or_buffer='./data/COVID19BE_HOSP.csv')

###### EXPLORING DATA

In [329]:
# Summarise the freshly loaded frame: row count, columns, non-null counts and dtypes.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3520 entries, 0 to 3519
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   DATE           3520 non-null   object
 1   PROVINCE       3520 non-null   object
 2   REGION         3520 non-null   object
 3   NR_REPORTING   3520 non-null   int64 
 4   TOTAL_IN       3520 non-null   int64 
 5   TOTAL_IN_ICU   3520 non-null   int64 
 6   TOTAL_IN_RESP  3520 non-null   int64 
 7   TOTAL_IN_ECMO  3520 non-null   int64 
 8   NEW_IN         3520 non-null   int64 
 9   NEW_OUT        3520 non-null   int64 
dtypes: int64(7), object(3)
memory usage: 275.1+ KB


In [330]:
# Preview the first five rows of the frame.
data.head(n=5)

Unnamed: 0,DATE,PROVINCE,REGION,NR_REPORTING,TOTAL_IN,TOTAL_IN_ICU,TOTAL_IN_RESP,TOTAL_IN_ECMO,NEW_IN,NEW_OUT
0,2020-03-15,Antwerpen,Flanders,14,50,9,4,0,8,8
1,2020-03-15,Brussels,Brussels,14,58,11,8,0,7,2
2,2020-03-15,Hainaut,Wallonia,15,56,13,11,1,26,1
3,2020-03-15,Limburg,Flanders,7,20,6,3,0,9,3
4,2020-03-15,Liège,Wallonia,12,22,2,1,0,4,1


In [331]:
# Preview the last five rows of the frame.
data.tail(n=5)

Unnamed: 0,DATE,PROVINCE,REGION,NR_REPORTING,TOTAL_IN,TOTAL_IN_ICU,TOTAL_IN_RESP,TOTAL_IN_ECMO,NEW_IN,NEW_OUT
3515,2021-01-28,Namur,Wallonia,6,26,8,3,0,5,1
3516,2021-01-28,OostVlaanderen,Flanders,14,290,53,21,2,32,20
3517,2021-01-28,VlaamsBrabant,Flanders,6,92,8,3,0,8,4
3518,2021-01-28,BrabantWallon,Wallonia,2,26,2,1,0,2,2
3519,2021-01-28,WestVlaanderen,Flanders,11,265,45,22,0,18,26


In [332]:
# Tally the missing values of every column as (column name, NaN count) pairs.
nan_counts = [(column, values.isna().sum()) for column, values in data.items()]
nan_counts

[('DATE', 0),
 ('PROVINCE', 0),
 ('REGION', 0),
 ('NR_REPORTING', 0),
 ('TOTAL_IN', 0),
 ('TOTAL_IN_ICU', 0),
 ('TOTAL_IN_RESP', 0),
 ('TOTAL_IN_ECMO', 0),
 ('NEW_IN', 0),
 ('NEW_OUT', 0)]

In [333]:
# List the distinct values of the two categorical columns
# as (column name, array of unique values) pairs.
cols = ['PROVINCE', 'REGION']
unique_values = [(name, data[name].unique()) for name in cols]
unique_values

[('PROVINCE',
  array(['Antwerpen', 'Brussels', 'Hainaut', 'Limburg', 'Liège',
         'Luxembourg', 'Namur', 'OostVlaanderen', 'VlaamsBrabant',
         'BrabantWallon', 'WestVlaanderen'], dtype=object)),
 ('REGION', array(['Flanders', 'Brussels', 'Wallonia'], dtype=object))]

###### CLEANING DATA

In [334]:
# Discard every row that contains at least one missing value.
data = data.dropna()

In [335]:
# Renumber the rows from zero and throw the previous index away.
data = data.reset_index(drop=True)

In [336]:
# Re-check the NaN count of every column after cleaning,
# again as (column name, NaN count) pairs.
nan_counts = [(column, values.isna().sum()) for column, values in data.items()]
nan_counts

[('DATE', 0),
 ('PROVINCE', 0),
 ('REGION', 0),
 ('NR_REPORTING', 0),
 ('TOTAL_IN', 0),
 ('TOTAL_IN_ICU', 0),
 ('TOTAL_IN_RESP', 0),
 ('TOTAL_IN_ECMO', 0),
 ('NEW_IN', 0),
 ('NEW_OUT', 0)]

In [337]:
# Summarise the frame again after dropping NaN rows and resetting the index.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3520 entries, 0 to 3519
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   DATE           3520 non-null   object
 1   PROVINCE       3520 non-null   object
 2   REGION         3520 non-null   object
 3   NR_REPORTING   3520 non-null   int64 
 4   TOTAL_IN       3520 non-null   int64 
 5   TOTAL_IN_ICU   3520 non-null   int64 
 6   TOTAL_IN_RESP  3520 non-null   int64 
 7   TOTAL_IN_ECMO  3520 non-null   int64 
 8   NEW_IN         3520 non-null   int64 
 9   NEW_OUT        3520 non-null   int64 
dtypes: int64(7), object(3)
memory usage: 275.1+ KB


In [338]:
# Turn the DATE column into datetime values so it can be resampled and plotted.
data = data.assign(DATE=pd.to_datetime(data['DATE']))

In [339]:
# Summarise the frame once more to check the dtype of DATE after the conversion.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3520 entries, 0 to 3519
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   DATE           3520 non-null   datetime64[ns]
 1   PROVINCE       3520 non-null   object        
 2   REGION         3520 non-null   object        
 3   NR_REPORTING   3520 non-null   int64         
 4   TOTAL_IN       3520 non-null   int64         
 5   TOTAL_IN_ICU   3520 non-null   int64         
 6   TOTAL_IN_RESP  3520 non-null   int64         
 7   TOTAL_IN_ECMO  3520 non-null   int64         
 8   NEW_IN         3520 non-null   int64         
 9   NEW_OUT        3520 non-null   int64         
dtypes: datetime64[ns](1), int64(7), object(2)
memory usage: 275.1+ KB


###### DATA ANALYSIS

In [340]:
# total number of hospitalized patients per date
# total number of hospitalized patients in the ICU per date

In [341]:
# Collapse the per-province rows into one row per calendar day by summing the counts.
data_by_date = data.resample('D', on='DATE')[['TOTAL_IN', 'TOTAL_IN_ICU', 'NEW_IN', 'NEW_OUT']].sum().reset_index()

# Share of hospitalised patients that are in the ICU, in percent.
data_by_date['PCT_IN_ICU'] = (data_by_date['TOTAL_IN_ICU'] / data_by_date['TOTAL_IN']) * 100

# Daily intakes minus discharges. This has to be computed from the aggregated
# frame: subtracting the columns of the per-province `data` frame would be
# aligned on the row index and would copy the first len(data_by_date)
# province-level rows here instead of the daily totals.
data_by_date['NEW_DIFF'] = data_by_date['NEW_IN'] - data_by_date['NEW_OUT']

In [359]:
# Daily totals: all hospitalised patients and the patients in the ICU.
fig_total_per_date = px.line(
    data_by_date,
    x='DATE',
    y=['TOTAL_IN', 'TOTAL_IN_ICU'],
    labels={'DATE': 'Date'},
    title='Total Number of Hospitalized Patients and Patients in the ICU',
)

# Name the y-axis and keep its range non-negative.
fig_total_per_date.update_yaxes(
    title_text='Number of Patients',
    rangemode='nonnegative',
)

fig_total_per_date.show()

In [343]:
# percentage of all hospitalized patients who are in the ICU, per date

In [360]:
# Daily ICU share, as a percentage of all hospitalised patients.
fig_pct_in_icu_per_date = px.line(
    data_by_date,
    x='DATE',
    y='PCT_IN_ICU',
    labels={'PCT_IN_ICU': 'Pct in ICU', 'DATE': 'Date'},
    title='Percentage of all Hospitalized patients in the ICU',
)

fig_pct_in_icu_per_date.show()

In [345]:
# total number of intakes per date
# total number of discharges per date

In [346]:
# Daily intakes and discharges, summed over all provinces.
fig_int_dis_per_date = px.line(
    data_frame=data_by_date,
    x='DATE',
    y=['NEW_IN', 'NEW_OUT'],
    title='Number of Intakes and Discharges per 24 Hours',
    labels={'DATE' : 'Date'}
)

# Name the y-axis and keep its range non-negative.
fig_int_dis_per_date.update_yaxes({
    'title' : {'text' : 'Number of Patients'},
    'rangemode' : 'nonnegative',
})

# Render explicitly, like every other figure cell in this notebook, so the
# output does not depend on the figure being the last expression of the cell.
fig_int_dis_per_date.show()

In [347]:
# difference between total number of intakes and discharges per date

In [361]:
# Daily difference between intakes and discharges (the NEW_DIFF column).
fig_int_dis_diff_per_date = px.line(
    data_by_date,
    x='DATE',
    y='NEW_DIFF',
    labels={'NEW_DIFF': 'Number of Patients', 'DATE': 'Date'},
    title='Difference Between Intakes and Discharges per 24 Hours',
)

fig_int_dis_diff_per_date.show()

In [349]:
# total number of hospitalized patients per date per province
# total number of hospitalized patients in the ICU per date per province

In [350]:
# Hospitalised patients and ICU patients over time, one colour per province.
# Two y columns are passed in wide form; because `color` is already taken by
# PROVINCE, the metric ('variable') must be mapped to another visual channel.
# Otherwise TOTAL_IN and TOTAL_IN_ICU of a province are drawn as one
# connected line. The dash style separates the two metrics.
fig_total_per_province = px.line(
    data_frame=data,
    x='DATE',
    y=['TOTAL_IN', 'TOTAL_IN_ICU'],
    color='PROVINCE',
    line_dash='variable',
    title='Total Number of Hospitalized Patients and Patients in the ICU per Province',
    labels={'DATE' : 'Date', 'PROVINCE' : 'Province', 'variable' : 'Metric'}
)

# Name the y-axis and keep its range non-negative.
fig_total_per_province.update_yaxes({
    'title' : {'text' : 'Number of Patients'},
    'rangemode' : 'nonnegative'
})

fig_total_per_province.show()

In [351]:
# percentage of all hospitalized patients who are in the ICU, per date and per province

In [352]:
# Per-row (date and province) share of hospitalised patients in the ICU, in percent.
data = data.assign(
    PCT_IN_ICU=lambda frame: (frame['TOTAL_IN_ICU'] / frame['TOTAL_IN']) * 100
)

In [353]:
# ICU share over time, one line per province.
# The title names the province breakdown so this figure can be told apart from
# the national one, and the legend gets a readable label like the other
# per-province figures.
fig_pct_in_icu_per_date_per_province = px.line(
    data_frame=data,
    x='DATE',
    y='PCT_IN_ICU',
    color='PROVINCE',
    title='Percentage of all Hospitalized patients in the ICU per Province',
    labels={'DATE' : 'Date', 'PCT_IN_ICU' : 'Pct in ICU', 'PROVINCE' : 'Province'}
)

fig_pct_in_icu_per_date_per_province.show()

In [354]:
# total number of intakes per date per province
# total number of discharges per date per province

In [365]:
# Per-row (date and province) difference between intakes and discharges.
data = data.assign(NEW_DIFF=lambda frame: frame['NEW_IN'] - frame['NEW_OUT'])

In [356]:
# Intakes and discharges over time, one colour per province.
# Two y columns are passed in wide form; because `color` is already taken by
# PROVINCE, the metric ('variable') must be mapped to another visual channel.
# Otherwise NEW_IN and NEW_OUT of a province are drawn as one connected line.
# The dash style separates the two metrics.
fig_int_dis_per_date_per_province = px.line(
    data_frame=data,
    x='DATE',
    y=['NEW_IN', 'NEW_OUT'],
    color='PROVINCE',
    line_dash='variable',
    title='Number of Intakes and Discharges per 24 Hours per Province',
    labels={'DATE' : 'Date', 'PROVINCE' : 'Province', 'variable' : 'Metric'}
)

# Name the y-axis and keep its range non-negative.
fig_int_dis_per_date_per_province.update_yaxes({
    'title' : {'text' : 'Number of Patients'},
    'rangemode' : 'nonnegative',
})

fig_int_dis_per_date_per_province.show()

In [363]:
# difference between total number of intakes and discharges per date per province

In [366]:
# Difference between intakes and discharges over time, one line per province.
# The title no longer repeats the period ("per Day per 24 Hours"), and the
# legend gets a readable label like the other per-province figures.
fig_int_dis_diff_per_date_per_province = px.line(
    data_frame=data,
    x='DATE',
    y='NEW_DIFF',
    color='PROVINCE',
    title='Difference Between Intakes and Discharges per 24 Hours per Province',
    labels={'DATE' : 'Date', 'NEW_DIFF' : 'Number of Patients', 'PROVINCE' : 'Province'}
)

fig_int_dis_diff_per_date_per_province.show()