In [2]:
import pandas as pd
import numpy as np
import datetime
import string
import os
from matplotlib import pyplot as plt
from pathlib import Path

In [4]:
# The CSV lives in the 'Data' folder next to the current working directory
# (i.e. under the working directory's parent).
parent_dir = Path.cwd().parents[0]
DATA = os.path.join(parent_dir, 'Data/out.csv')

# keep_default_na=False: strings such as 'NA' and empty fields are kept as
# literal strings instead of being converted to NaN.
dataset = pd.read_csv(DATA, keep_default_na=False)

In [15]:
# total = [number of severe cases, number of non severe cases]
#
# Each class is counted explicitly: classification == 1 first (the group
# labelled severe here), then classification == 0. This is the column order
# used by every table built further down, which divide by `total`.
# The earlier flip-of-value_counts() form only yielded this order because
# value_counts() sorts by frequency and class 1 is the rarer class; a change
# in class balance would have silently swapped the two denominators.
severity = dataset['Clinical classification']
total = np.array([(severity == 1).sum(), (severity == 0).sum()])

print(total)
# share of each class among all rows, in percent
print(total / dataset.shape[0] * 100)

[ 849 1223]
[40.97490347 59.02509653]


In [16]:
# Admissions per calendar year, split by clinical classification.
# Rows follow `years`; column 0 counts classification == 1 and column 1
# counts classification == 0, the same order as `total`.
years = ['2017', '2018', '2019']
severity = dataset['Clinical classification']
admitted_on = dataset['admission_date']

table = np.array(
    [
        # a row belongs to a year when its admission_date string contains it
        [((severity == cls) & (admitted_on.str.contains(yr))).sum() for cls in (1, 0)]
        for yr in years
    ],
    dtype=float,
)

print(table)
# each count as a percentage of its class total (column-wise)
print(np.round(100 * table / total, 1))
# per-year totals over both classes
print(np.sum(table, axis=1))

[[ 23. 109.]
 [246. 375.]
 [580. 739.]]
[[ 2.7  8.9]
 [29.  30.7]
 [68.3 60.4]]
[ 132.  621. 1319.]


In [18]:
# Sex by clinical classification.
# Rows: Female == 0, then Female == 1.
# Columns: classification == 1, then classification == 0 (order of `total`).
severity = dataset['Clinical classification']
female = dataset['Female']

table = np.zeros((2, 2))
for row, flag in enumerate((0, 1)):
    for col, cls in enumerate((1, 0)):
        matches = (severity == cls) & (female == flag)
        table[row, col] = matches.sum()

print(table)
# percentage of each class total
print(np.round(100 * table / total, 1))
# row totals over both classes
print(np.sum(table, axis=1))

[[542. 755.]
 [307. 468.]]
[[63.8 61.7]
 [36.2 38.3]]
[1297.  775.]


In [20]:
# Age group (months) by clinical classification.
# One row per label in `age_group`; column 0 is classification == 1,
# column 1 is classification == 0.
age_group = ['0 - <3','3 - <6','6 - <9','9 - <12','12 - <24','24 - <36','36 - <48','48 - <60','>= 60']

severity = dataset['Clinical classification']
age = dataset['Age group (in months)']

table = np.zeros((len(age_group), 2))
for row, label in enumerate(age_group):
    in_group = age == label
    table[row] = [
        ((severity == 1) & in_group).sum(),
        ((severity == 0) & in_group).sum(),
    ]

print(table)
# percentage of each class total
print(np.round(100 * table / total, 1))
# number of rows per age group
print(np.sum(table, axis=1))

[[ 64.  23.]
 [166. 126.]
 [220. 303.]
 [177. 279.]
 [128. 187.]
 [ 28.  78.]
 [ 21.  51.]
 [  9.  26.]
 [ 36. 150.]]
[[ 7.5  1.9]
 [19.6 10.3]
 [25.9 24.8]
 [20.8 22.8]
 [15.1 15.3]
 [ 3.3  6.4]
 [ 2.5  4.2]
 [ 1.1  2.1]
 [ 4.2 12.3]]
[ 87. 292. 523. 456. 315. 106.  72.  35. 186.]


In [21]:
# Vaccination value by clinical classification.
# The column is compared against strings, including the literal 'NA'.
vac = ['0','1','2','3','NA']

# Vaccination values of the classification == 1 rows, then the == 0 rows.
doses_by_class = [
    dataset.loc[dataset['Clinical classification'] == cls, 'Vaccination']
    for cls in (1, 0)
]

# One column of counts per class, one row per entry of `vac`.
table = np.column_stack(
    [[(doses == level).sum() for level in vac] for doses in doses_by_class]
).astype(float)

# print plain decimals instead of scientific notation
np.set_printoptions(suppress=True)
print(table)
# percentage of each class total
print(np.round(100 * table / total, 1))
# number of rows per vaccination value
print(np.sum(table, axis=1))

[[ 787. 1021.]
 [  29.  143.]
 [   5.   24.]
 [   2.    1.]
 [  26.   34.]]
[[92.7 83.5]
 [ 3.4 11.7]
 [ 0.6  2. ]
 [ 0.2  0.1]
 [ 3.1  2.8]]
[1808.  172.   29.    3.   60.]


In [30]:
# Place of exposure by clinical classification.
# The trailing '' entry counts rows whose place_of_exposure is an empty string.
place = ['community','Another hospital','NHP','']

severity = dataset['Clinical classification']
exposure = dataset['place_of_exposure']

# rows follow `place`; columns are classification == 1, then == 0
table = np.array(
    [[((severity == cls) & (exposure == where)).sum() for cls in (1, 0)]
     for where in place],
    dtype=float,
)

# print plain decimals instead of scientific notation
np.set_printoptions(suppress=True)
print(table)
# percentage of each class total
print(np.round(100 * table / total, 1))
# number of rows per place of exposure
print(np.sum(table, axis=1))

[[385. 896.]
 [131. 110.]
 [332. 215.]
 [  1.   2.]]
[[45.3 73.3]
 [15.4  9. ]
 [39.1 17.6]
 [ 0.1  0.2]]
[1281.  241.  547.    3.]


In [31]:
# Distance-from-hospital band (km) by clinical classification.
distance = ['0 - <20','20 - <200','200 - <500','>= 500']

severity = dataset['Clinical classification']
km_band = dataset['Distance from the hospital (km)']

# rows follow `distance`; columns are classification == 1, then == 0
table = np.zeros((len(distance), 2))
for row, band in enumerate(distance):
    for col, cls in enumerate((1, 0)):
        table[row, col] = ((severity == cls) & (km_band == band)).sum()

print(table)
# percentage of each class total
print(np.round(100 * table / total, 1))
# number of rows per distance band
print(np.sum(table, axis=1))

[[125. 343.]
 [567. 718.]
 [153. 152.]
 [  4.  10.]]
[[14.7 28. ]
 [66.8 58.7]
 [18.  12.4]
 [ 0.5  0.8]]
[ 468. 1285.  305.   14.]


In [32]:
# Region of address by clinical classification.
part_of_vietnam = ['Ha Noi','Northeastern','Northwestern','Red River Delta (except Hanoi)','Central','Southern']

# region values of the classification == 1 rows, then of the == 0 rows
regions_by_class = [
    dataset.loc[dataset['Clinical classification'] == cls, 'region_of_address']
    for cls in (1, 0)
]

# one row per region, one column per class
table = np.column_stack(
    [[(regions == name).sum() for name in part_of_vietnam] for regions in regions_by_class]
).astype(float)

print(table)
# percentage of each class total
print(np.round(100 * table / total, 1))
# number of rows per region
print(np.sum(table, axis=1))

[[239. 475.]
 [ 98. 109.]
 [ 49.  51.]
 [306. 416.]
 [155. 166.]
 [  2.   6.]]
[[28.2 38.8]
 [11.5  8.9]
 [ 5.8  4.2]
 [36.  34. ]
 [18.3 13.6]
 [ 0.2  0.5]]
[714. 207. 100. 722. 321.   8.]


In [33]:
# outcome_died flag by clinical classification.
# Rows: outcome_died == 0, then outcome_died == 1.
# Columns: classification == 1, then classification == 0.
severity = dataset['Clinical classification']
died = dataset['outcome_died']

table = np.array(
    [[((severity == cls) & (died == flag)).sum() for cls in (1, 0)]
     for flag in (0, 1)],
    dtype=float,
)

print(table)
# percentage of each class total
print(np.round(100 * table / total, 1))
# number of rows per outcome value
print(np.sum(table, axis=1))

[[ 819. 1223.]
 [  30.    0.]]
[[ 96.5 100. ]
 [  3.5   0. ]]
[2042.   30.]


In [36]:
# Onset-to-admission duration band by clinical classification.
duration = ['<0','0 - <3','3 - <7','7 - <14', '>= 14', 'Unknown']

severity = dataset['Clinical classification']
onset_to_admission = dataset['Duration between onset and admission']

# rows follow `duration`; columns are classification == 1, then == 0
table = np.zeros((len(duration), 2))
for row, band in enumerate(duration):
    in_band = onset_to_admission == band
    table[row, 0] = ((severity == 1) & in_band).sum()
    table[row, 1] = ((severity == 0) & in_band).sum()

print(table)
# percentage of each class total
print(np.round(100 * table / total, 1))
# number of rows per duration band
print(np.sum(table, axis=1))

[[339. 272.]
 [318. 596.]
 [156. 296.]
 [ 22.  25.]
 [ 13.  33.]
 [  1.   1.]]
[[39.9 22.2]
 [37.5 48.7]
 [18.4 24.2]
 [ 2.6  2. ]
 [ 1.5  2.7]
 [ 0.1  0.1]]
[611. 914. 452.  47.  46.   2.]


In [37]:
# Length-of-stay band by clinical classification.
duration = ['0 - <7','7 - <21', '>= 21']

severity = dataset['Clinical classification']
stay = dataset['Duration of stay within the hospital']

# rows follow `duration`; columns are classification == 1, then == 0
table = np.array(
    [[((severity == cls) & (stay == band)).sum() for cls in (1, 0)]
     for band in duration],
    dtype=float,
)

print(table)
# percentage of each class total
print(np.round(100 * table / total, 1))
# number of rows per length-of-stay band
print(np.sum(table, axis=1))

[[226. 816.]
 [391. 348.]
 [232.  59.]]
[[26.6 66.7]
 [46.1 28.5]
 [27.3  4.8]]
[1042.  739.  291.]


In [38]:
# Underlying-condition indicator columns by clinical classification.
# Each listed column is treated as a flag: a row is counted when its value == 1.
underlying_condition = [
    'Underlying conditions - Respiratory system',
    'Underlying conditions - Cardiovascular system',
    'Underlying condition - Gastrointestinal system',
    'Underlying condition - Kidney and urology system',
    'Underlying condition - Immunodeficiency',
    'Underlying condition - Neurological system',
    'Underlying condition - Inherited metabolic disorders',
    'Underlying condition - No underlying diseases',
    'Underlying condition - Other underlying conditions',
]

in_class_1 = dataset['Clinical classification'] == 1
in_class_0 = dataset['Clinical classification'] == 0

# one row per condition column; column 0 = classification 1, column 1 = classification 0
table = np.zeros((len(underlying_condition), 2))
for row, column in enumerate(underlying_condition):
    flagged = dataset[column] == 1
    table[row, 0] = (in_class_1 & flagged).sum()
    table[row, 1] = (in_class_0 & flagged).sum()

print(table)
# percentage of each class total
print(np.round(100 * table / total, 1))
# number of flagged rows per condition
print(np.sum(table, axis=1))

[[  23.   19.]
 [  45.   27.]
 [  73.   21.]
 [  20.   26.]
 [   6.    6.]
 [  44.   29.]
 [   7.    8.]
 [ 554. 1031.]
 [  90.   69.]]
[[ 2.7  1.6]
 [ 5.3  2.2]
 [ 8.6  1.7]
 [ 2.4  2.1]
 [ 0.7  0.5]
 [ 5.2  2.4]
 [ 0.8  0.7]
 [65.3 84.3]
 [10.6  5.6]]
[  42.   72.   94.   46.   12.   73.   15. 1585.  159.]


In [39]:
# Highest ventilation mode by clinical classification.
form = ['oxygen_cannula','CPAP','conventional_mechanical_ventilation','hfo_ventilation','ECMO']

severity = dataset['Clinical classification']
ventilation = dataset['highest_ventilation_mode']

# rows follow `form`; columns are classification == 1, then == 0
table = np.array(
    [[((severity == cls) & (ventilation == mode)).sum() for cls in (1, 0)]
     for mode in form],
    dtype=float,
)

print(table)
# percentage of each class total
print(np.round(100 * table / total, 1))
# number of rows per ventilation mode
print(np.sum(table, axis=1))

[[685.   0.]
 [  8.   0.]
 [144.   0.]
 [ 11.   0.]
 [  1.   0.]]
[[80.7  0. ]
 [ 0.9  0. ]
 [17.   0. ]
 [ 1.3  0. ]
 [ 0.1  0. ]]
[685.   8. 144.  11.   1.]


In [40]:
# Inspect which detection-time bands occur and how often; the labels shown
# here are the strings the next table matches against.
detection_time = dataset['Duration between onset and test (detection time) (in hours)']
detection_time.value_counts()

>= 48       1057
0 - <24      554
24 - <48     438
< 0           23
Name: Duration between onset and test (detection time) (in hours), dtype: int64

In [44]:
# Onset-to-test (detection time, hours) band by clinical classification.
duration = ['< 0', '0 - <24','24 - <48','>= 48']

severity = dataset['Clinical classification']
detection_time = dataset['Duration between onset and test (detection time) (in hours)']

# rows follow `duration`; columns are classification == 1, then == 0
table = np.zeros((len(duration), 2))
for row, band in enumerate(duration):
    for col, cls in enumerate((1, 0)):
        table[row, col] = ((severity == cls) & (detection_time == band)).sum()

print(table)
# percentage of each class total
print(np.round(100 * table / total, 1))
# number of rows per detection-time band
print(np.sum(table, axis=1))

[[  8.  15.]
 [240. 314.]
 [201. 237.]
 [400. 657.]]
[[ 0.9  1.2]
 [28.3 25.7]
 [23.7 19.4]
 [47.1 53.7]]
[  23.  554.  438. 1057.]


In [45]:
# Diagnosis indicator columns by clinical classification.
# Each column is treated as a flag (counted when == 1). Rows with several
# flags set are counted once per flag, so the rows need not add up to `total`.
diagnosis = ['Measles','Pneumonia','Bronchopneumonia','Other diagnosis']

severity = dataset['Clinical classification']

# one row per diagnosis column; columns are classification == 1, then == 0
table = np.array(
    [[((severity == cls) & (dataset[column] == 1)).sum() for cls in (1, 0)]
     for column in diagnosis],
    dtype=float,
)

print(table)
# percentage of each class total
print(np.round(100 * table / total, 1))
# number of flagged rows per diagnosis
print(np.sum(table, axis=1))

[[272. 576.]
 [361. 300.]
 [343. 406.]
 [177. 214.]]
[[32.  47.1]
 [42.5 24.5]
 [40.4 33.2]
 [20.8 17.5]]
[848. 661. 749. 391.]


In [46]:
# Complication indicator columns by clinical classification.
# Dataset columns are named prefix + suffix; a row is counted when the flag == 1.
prefix = 'complication-'
column_name = ['gastroentiritis','middle-ear-infec','conjunctivitis','laryngitis','pneumonia-bronchitis',
'febrile-seizures','septic-shock-sepsis']

in_class_1 = dataset['Clinical classification'] == 1
in_class_0 = dataset['Clinical classification'] == 0

# one row per complication; column 0 = classification 1, column 1 = classification 0
table = np.zeros((len(column_name), 2))
for row, suffix in enumerate(column_name):
    flagged = dataset[prefix + suffix] == 1
    table[row, 0] = (in_class_1 & flagged).sum()
    table[row, 1] = (in_class_0 & flagged).sum()

print(table)
# percentage of each class total
print(np.round(100 * table / total, 1))
# number of flagged rows per complication
print(np.sum(table, axis=1))

[[ 14.  37.]
 [  4.  26.]
 [  1.   1.]
 [  0.   7.]
 [721. 692.]
 [  9.   1.]
 [  2.   2.]]
[[ 1.6  3. ]
 [ 0.5  2.1]
 [ 0.1  0.1]
 [ 0.   0.6]
 [84.9 56.6]
 [ 1.1  0.1]
 [ 0.2  0.2]]
[  51.   30.    2.    7. 1413.   10.    4.]


In [47]:
# Co-infection indicator columns by clinical classification.
# Dataset columns are named prefix + suffix; a row is counted when the flag == 1.
prefix = 'co-infection-'
column_name = ['influenza-a','influenza-b','streptococus-aerius','streptococus-pneumonia']

severity = dataset['Clinical classification']

# one row per co-infection; columns are classification == 1, then == 0
table = np.array(
    [[((severity == cls) & (dataset[prefix + suffix] == 1)).sum() for cls in (1, 0)]
     for suffix in column_name],
    dtype=float,
)

print(table)
# percentage of each class total
print(np.round(100 * table / total, 1))
# number of flagged rows per co-infection
print(np.sum(table, axis=1))

[[8. 8.]
 [3. 1.]
 [1. 2.]
 [2. 0.]]
[[0.9 0.7]
 [0.4 0.1]
 [0.1 0.2]
 [0.2 0. ]]
[16.  4.  3.  2.]


In [48]:
# Further infection indicator columns by clinical classification.
# A row is counted for a column when that column's value == 1.
column_name = ['respiratory_syncytical_virus', 'adenovirus', 'pertussis', 'healthcare_associated_infection']

severity = dataset['Clinical classification']

# one row per column; column 0 = classification 1, column 1 = classification 0
table = np.zeros((len(column_name), 2))
for row, column in enumerate(column_name):
    flagged = dataset[column] == 1
    for col, cls in enumerate((1, 0)):
        table[row, col] = ((severity == cls) & flagged).sum()

print(table)
# percentage of each class total
print(np.round(100 * table / total, 1))
# number of flagged rows per column
print(np.sum(table, axis=1))

[[18. 12.]
 [73. 22.]
 [12.  5.]
 [15.  3.]]
[[2.1 1. ]
 [8.6 1.8]
 [1.4 0.4]
 [1.8 0.2]]
[30. 95. 17. 18.]
