In [52]:
import pandas as pd
import numpy as np
import datetime
import string
import os
from matplotlib import pyplot as plt
from pathlib import Path

# Denominator used for the overall outcome percentages in the next cell.
N = 849

# Load the analysis table; keep_default_na=True keeps pandas' standard NA
# markers, so they are parsed as NaN.
dataset = pd.read_csv('severe_out.csv', keep_default_na=True)

In [53]:
# Number of patients
# x[0] counts rows with outcome_died == 1, x[1] counts rows with outcome_died == 0.
x = np.array(
    [(dataset['outcome_died'] == outcome).sum() for outcome in (1, 0)],
    dtype=float,
)
print(x)
# Share of each outcome relative to the cohort size N, in percent.
print(np.round(x*100/N,1))

[ 30. 819.]
[ 3.5 96.5]


In [54]:
# Year of admission
# Rows: admission years 2017, 2018, 2019.
# Columns: outcome_died == 1, then outcome_died == 0.
outcome_masks = [dataset['outcome_died'] == outcome for outcome in (1, 0)]
# Column totals come from the data instead of the hard-coded [30, 819].
total = np.array([mask.sum() for mask in outcome_masks])

# A year matches when it appears as a substring of admission_date;
# na=False makes missing dates count as "no match" (same as the old `== True`).
year_masks = [
    dataset['admission_date'].str.contains(str(year), na=False)
    for year in range(2017, 2020)
]
x = np.array(
    [[(year_mask & outcome_mask).sum() for outcome_mask in outcome_masks]
     for year_mask in year_masks],
    dtype=float,
)

print(x)
# Column percentages: each count divided by the total of its outcome group.
print(np.round(x*100/total,1))

[[  2.  21.]
 [  6. 240.]
 [ 22. 558.]]
[[ 6.7  2.6]
 [20.  29.3]
 [73.3 68.1]]


In [55]:
# Sex
# Rows: Female == 0, then Female == 1.
# Columns: outcome_died == 1, then outcome_died == 0.
outcome_masks = [dataset['outcome_died'] == outcome for outcome in (1, 0)]
# Column totals come from the data instead of the hard-coded [30, 819].
total = np.array([mask.sum() for mask in outcome_masks])

x = np.array(
    [[((dataset['Female'] == level) & outcome_mask).sum() for outcome_mask in outcome_masks]
     for level in range(2)],
    dtype=float,
)

print(x)
# Column percentages: each count divided by the total of its outcome group.
print(np.round(x*100/total,1))

[[ 18. 524.]
 [ 12. 295.]]
[[60. 64.]
 [40. 36.]]


In [56]:
# Age at admission, split by outcome (outcome_died == 1 vs. outcome_died == 0).
age_deceased = dataset['age_at_admission'][dataset['outcome_died'] == 1]
age_discharged = dataset['age_at_admission'][dataset['outcome_died'] == 0]

# Printed order: mean (deceased, discharged), then standard deviation
# (deceased, discharged), each rounded to one decimal.
for summarise in (pd.Series.mean, pd.Series.std):
    for ages in (age_deceased, age_discharged):
        print(round(summarise(ages),1))

32.6
13.1
43.1
19.0


In [57]:
# Age group (in months)
# Rows: the labels in age_group, in order.
# Columns: outcome_died == 1, then outcome_died == 0.
age_group = ['0 - <3','3 - <6','6 - <9','9 - <12','12 - <24','24 - <36','36 - <48','48 - <60','>= 60']

outcome_masks = [dataset['outcome_died'] == outcome for outcome in (1, 0)]
# Column totals come from the data instead of the hard-coded [30, 819].
total = np.array([mask.sum() for mask in outcome_masks])

x = np.array(
    [[((dataset['Age group (in months)'] == label) & outcome_mask).sum()
      for outcome_mask in outcome_masks]
     for label in age_group],
    dtype=float,
)

print(x)
# Column percentages: each count divided by the total of its outcome group.
print(np.round(x*100/total,1))

[[  0.  64.]
 [  6. 160.]
 [  6. 214.]
 [  6. 171.]
 [  3. 125.]
 [  1.  27.]
 [  1.  20.]
 [  1.   8.]
 [  6.  30.]]
[[ 0.   7.8]
 [20.  19.5]
 [20.  26.1]
 [20.  20.9]
 [10.  15.3]
 [ 3.3  3.3]
 [ 3.3  2.4]
 [ 3.3  1. ]
 [20.   3.7]]


In [81]:
# Vaccination status
# Rows 0-3: Vaccination == 0, 1, 2, 3; row 4: Vaccination is missing (NaN).
# Columns: outcome_died == 1, then outcome_died == 0.
outcome_masks = [dataset['outcome_died'] == outcome for outcome in (1, 0)]
# Column totals come from the data instead of the hard-coded [30, 819].
total = np.array([mask.sum() for mask in outcome_masks])

vaccination = dataset['Vaccination']
# The missing-value row is listed explicitly instead of being special-cased
# inside the counting loop.
status_masks = [vaccination == level for level in range(4)] + [vaccination.isna()]

x = np.array(
    [[(status_mask & outcome_mask).sum() for outcome_mask in outcome_masks]
     for status_mask in status_masks],
    dtype=float,
)

print(x)
# Column percentages: each count divided by the total of its outcome group.
print(np.round(x*100/total,1))

[[ 26. 761.]
 [  2.  27.]
 [  0.   5.]
 [  1.   1.]
 [  1.  25.]]
[[86.7 92.9]
 [ 6.7  3.3]
 [ 0.   0.6]
 [ 3.3  0.1]
 [ 3.3  3.1]]


In [59]:
# Place of exposure
# Rows: the labels in place, in order.
# Columns: outcome_died == 1, then outcome_died == 0.
place = ['NHP', 'Another hospital', 'community', 'Unknown']

outcome_masks = [dataset['outcome_died'] == outcome for outcome in (1, 0)]
# Column totals come from the data instead of the hard-coded [30, 819].
total = np.array([mask.sum() for mask in outcome_masks])

x = np.array(
    [[((dataset['place_of_exposure'] == label) & outcome_mask).sum()
      for outcome_mask in outcome_masks]
     for label in place],
    dtype=float,
)

print(x)
# Column percentages: each count divided by the total of its outcome group.
print(np.round(x*100/total,1))

[[ 12. 320.]
 [  9. 122.]
 [  9. 376.]
 [  0.   0.]]
[[40.  39.1]
 [30.  14.9]
 [30.  45.9]
 [ 0.   0. ]]


In [60]:
# Distance from the hospital (km)
# Rows: the labels in distance_group, in order.
# Columns: outcome_died == 1, then outcome_died == 0.
distance_group = ['0 - <20','20 - <200','200 - <500','>= 500']

outcome_masks = [dataset['outcome_died'] == outcome for outcome in (1, 0)]
# Column totals come from the data instead of the hard-coded [30, 819].
total = np.array([mask.sum() for mask in outcome_masks])

x = np.array(
    [[((dataset['Distance from the hospital (km)'] == label) & outcome_mask).sum()
      for outcome_mask in outcome_masks]
     for label in distance_group],
    dtype=float,
)

print(x)
# Column percentages: each count divided by the total of its outcome group.
print(np.round(x*100/total,1))

[[  2. 123.]
 [ 22. 545.]
 [  6. 147.]
 [  0.   4.]]
[[ 6.7 15. ]
 [73.3 66.5]
 [20.  17.9]
 [ 0.   0.5]]


In [61]:
# Region of residence
# Rows: the labels in part_of_vietnam, in order.
# Columns: outcome_died == 1, then outcome_died == 0.
part_of_vietnam = ['Ha Noi','Northeastern','Northwestern','Red River Delta (except Hanoi)','Central','Southern']

outcome_masks = [dataset['outcome_died'] == outcome for outcome in (1, 0)]
# Column totals come from the data instead of the hard-coded [30, 819].
total = np.array([mask.sum() for mask in outcome_masks])

x = np.array(
    [[((dataset['region_of_address'] == label) & outcome_mask).sum()
      for outcome_mask in outcome_masks]
     for label in part_of_vietnam],
    dtype=float,
)

print(x)
# Column percentages: each count divided by the total of its outcome group.
print(np.round(x*100/total,1))

[[  4. 235.]
 [  5.  93.]
 [  3.  46.]
 [ 11. 295.]
 [  7. 148.]
 [  0.   2.]]
[[13.3 28.7]
 [16.7 11.4]
 [10.   5.6]
 [36.7 36. ]
 [23.3 18.1]
 [ 0.   0.2]]


In [82]:
# By duration between onset and admission
# Each entry maps a row label to a scalar predicate on the duration value.
# NaN fails every numeric comparison, so missing values only land in 'Unknown'.
duration_groups = {
    '<0': lambda d: d < 0,
    '0-3': lambda d: d >= 0 and d < 3,
    '3-7': lambda d: d >= 3 and d < 7,
    '7-14': lambda d: d >= 7 and d < 14,
    '>=14': lambda d: d >= 14,
    'Unknown': lambda d: pd.isna(d)
}

# Columns: outcome_died == 1, then outcome_died == 0.
outcome_masks = [dataset['outcome_died'] == outcome for outcome in (1, 0)]
# Column totals come from the data instead of the hard-coded [30, 819].
total = np.array([mask.sum() for mask in outcome_masks])

onset_to_admission = dataset['duration time onset to admission']
# The old `if pd.isna(group)` branch tested the dict key (a string), so it
# never ran; every row, 'Unknown' included, is selected by its own predicate.
# Each predicate is applied once and reused for both outcome columns.
group_masks = [onset_to_admission.apply(in_group) for in_group in duration_groups.values()]

x = np.array(
    [[(group_mask & outcome_mask).sum() for outcome_mask in outcome_masks]
     for group_mask in group_masks],
    dtype=float,
)

print(x)
# Column percentages: each count divided by the total of its outcome group.
print(np.round(x*100/total,1))

[[ 16. 323.]
 [  7. 311.]
 [  4. 152.]
 [  1.  21.]
 [  2.  11.]
 [  0.   1.]]
[[53.3 39.4]
 [23.3 38. ]
 [13.3 18.6]
 [ 3.3  2.6]
 [ 6.7  1.3]
 [ 0.   0.1]]


In [63]:
# Duration of stay within the hospital
# Rows: the labels in duration_group, in order.
# Columns: outcome_died == 1, then outcome_died == 0.
duration_group = ['0 - <7','7 - <21','>= 21']

outcome_masks = [dataset['outcome_died'] == outcome for outcome in (1, 0)]
# Column totals come from the data instead of the hard-coded [30, 819].
total = np.array([mask.sum() for mask in outcome_masks])

x = np.array(
    [[((dataset['Duration of stay within the hospital'] == label) & outcome_mask).sum()
      for outcome_mask in outcome_masks]
     for label in duration_group],
    dtype=float,
)

print(x)
# Column percentages: each count divided by the total of its outcome group.
print(np.round(x*100/total,1))

[[  5. 221.]
 [ 13. 378.]
 [ 12. 220.]]
[[16.7 27. ]
 [43.3 46.2]
 [40.  26.9]]


In [64]:
# Underlying conditions
# Rows: one indicator column per entry of underlying_condition (counted when == 1).
# Columns: outcome_died == 1, then outcome_died == 0.
# Each row tests a different column, so column percentages need not sum to 100.
underlying_condition = ['Underlying conditions - Respiratory system','Underlying conditions - Cardiovascular system',
    'Underlying condition - Gastrointestinal system','Underlying condition - Kidney and urology system',
    'Underlying condition - Immunodeficiency','Underlying condition - Neurological system','Underlying condition - Inherited metabolic disorders',
    'Underlying condition - No underlying diseases','Underlying condition - Other underlying conditions']

outcome_masks = [dataset['outcome_died'] == outcome for outcome in (1, 0)]
# Column totals come from the data instead of the hard-coded [30, 819].
total = np.array([mask.sum() for mask in outcome_masks])

x = np.array(
    [[((dataset[column] == 1) & outcome_mask).sum() for outcome_mask in outcome_masks]
     for column in underlying_condition],
    dtype=float,
)
print(x)
# Each count divided by the total of its outcome group, in percent.
print(np.round(100*x/total,1))

[[  1.  22.]
 [  1.  44.]
 [  2.  71.]
 [  0.  20.]
 [  1.   5.]
 [  2.  42.]
 [  0.   7.]
 [ 15. 539.]
 [  8.  82.]]
[[ 3.3  2.7]
 [ 3.3  5.4]
 [ 6.7  8.7]
 [ 0.   2.4]
 [ 3.3  0.6]
 [ 6.7  5.1]
 [ 0.   0.9]
 [50.  65.8]
 [26.7 10. ]]


In [65]:
# Maximal form of respiratory support used
# Rows: the values of highest_ventilation_mode listed in b, in order.
# Columns: outcome_died == 1, then outcome_died == 0.
b = ['oxygen_cannula','CPAP','conventional_mechanical_ventilation','hfo_ventilation','ECMO']

outcome_masks = [dataset['outcome_died'] == outcome for outcome in (1, 0)]
# Column totals come from the data instead of the hard-coded [30, 819].
total = np.array([mask.sum() for mask in outcome_masks])

x = np.array(
    [[((dataset['highest_ventilation_mode'] == mode) & outcome_mask).sum()
      for outcome_mask in outcome_masks]
     for mode in b],
    dtype=float,
)

print(x)
# Column percentages: each count divided by the total of its outcome group.
print(np.round(100*x/total,1))

[[  2. 683.]
 [  0.   8.]
 [ 21. 123.]
 [  7.   4.]
 [  0.   1.]]
[[ 6.7 83.4]
 [ 0.   1. ]
 [70.  15. ]
 [23.3  0.5]
 [ 0.   0.1]]


In [83]:
# Duration between onset and test (detection time) (in hours)
# Each entry maps a row label to a scalar predicate on the duration in hours.
# NaN fails every numeric comparison, so missing values only land in 'Unknown'.
duration_groups = {
    '<0': lambda h: h < 0,
    '0-<24': lambda h: h >= 0 and h < 24,
    '24-<48': lambda h: h >= 24 and h < 48,
    '>=48': lambda h: h >= 48,
    'Unknown': lambda h: pd.isna(h)
}

# Columns: outcome_died == 1, then outcome_died == 0.
outcome_masks = [dataset['outcome_died'] == outcome for outcome in (1, 0)]
# Column totals come from the data instead of the hard-coded [30, 819].
total = np.array([mask.sum() for mask in outcome_masks])

# 'onset to test' is multiplied by 24 before grouping (NaN stays NaN).
onset_to_test_hours = dataset['onset to test'] * 24
# The old `if pd.isna(group)` branch tested the dict key (a string), so it
# never ran; every row, 'Unknown' included, is selected by its own predicate.
# Each predicate is applied once and reused for both outcome columns.
group_masks = [onset_to_test_hours.apply(in_group) for in_group in duration_groups.values()]

x = np.array(
    [[(group_mask & outcome_mask).sum() for outcome_mask in outcome_masks]
     for group_mask in group_masks],
    dtype=float,
)

print(x)
# Column percentages: each count divided by the total of its outcome group.
print(np.round(x*100/total,1))

[[  1.   5.]
 [ 10. 175.]
 [  4. 198.]
 [ 15. 440.]
 [  0.   1.]]
[[ 3.3  0.6]
 [33.3 21.4]
 [13.3 24.2]
 [50.  53.7]
 [ 0.   0.1]]


In [67]:
# Diagnosis upon admission
# Rows: one indicator column per entry of b (counted when == 1).
# Columns: outcome_died == 1, then outcome_died == 0.
# Each row tests a different column, so column percentages need not sum to 100.
b = ['Measles','Pneumonia','Bronchopneumonia','Other diagnosis']

outcome_masks = [dataset['outcome_died'] == outcome for outcome in (1, 0)]
# Column totals come from the data instead of the hard-coded [30, 819].
total = np.array([mask.sum() for mask in outcome_masks])

x = np.array(
    [[((dataset[column] == 1) & outcome_mask).sum() for outcome_mask in outcome_masks]
     for column in b],
    dtype=float,
)

print(x)
# Each count divided by the total of its outcome group, in percent.
print(np.round(100*x/total,1))

[[  6. 266.]
 [ 15. 346.]
 [ 11. 332.]
 [ 10. 167.]]
[[20.  32.5]
 [50.  42.2]
 [36.7 40.5]
 [33.3 20.4]]


In [68]:
# Complications
# Rows: one indicator column per entry of column_name, looked up as
# prefix + name (counted when == 1).
# Columns: outcome_died == 1, then outcome_died == 0.
prefix = 'complication-'
column_name = ['gastroentiritis','middle-ear-infec','conjunctivitis','laryngitis','pneumonia-bronchitis',
'febrile-seizures','septic-shock-sepsis']

outcome_masks = [dataset['outcome_died'] == outcome for outcome in (1, 0)]
# Column totals come from the data instead of the hard-coded [30, 819].
total = np.array([mask.sum() for mask in outcome_masks])

x = np.array(
    [[((dataset[prefix+name] == 1) & outcome_mask).sum() for outcome_mask in outcome_masks]
     for name in column_name],
    dtype=float,
)

print(x)
# Each count divided by the total of its outcome group, in percent.
print(np.round(100*x/total,1))

[[  0.  14.]
 [  0.   4.]
 [  0.   1.]
 [  0.   0.]
 [ 25. 696.]
 [  0.   9.]
 [  1.   1.]]
[[ 0.   1.7]
 [ 0.   0.5]
 [ 0.   0.1]
 [ 0.   0. ]
 [83.3 85. ]
 [ 0.   1.1]
 [ 3.3  0.1]]


In [69]:
# Co-infections
# Rows: one indicator column per entry of column_name, looked up as
# prefix + name (counted when == 1).
# Columns: outcome_died == 1, then outcome_died == 0.
prefix = 'co-infection-'
column_name = ['influenza-a','influenza-b','streptococus-aerius','streptococus-pneumonia']

outcome_masks = [dataset['outcome_died'] == outcome for outcome in (1, 0)]
# Column totals come from the data instead of the hard-coded [30, 819].
total = np.array([mask.sum() for mask in outcome_masks])

x = np.array(
    [[((dataset[prefix+name] == 1) & outcome_mask).sum() for outcome_mask in outcome_masks]
     for name in column_name],
    dtype=float,
)

print(x)
# Each count divided by the total of its outcome group, in percent.
print(np.round(100*x/total,1))

[[0. 8.]
 [0. 3.]
 [0. 1.]
 [0. 2.]]
[[0.  1. ]
 [0.  0.4]
 [0.  0.1]
 [0.  0.2]]


In [70]:
# Healthcare-associated infection
# Rows: one indicator column per entry of column_name (counted when == 1).
# Columns: outcome_died == 1, then outcome_died == 0.
column_name = ['respiratory_syncytical_virus', 'adenovirus', 'pertussis', 'healthcare_associated_infection']

outcome_masks = [dataset['outcome_died'] == outcome for outcome in (1, 0)]
# Column totals come from the data instead of the hard-coded [30, 819].
total = np.array([mask.sum() for mask in outcome_masks])

x = np.array(
    [[((dataset[name] == 1) & outcome_mask).sum() for outcome_mask in outcome_masks]
     for name in column_name],
    dtype=float,
)

print(x)
# Each count divided by the total of its outcome group, in percent.
print(np.round(100*x/total,1))

[[ 1. 17.]
 [11. 62.]
 [ 0. 12.]
 [ 3. 12.]]
[[ 3.3  2.1]
 [36.7  7.6]
 [ 0.   1.5]
 [10.   1.5]]
