In [4]:
import numpy as np
import pandas as pd
from scipy.stats import chi2_contingency
from scipy.stats import f_oneway
from scipy.stats import ttest_ind

dataset = pd.read_csv('out.csv', keep_default_na=False)

In [16]:
# Year of admission
table = np.array([[61, 8, 61], [382, 67, 171], [838, 166, 315]])
print(table)
stat, pvalue, dof, expected_freq = chi2_contingency(table)
print(pvalue)

[[ 61   8  61]
 [382  67 171]
 [838 166 315]]
6.692994327832307e-07


In [17]:
# Sex
table = np.array([[787, 161, 348], [494, 80, 199]])
print(table)
stat, pvalue, dof, expected_freq = chi2_contingency(table)
print(pvalue)

[[787 161 348]
 [494  80 199]]
0.2461350496254834


In [20]:
# Age in months
a = [[], [],[]]
place = ['community','Another hospital','NHP']
for i in range(len(dataset)):
    x = dataset.at[i,'admission_date']
    y = pd.to_datetime(x)
    z = dataset.at[i,'dob']
    t = pd.to_datetime(z)
    decimal_month_at_admission = (y-t).days / 30
    place_exposure = dataset.at[i,'place_of_exposure']
    if place_exposure == '': continue
    id =  place.index(place_exposure)
    a[id].append(decimal_month_at_admission)

f_oneway(a[0], a[1], a[2])

F_onewayResult(statistic=19.594341813242558, pvalue=3.715240652193271e-09)

In [19]:
# Vaccination status 
table = np.array([[1074,222,509],[142,11,19],[24,2,3],[2,0,1],[39,6,15]])
print(table)
stat, pvalue, dof, expected_freq = chi2_contingency(table)
print(pvalue)

[[1074  222  509]
 [ 142   11   19]
 [  24    2    3]
 [   2    0    1]
 [  39    6   15]]
1.3960795570253065e-06


In [20]:
# Region of residence
table = np.array([[503,67,144], [117,29,61], [63,9,28], [424,81,214], [169,54,98],[5,1,2]])
print(table)
stat, pvalue, dof, expected_freq = chi2_contingency(table)
print(pvalue)

[[503  67 144]
 [117  29  61]
 [ 63   9  28]
 [424  81 214]
 [169  54  98]
 [  5   1   2]]
3.992796106388303e-06


In [21]:
# Clinical outcome 
table = np.array([[1272,232,535],[9,9,12]])
print(table)
stat, pvalue, dof, expected_freq = chi2_contingency(table)
print(pvalue)

[[1272  232  535]
 [   9    9   12]]
0.00034788832737828163


In [22]:
# Underlying conditions 
table = np.array([[16,3,23], [37,7,28],[42,2,50],[11,2,33],[4,1,7],[22,5,46],[9,1,4],[1092,206,285],[55,14,90]])
print(table)
stat, pvalue, dof, expected_freq = chi2_contingency(table)
print(pvalue)

[[  16    3   23]
 [  37    7   28]
 [  42    2   50]
 [  11    2   33]
 [   4    1    7]
 [  22    5   46]
 [   9    1    4]
 [1092  206  285]
 [  55   14   90]]
3.830949106314939e-53


In [23]:
# Maximal form of respiratory support used 
table = np.array([[[335,93,256],[2,1,5],[46,32,66],[1,5,5],[1,0,0]]])
print(table)
stat, pvalue, dof, expected_freq = chi2_contingency(table)
print(pvalue)

[[[335  93 256]
  [  2   1   5]
  [ 46  32  66]
  [  1   5   5]
  [  1   0   0]]]
0.00039757778334743095


In [24]:
# Complications 
table = np.array([[[34,4,12],[19,3,8],[2,0,0],[5,0,2],[823,196,393],[2,1,7],[4,0,0]]])
print(table)
stat, pvalue, dof, expected_freq = chi2_contingency(table)
print(pvalue)

[[[ 34   4  12]
  [ 19   3   8]
  [  2   0   0]
  [  5   0   2]
  [823 196 393]
  [  2   1   7]
  [  4   0   0]]]
0.14563525626399523


In [25]:
# Co-infections 
table = np.array([[[10,1,5],[2,0,2],[1,0,2],[0,1,1]]])
print(table)
stat, pvalue, dof, expected_freq = chi2_contingency(table)
print(pvalue)

[[[10  1  5]
  [ 2  0  2]
  [ 1  0  2]
  [ 0  1  1]]]
0.2685603500299233


In [26]:
# Healthcare-associated infection
table = np.array([[[9,1,20],[37,19,38],[5,3,9],[1,4,13]]])
print(table)
stat, pvalue, dof, expected_freq = chi2_contingency(table)
print(pvalue)

[[[ 9  1 20]
  [37 19 38]
  [ 5  3  9]
  [ 1  4 13]]]
0.022038927613224197


In [27]:
# Clinical classification
table = np.array([[[385,131,332],[896,110,215]]])
print(table)
stat, pvalue, dof, expected_freq = chi2_contingency(table)
print(pvalue)

[[[385 131 332]
  [896 110 215]]]
2.0627038367635563e-37


In [46]:
# Diagnosis upon admission 
table = np.array([[612,85,150],[374,109,178],[440,100,209],[185,35,169]])
print(table)
stat, pvalue, dof, expected_freq = chi2_contingency(table)
print(pvalue)

[[612  85 150]
 [374 109 178]
 [440 100 209]
 [185  35 169]]
1.2832922667767798e-22


In [41]:
# Duration between onset and admission 
table = np.array([[261,52,296], [652,107,156], [303,64,85], [29,10,8], [36,8,2]])
f_oneway(table[0], table[1], table[2], table[3], table[4])

F_onewayResult(statistic=1.8494929739545392, pvalue=0.19611462986765033)

In [43]:
# Distance from the hospital
table = np.array([[347,45,76],[764,143,375],[164,51,90],[6,2,6]])
f_oneway(table[0], table[1], table[2], table[3])

F_onewayResult(statistic=3.0424810076570608, pvalue=0.09253932687496695)

In [44]:
# Duration of stay within the hospital 
table = np.array([[817,78,146],[404,125,209],[60,38,192]])
f_oneway(table[0], table[1], table[2])

F_onewayResult(statistic=0.7348695702906481, pvalue=0.518247772858704)

In [45]:
# Duration between onset and test
table = np.array([[14,3,5],[315,45,193],[264,47,127],[688,146,222]])
f_oneway(table[0], table[1], table[2], table[3])

F_onewayResult(statistic=2.0722970755681738, pvalue=0.18236102913006402)