In [3]:
from itertools import combinations

import numpy as np
import pandas as pd
from scipy.stats import fisher_exact
# Names of the label columns that the cells below select and sum.
asyncs = [
    "no asynchrony",
    "unassisted breath",
    "early trigger",
    "late trigger",
    "failed trigger",
    "false trigger",
    "multiple trigger",
    "early cycling",
    "late cycling",
    "ambiguous",
    "expiratory work",
    "artefact",
    "work shifting",
]

# Load both partitions; the CSV column "splinting" is exposed as "ambiguous".
# Note that the development frame is read from the "train" directory.
dev = pd.read_csv("../data/train/combined.csv").rename(columns={"splinting": "ambiguous"})
test = pd.read_csv("../data/test/combined.csv").rename(columns={"splinting": "ambiguous"})

In [4]:
# Supplementary Table 2
# Per-sample totals of every label column, tagged with the partition the
# sample came from and with a "Mode" value.
dev_label_counts = (
    dev.groupby("Sample")[asyncs].sum().assign(**{"Data Partition": "Development"})
)
test_label_counts = (
    test.groupby("Sample")[asyncs].sum().assign(**{"Data Partition": "Test"})
)
label_counts = pd.concat([dev_label_counts, test_label_counts])

# Every sample starts as "AC"; the samples listed below are then overridden.
label_counts["Mode"] = "AC"
simv_samples = ["DG204", "DG212", "DG220"]
label_counts.loc[simv_samples, "Mode"] = "SIMV"
label_counts.loc["DG222", "Mode"] = "PSV"
label_counts

Unnamed: 0_level_0,no asynchrony,unassisted breath,early trigger,late trigger,failed trigger,false trigger,multiple trigger,early cycling,late cycling,ambiguous,expiratory work,artefact,work shifting,Data Partition,Mode
Sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
DG201,353,13,13,20,36,3,0,3,12,3,66,6,0,Development,AC
DG202,239,30,0,8,4,0,6,5,38,3,200,9,0,Development,AC
DG203,154,19,9,32,31,1,3,13,51,5,253,23,0,Development,AC
DG204,18,349,1,1,27,0,0,12,1,9,125,1,17,Development,SIMV
DG205,118,1,19,10,27,0,3,1,286,17,123,11,0,Development,AC
DG206,372,0,6,8,6,2,0,2,44,1,76,5,0,Development,AC
DG207,311,21,5,23,4,0,3,3,53,2,107,10,0,Development,AC
DG210,181,63,9,20,23,0,9,7,40,12,181,30,0,Development,AC
DG212,108,131,2,22,40,0,2,8,7,4,201,36,2,Development,SIMV
DG213,134,152,4,26,34,2,24,12,38,10,137,21,14,Development,AC


In [5]:
# Supplementary Table 3
# NOTE(review): 500 is presumably the number of annotated breaths per sample --
# confirm against the data before reusing this cell on other recordings.
breaths_per_sample = 500

# "Number of Breaths": the per-sample total minus the unassisted-breath count.
vb_counts = breaths_per_sample - label_counts["unassisted breath"]

# Breaths counted as asynchronous: what remains after removing the
# "no asynchrony" and "artefact" counts.
non_async_counts = label_counts[["no asynchrony", "artefact"]].sum(axis=1)
async_counts = vb_counts - non_async_counts

ai = pd.DataFrame(
    {
        "Asynchrony Index": async_counts.div(vb_counts),
        "Number of Breaths": vb_counts,
        "Mode": label_counts["Mode"],
    }
)
ai = ai.reset_index()
ai.sort_values(by=["Mode", "Sample"])

Unnamed: 0,Sample,Asynchrony Index,Number of Breaths,Mode
0,DG201,0.262834,487,AC
1,DG202,0.47234,470,AC
2,DG203,0.632017,481,AC
4,DG205,0.741483,499,AC
5,DG206,0.246,500,AC
6,DG207,0.329854,479,AC
7,DG210,0.517162,437,AC
9,DG213,0.554598,348,AC
10,DG214,0.551198,459,AC
11,DG215,0.672165,485,AC


In [6]:
# `ai` also holds the string columns "Sample" and "Mode"; restrict the mean to
# the numeric columns explicitly (a bare .mean() warns on older pandas and
# raises TypeError on pandas >= 2.0).
ai.mean(numeric_only=True)

  ai.mean()


Asynchrony Index       0.525326
Number of Breaths    438.652174
dtype: float64

In [7]:
# Per-mode means of the numeric columns. The string column "Sample" cannot be
# averaged, so exclude it explicitly instead of relying on pandas silently
# dropping it (pandas >= 2.0 raises TypeError without numeric_only=True).
ai.groupby("Mode").mean(numeric_only=True)

Unnamed: 0_level_0,Asynchrony Index,Number of Breaths
Mode,Unnamed: 1_level_1,Unnamed: 2_level_1
AC,0.502015,467.0
PSV,0.419878,493.0
SIMV,0.708107,241.0


In [8]:
# Table 3 Overall
# Total count of each label across all samples. "Percentage" is the count
# divided by 11500, i.e. a fraction between 0 and 1 (not multiplied by 100).
overall_counts = label_counts[asyncs].sum(axis=0)
pd.DataFrame(
    {"Percentage": overall_counts / 11500, "Count": overall_counts}
).sort_values(by="Percentage", ascending=False)

Unnamed: 0,Percentage,Count
no asynchrony,0.402435,4628
expiratory work,0.311826,3586
unassisted breath,0.122696,1411
late cycling,0.099304,1142
late trigger,0.059391,683
failed trigger,0.042696,491
artefact,0.031913,367
ambiguous,0.016087,185
multiple trigger,0.01313,151
early trigger,0.012261,141


In [9]:
# Table 3 Development
# Same table restricted to rows whose "Data Partition" is "Development";
# "Percentage" is the count divided by 9000 (a fraction, not multiplied by 100).
is_development = label_counts["Data Partition"] == "Development"
development_counts = label_counts.loc[is_development, asyncs].sum(axis=0)
pd.DataFrame(
    {"Percentage": development_counts / 9000, "Count": development_counts}
).sort_values(by="Percentage", ascending=False)

Unnamed: 0,Percentage,Count
no asynchrony,0.384556,3461
expiratory work,0.296222,2666
unassisted breath,0.153111,1378
late cycling,0.106222,956
late trigger,0.062556,563
failed trigger,0.040333,363
artefact,0.032111,289
ambiguous,0.015444,139
early cycling,0.010667,96
early trigger,0.009778,88


In [10]:
# Table 3 Test
# Same table restricted to rows whose "Data Partition" is "Test";
# "Percentage" is the count divided by 2500 (a fraction, not multiplied by 100).
is_test = label_counts["Data Partition"] == "Test"
test_counts = label_counts.loc[is_test, asyncs].sum(axis=0)
pd.DataFrame(
    {"Percentage": test_counts / 2500, "Count": test_counts}
).sort_values(by="Percentage", ascending=False)

Unnamed: 0,Percentage,Count
no asynchrony,0.4668,1167
expiratory work,0.368,920
late cycling,0.0744,186
failed trigger,0.0512,128
late trigger,0.048,120
artefact,0.0312,78
multiple trigger,0.0296,74
early trigger,0.0212,53
ambiguous,0.0184,46
unassisted breath,0.0132,33


In [11]:
# Stack the development and test rows into a single frame.
combined = pd.concat([dev, test])

# NOTE: this rebinds `asyncs` from the 13-label list defined at the top of the
# notebook to a 7-label array. Cells above that use `asyncs` will give
# different results if they are re-run after this cell.
asyncs = np.array(
    [
        "early trigger",
        "late trigger",
        "failed trigger",
        "multiple trigger",
        "early cycling",
        "late cycling",
        "expiratory work",
    ]
)

# Keep only rows where both "unassisted breath" and "no asynchrony" are 0.
asyncs_df = combined.loc[
    combined["unassisted breath"].eq(0) & combined["no asynchrony"].eq(0)
]

In [12]:
# Row-wise sum over the label columns in `asyncs`, then how often each sum occurs.
labels_per_row = asyncs_df[asyncs].sum(axis=1)
labels_per_row.value_counts()

1    4222
2     851
0     271
3     105
4      12
6       1
dtype: int64

In [13]:
# Table 4
# For every unordered pair (a, b) of labels in `asyncs`, build the 2x2
# contingency table over all rows of `combined` and run Fisher's exact test.
# `combinations` and `fisher_exact` are imported in the notebook's import cell.

# The == 0 / == 1 comparisons depend only on the label, so compute them once
# per label instead of once per pair.
is_absent = {label: combined[label] == 0 for label in asyncs}
is_present = {label: combined[label] == 1 for label in asyncs}

pvi_pairs = []
for a, b in combinations(asyncs, 2):
    # Rows index label `a` (0 = absent, 1 = present); columns index label `b`.
    # Series.sum() counts the True entries in vectorised form rather than
    # iterating the Series element by element with the builtin sum().
    mat = np.array(
        [
            [(is_absent[a] & is_absent[b]).sum(), (is_absent[a] & is_present[b]).sum()],
            [(is_present[a] & is_absent[b]).sum(), (is_present[a] & is_present[b]).sum()],
        ]
    )
    fish = fisher_exact(mat)  # indexable as (odds ratio, p-value)
    pvi_pairs.append({"PVI Pair" : "{}-{}".format(a,b), "Odd's Ratio" : fish[0], "p-value" : fish[1]})
pd.DataFrame(pvi_pairs).sort_values("p-value")

Unnamed: 0,PVI Pair,Odd's Ratio,p-value
16,multiple trigger-late cycling,9.000024,6.304235e-34
14,failed trigger-expiratory work,2.622457,6.735500000000001e-25
1,early trigger-failed trigger,6.971678,3.654605e-15
9,late trigger-late cycling,2.229657,4.6364e-13
15,multiple trigger-early cycling,13.606652,1.497556e-12
20,late cycling-expiratory work,0.65755,3.576275e-09
19,early cycling-expiratory work,2.148755,6.266874e-05
17,multiple trigger-expiratory work,1.823695,0.0003683944
0,early trigger-late trigger,0.0,0.0004118949
13,failed trigger-late cycling,0.537165,0.0008656578
