# Analysis of `alarm_violations.csv`

For each parameter ...
* Concerning alarm violations ...
  * Create a boxplot and stripplot based on the VALUENUM
  * Clean the data if necessary
  * Create histogram for cleaned VALUENUM  
* Concerning alarm thresholds ...
  * Create a boxplot and stripplot based on the THRESHOLD_VALUE (stratified by THRESHOLD_TYPE)
  * Clean the data if necessary
  * Create histogram for cleaned THRESHOLD_VALUE (stratified by THRESHOLD_TYPE)


## Import Data

In [None]:
# Overview: Import all libraries used.
import numpy as np
import pandas as pd
#import scipy
#import matplotlib.pyplot as plt
import seaborn as sns
#import sklearn

In [None]:
import pandas as pd

# Read the alarm-violation records from the CSV file located next to this
# notebook and preview the first rows.
alarm_violations = pd.read_csv(filepath_or_buffer='./alarm_violations.csv')
alarm_violations.head(5)

## Parameter: Non Invasive Blood Pressure systolic

* `220179` **Non Invasive Blood Pressure systolic** (NBPs), metavision, in mmHg (numeric)
* `223751` **Non-Invasive Blood Pressure Alarm - High** (NBP Alarm - High), metavision, in mmHg (numeric)
* `223752` **Non-Invasive Blood Pressure Alarm - Low** (NBP Alarm - Low), metavision, in mmHg (numeric)

In [None]:
# Keep only the alarm violations recorded for ITEMID 220179 (NBPs), show the
# resulting frame and summarise the measured values.
is_nbps_item = alarm_violations["ITEMID"].eq(220179)
NBPs_violations = alarm_violations.loc[is_nbps_item]
display(NBPs_violations)
NBPs_violations["VALUENUM"].describe()

The VALUENUM values include at least one negative NBPs (min = -69) and a maximum NBPs of 141146.040000 mmHg.
These appear to be implausible outliers.

Let's check whether the unit (VALUEUOM) is "mmHg" for all NBPs values (which is what is expected according to `D_ITEMS.csv`).

In [None]:
# List the distinct units of measurement that occur among the NBPs violations.
NBPs_violations["VALUEUOM"].unique()

All NBPs are given in mmHg as expected.

Let's check the literature to see what NBPs can be expected based on medical knowledge.

* **TODO:** insert further literature references here
* "The highest pressure recorded in an individual was **370**/360." (Influence of breathing technique on arterial blood pressure during heavy weight lifting, https://pubmed.ncbi.nlm.nih.gov/7741618/)
  * "Under normal circumstances (presumably a BP patient not doing heavy exercise), blood pressure approaching 300 is very dangerous. In fact, it is very rare to have any recorded history of 300+ mm Hg of blood pressure." (https://www.scienceabc.com/eyeopeners/how-high-can-a-blood-pressure-go.html)
* "That means you can find really low BP, very close to 0/0, on a 'living' person, but it’s absolutely an emergency situation which requires intensive medical care, even CPR." (https://www.quora.com/What-is-the-lowest-blood-pressure-recorded-on-a-living-person)

Decision for now: Consider systolic blood pressures below 0 and above 370 mmHg as implausible values to be removed before further analysis.

In [None]:
# Inspect every NBPs violation with a measured value below 1000, ordered by
# that value.
value_below_1000 = NBPs_violations["VALUENUM"].lt(1000)
NBPs_violations.loc[value_below_1000].sort_values(by="VALUENUM")

In [None]:
# Plausibility bounds for systolic blood pressure in mmHg. Values below the
# minimum or above the maximum are treated as implausible and removed.
# Defined once so that the three masks below cannot drift apart.
NBPS_MIN_PLAUSIBLE = 0
NBPS_MAX_PLAUSIBLE = 370

# Rows inside the inclusive plausible range [NBPS_MIN_PLAUSIBLE, NBPS_MAX_PLAUSIBLE].
NBPs_violations_clean = NBPs_violations[
    NBPs_violations["VALUENUM"].between(NBPS_MIN_PLAUSIBLE, NBPS_MAX_PLAUSIBLE)
]
# Rows removed for being below / above the plausible range, kept for review.
# Rows with a missing VALUENUM (if any) match none of the three masks.
NBPs_violations_removed_too_low = NBPs_violations[NBPs_violations["VALUENUM"] < NBPS_MIN_PLAUSIBLE]
NBPs_violations_removed_too_high = NBPs_violations[NBPs_violations["VALUENUM"] > NBPS_MAX_PLAUSIBLE]

In [None]:
# Review the rows that were dropped because their VALUENUM was too low:
# first the rows ordered by value, then summary statistics.
# Author's assessment: removing them seems to make sense.
display(NBPs_violations_removed_too_low.sort_values(by="VALUENUM"))
NBPs_violations_removed_too_low["VALUENUM"].describe()

In [None]:
# Show measured value, threshold value and threshold type for the too-low
# rows, ordered by measured value.
display(
    NBPs_violations_removed_too_low
    .loc[:, ["VALUENUM", "THRESHOLD_VALUE", "THRESHOLD_TYPE"]]
    .sort_values(by="VALUENUM")
)

In [None]:
# Review the rows that were dropped because their VALUENUM was too high:
# first the rows ordered by value, then summary statistics.
# Author's assessment: removing them seems to make sense.
display(NBPs_violations_removed_too_high.sort_values(by="VALUENUM"))
NBPs_violations_removed_too_high["VALUENUM"].describe()

In [None]:
# Show measured value, threshold value and threshold type for the too-high
# rows, ordered by measured value.
display(
    NBPs_violations_removed_too_high
    .loc[:, ["VALUENUM", "THRESHOLD_VALUE", "THRESHOLD_TYPE"]]
    .sort_values(by="VALUENUM")
)

In [None]:
# Review the cleaned NBPs violations: rows ordered by measured value,
# followed by summary statistics of VALUENUM.
display(NBPs_violations_clean.sort_values(by="VALUENUM"))
NBPs_violations_clean["VALUENUM"].describe()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

sns.set_style("whitegrid")

# Two stacked panels sharing the x-axis: a slim boxplot on top (15 % of the
# height) and a histogram with KDE overlay below (85 %).
fig, (fig_box, fig_hist) = plt.subplots(
    nrows=2,
    sharex=True,
    figsize=(10, 5),
    gridspec_kw=dict(height_ratios=(.15, .85)),
)
fig.suptitle("NBPs alarm violations in cleaned data set", fontsize=18)

# Top panel: boxplot; its x label is blanked because the histogram carries it.
sns.boxplot(x="VALUENUM", data=NBPs_violations_clean, ax=fig_box)
fig_box.set_xlabel("")

# Bottom panel: histogram of the cleaned values with a KDE curve.
sns.histplot(x="VALUENUM", data=NBPs_violations_clean, kde=True, ax=fig_hist)
fig_hist.set_ylabel("Count", fontsize=12)
fig_hist.set_xlabel("VALUENUM (Systolic blood pressure in mmHg)", fontsize=12)

plt.show()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

sns.set_style("whitegrid")

# Side-by-side view of the cleaned NBPs violations, coloured by threshold
# type: a histogram on the left and a kernel density estimate on the right.
fig, axs = plt.subplots(1, 2, figsize=(20, 5))
fig.suptitle("Distribution of NBPs alarm violations stratified by threshold type in cleaned data set", fontsize=18)

# Left panel: counts per bin, one colour per THRESHOLD_TYPE.
sns.histplot(ax=axs[0], data=NBPs_violations_clean, x="VALUENUM", hue="THRESHOLD_TYPE", palette=["darkblue", "darkgreen"])
axs[0].set_title("Histogram", fontsize=12)
axs[0].set_xlabel("VALUENUM (Systolic blood pressure in mmHg)", fontsize=12)
axs[0].set_ylabel("Count", fontsize=12)

# Right panel: smoothed density per THRESHOLD_TYPE.
sns.kdeplot(ax=axs[1], data=NBPs_violations_clean, x="VALUENUM", hue="THRESHOLD_TYPE", palette=["darkblue", "darkgreen"])
axs[1].set_title("Kernel density estimate (KDE)", fontsize=12)
axs[1].set_xlabel("VALUENUM (Systolic blood pressure in mmHg)", fontsize=12)
axs[1].set_ylabel("Density", fontsize=12)

# plt.show() renders all open figures; it takes no figure argument (its only
# documented parameter is the keyword `block`), so the figure is not passed in.
plt.show()

### NBPs - Alarm Thresholds

In [None]:
# Summary statistics of the HIGH-type alarm thresholds among the NBPs violations
is_high_threshold = NBPs_violations["THRESHOLD_TYPE"].eq("HIGH")
NBPs_violations.loc[is_high_threshold, "THRESHOLD_VALUE"].describe()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

sns.set_style("whitegrid")

# Restrict once to the HIGH-type thresholds; both panels plot this subset.
NBPs_high_thresholds = NBPs_violations.loc[NBPs_violations["THRESHOLD_TYPE"].eq("HIGH")]

# Two stacked panels sharing the x-axis: a slim boxplot on top (15 % of the
# height) and a histogram with KDE overlay below (85 %).
fig, (fig_box, fig_hist) = plt.subplots(
    nrows=2,
    sharex=True,
    figsize=(7, 5),
    gridspec_kw=dict(height_ratios=(.15, .85)),
)
fig.suptitle("NBPs alarm thresholds of type HIGH", fontsize=16)

# Top panel: boxplot; its x label is blanked because the histogram carries it.
sns.boxplot(x="THRESHOLD_VALUE", data=NBPs_high_thresholds, ax=fig_box)
fig_box.set_xlabel("")

# Bottom panel: histogram of the threshold values with a KDE curve.
sns.histplot(x="THRESHOLD_VALUE", data=NBPs_high_thresholds, kde=True, ax=fig_hist)
fig_hist.set_ylabel("Count", fontsize=12)
fig_hist.set_xlabel("THRESHOLD_VALUE (Systolic blood pressure in mmHg)", fontsize=12)

plt.show()

The minimum value of the HIGH alarm thresholds is 0, which is surprisingly low.

In [None]:
# Summary statistics of the LOW-type alarm thresholds among the NBPs violations
is_low_threshold = NBPs_violations["THRESHOLD_TYPE"].eq("LOW")
NBPs_violations.loc[is_low_threshold, "THRESHOLD_VALUE"].describe()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

sns.set_style("whitegrid")

# Create the figure and axes explicitly so that each name matches what it
# holds: sns.stripplot returns an Axes, not a Figure.
fig, ax = plt.subplots()

# One point per LOW-type threshold value among the NBPs violations.
sns.stripplot(data=NBPs_violations[(NBPs_violations["THRESHOLD_TYPE"] == "LOW")], x="THRESHOLD_VALUE", ax=ax)
ax.set_title("Scatterplot for NBPs alarm thresholds of type LOW", fontsize=14)
ax.set_xlabel("THRESHOLD_VALUE (Systolic blood pressure in mmHg)", fontsize=12)

# plt.show() renders all open figures; it takes no figure/axes argument (its
# only documented parameter is the keyword `block`).
plt.show()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Split the suspiciously high LOW-type thresholds into two value ranges so
# that each range can be plotted on its own scale:
#   _01: THRESHOLD_VALUE in (121, 400]
#   _02: THRESHOLD_VALUE in (400, 100000]
# NOTE(review): the cut points 121, 400 and 100000 are not explained in this
# notebook section - confirm they are the intended boundaries.
is_low_threshold = NBPs_violations["THRESHOLD_TYPE"] == "LOW"
NBPs_threshold_check_low_01 = NBPs_violations[is_low_threshold & (NBPs_violations["THRESHOLD_VALUE"] > 121) & (NBPs_violations["THRESHOLD_VALUE"] <= 400)].sort_values(by=['THRESHOLD_VALUE'])
NBPs_threshold_check_low_02 = NBPs_violations[is_low_threshold & (NBPs_violations["THRESHOLD_VALUE"] > 400) & (NBPs_violations["THRESHOLD_VALUE"] <= 100000)].sort_values(by=['THRESHOLD_VALUE'])

sns.set_style("whitegrid")
fig, axs = plt.subplots(1, 2, figsize=(15, 5))
fig.suptitle("A closer look at the suspiciously high LOW thresholds", fontsize=16)

# Left panel: thresholds in the lower range (_01).
sns.stripplot(ax=axs[0], data=NBPs_threshold_check_low_01, x="THRESHOLD_VALUE")
axs[0].set_title("Scatterplot for NBPs alarm thresholds of type LOW", fontsize=14)
axs[0].set_xlabel("THRESHOLD_VALUE (Systolic blood pressure in mmHg)", fontsize=12)

# Right panel: thresholds in the upper range (_02).
sns.stripplot(ax=axs[1], data=NBPs_threshold_check_low_02, x="THRESHOLD_VALUE")
axs[1].set_title("Scatterplot for NBPs alarm thresholds of type LOW", fontsize=14)
axs[1].set_xlabel("THRESHOLD_VALUE (Systolic blood pressure in mmHg)", fontsize=12)

# plt.show() renders all open figures; it takes no figure argument (its only
# documented parameter is the keyword `block`), so the figure is not passed in.
plt.show()

In [None]:
# Summary statistics of the LOW thresholds in the first (lower) value range.
NBPs_threshold_check_low_01["THRESHOLD_VALUE"].describe()

In [None]:
# Summary statistics of the LOW thresholds in the second (upper) value range.
NBPs_threshold_check_low_02["THRESHOLD_VALUE"].describe()