In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict
import seaborn as sns

# Path to the clinical-data CSV export, given relative to the notebook's working directory.
CLINICAL_DATA = './../DATA/Clean Data/IMPALA_Clinical_Data_202308211019_Raw.csv'

### Load data

In [None]:

def read_clinical_df(path):
    """Read the clinical CSV export at `path` and return it as a DataFrame.

    Parameters
    ----------
    path : str or path-like
        Location of the CSV file, passed straight to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed table. ``low_memory=False`` makes pandas infer each
        column's dtype from the whole file rather than chunk by chunk.
    """
    clinical_frame = pd.read_csv(path, low_memory=False)
    return clinical_frame


In [None]:

# Load the clinical table once; the analysis cells below all read from `clinical_df`.
clinical_df = read_clinical_df(CLINICAL_DATA)


### Analysis

#### Circulatory events

In [None]:
# Boolean: for each patient, flag which circulatory events were ever recorded
# (at recruitment or on any daily row), then tally the flags by discharge outcome.

adrenaline_columns = ['record_id', 'recru_circulatory', 'dly_circulatory', 'dis_outcome']
df = clinical_df[clinical_df.columns[clinical_df.columns.str.startswith(tuple(adrenaline_columns))]]

# Event codes in heatmap column order (code 1 -> column 0, ..., code 4 -> column 3).
event_codes = [1, 2, 3, 4]
event_labels = ['Transfusion', 'Intravenous', 'Adrenaline', 'CPR']

# Identifier and outcome columns are not events. Outcome codes 1/2 would collide
# with event codes 1/2, so these columns are removed from the positional event
# slices in case a slice reaches them (the daily slice below is open-ended).
non_event_cols = df.columns[df.columns.str.startswith(('record_id', 'dis_outcome'))]

dead, alive = np.array([0, 0, 0, 0]), np.array([0, 0, 0, 0])

for record_id, patient_rows in df.groupby('record_id'):

    # The outcome is read from the patient's first row; only codes 1 and 2 are analysed.
    outcome = patient_rows['dis_outcome'].iloc[0]
    if outcome not in [1, 2]:
        continue

    # NOTE(review): columns 1-4 are taken as the recruitment block and columns
    # 6 onward as the daily block (column 5 is skipped). This depends on the
    # column order of the CSV export - confirm against the data dictionary.
    recru_values = patient_rows.iloc[:, 1:5].drop(columns=non_event_cols, errors='ignore').values
    dly_values = patient_rows.iloc[:, 6:].drop(columns=non_event_cols, errors='ignore').values

    # Each event is checked independently: an if/elif chain would record only
    # the lowest-numbered event and hide every other event the patient had.
    temp = [
        (code in recru_values) or (code in dly_values)
        for code in event_codes
    ]

    if outcome == 1:
        alive += temp
    else:
        dead += temp

print(alive, dead)

sns.heatmap([alive, dead],
            annot=True, fmt='.4g')

plt.xticks(np.arange(0.5, 4.5, 1), event_labels)
plt.yticks(np.arange(0.5, 2.5, 1), ['Alive', 'Died'])
plt.title('Patient outcome vs. circulatory event')
plt.show()


In [None]:
# Counts: for each patient, count circulatory events (presence at recruitment
# plus the number of daily occurrences), then sum the counts by discharge outcome.

adrenaline_columns = ['record_id', 'recru_circulatory', 'dly_circulatory', 'dis_outcome']
df = clinical_df[clinical_df.columns[clinical_df.columns.str.startswith(tuple(adrenaline_columns))]]

# Event codes in heatmap column order (code 1 -> column 0, ..., code 4 -> column 3).
event_codes = [1, 2, 3, 4]
event_labels = ['Transfusion', 'Intravenous', 'Adrenaline', 'CPR']

# Identifier and outcome columns are not events. Outcome codes 1/2 would collide
# with event codes 1/2, so these columns are removed from the positional event
# slices in case a slice reaches them (the daily slice below is open-ended).
non_event_cols = df.columns[df.columns.str.startswith(('record_id', 'dis_outcome'))]

dead, alive = np.array([0, 0, 0, 0]), np.array([0, 0, 0, 0])

for record_id, patient_rows in df.groupby('record_id'):

    # The outcome is read from the patient's first row; only codes 1 and 2 are analysed.
    outcome = patient_rows['dis_outcome'].iloc[0]
    if outcome not in [1, 2]:
        continue

    # NOTE(review): columns 1-4 are taken as the recruitment block and columns
    # 6 onward as the daily block (column 5 is skipped). This depends on the
    # column order of the CSV export - confirm against the data dictionary.
    recru_values = patient_rows.iloc[:, 1:5].drop(columns=non_event_cols, errors='ignore').values
    dly_values = patient_rows.iloc[:, 6:].drop(columns=non_event_cols, errors='ignore').values

    temp = [0, 0, 0, 0]
    for position, code in enumerate(event_codes):
        # Recruitment contributes at most one per event (presence only). Each
        # code is tested independently so a second event is not skipped.
        if code in recru_values:
            temp[position] += 1
        # Daily occurrences are added to the recruitment tally; assigning here
        # would overwrite it. NaN cells never compare equal to a code.
        temp[position] += int(np.count_nonzero(dly_values == code))

    if outcome == 1:
        alive += temp
    else:
        dead += temp

print(alive, dead)

sns.heatmap([alive, dead],
            annot=True, fmt='.4g')

plt.xticks(np.arange(0.5, 4.5, 1), event_labels)
plt.yticks(np.arange(0.5, 2.5, 1), ['Alive', 'Died'])
plt.title('Patient outcome vs. circulatory event')
plt.show()

#### Sepsis

In [None]:
# Boolean: for each patient, flag which infectious categories were ever recorded
# (at recruitment or on any daily row), then tally the flags by discharge outcome.

sepsis_columns = ['record_id', 'recru_infectious', 'dly_infectious', 'dis_outcome']
df = clinical_df[clinical_df.columns[clinical_df.columns.str.startswith(tuple(sepsis_columns))]]

# Heatmap columns; column 0 is the fallback used when no coded event is found.
sepsis_labels = ['No sepsis', 'Suspected sepsis', 'Anti-malaria']
# Code -> heatmap column. Recruitment and daily rows use the same mapping
# (code 1 -> column 1, code 2 -> column 2).
code_to_column = {1: 1, 2: 2}

# Identifier and outcome columns are not events. Outcome codes 1/2 would collide
# with event codes 1/2, so these columns are removed from the positional event
# slices in case a slice reaches them (the daily slice below is open-ended).
non_event_cols = df.columns[df.columns.str.startswith(('record_id', 'dis_outcome'))]

dead, alive = np.array([0, 0, 0]), np.array([0, 0, 0])

for record_id, patient_rows in df.groupby('record_id'):

    # The outcome is read from the patient's first row; only codes 1 and 2 are analysed.
    outcome = patient_rows['dis_outcome'].iloc[0]
    if outcome not in [1, 2]:
        continue

    # NOTE(review): columns 1-4 are taken as the recruitment block and columns
    # 6 onward as the daily block (column 5 is skipped). This depends on the
    # column order of the CSV export - confirm it holds for the infectious columns.
    recru_values = patient_rows.iloc[:, 1:5].drop(columns=non_event_cols, errors='ignore').values
    dly_values = patient_rows.iloc[:, 6:].drop(columns=non_event_cols, errors='ignore').values

    temp = [False, False, False]
    for code, column in code_to_column.items():
        # Each code is tested independently so a patient with both categories
        # is flagged for both.
        if (code in recru_values) or (code in dly_values):
            temp[column] = True

    # A patient with no coded event at all is counted under 'No sepsis'.
    if True not in temp:
        temp[0] = True

    if outcome == 1:
        alive += temp
    else:
        dead += temp


print(alive, dead)

sns.heatmap([alive, dead],
            annot=True, fmt='.4g')

plt.xticks(np.arange(0.5, 3.5, 1), sepsis_labels)
plt.yticks(np.arange(0.5, 2.5, 1), ['Alive', 'Died'])
plt.title('Patient outcome vs. sepsis')
plt.show()


In [None]:
# Counts: for each patient, count infectious events per category (presence at
# recruitment plus the number of daily occurrences), then sum by discharge outcome.

sepsis_columns = ['record_id', 'recru_infectious', 'dly_infectious', 'dis_outcome']
df = clinical_df[clinical_df.columns[clinical_df.columns.str.startswith(tuple(sepsis_columns))]]

# Heatmap columns; column 0 is the fallback used when no coded event is found.
sepsis_labels = ['No sepsis', 'Suspected sepsis', 'Anti-malaria']
# Code -> heatmap column (code 1 -> column 1, code 2 -> column 2).
code_to_column = {1: 1, 2: 2}

# Identifier and outcome columns are not events. Outcome codes 1/2 would collide
# with event codes 1/2, so these columns are removed from the positional event
# slices in case a slice reaches them (the daily slice below is open-ended).
non_event_cols = df.columns[df.columns.str.startswith(('record_id', 'dis_outcome'))]

dead, alive = np.array([0, 0, 0]), np.array([0, 0, 0])

for record_id, patient_rows in df.groupby('record_id'):

    # The outcome is read from the patient's first row; only codes 1 and 2 are analysed.
    outcome = patient_rows['dis_outcome'].iloc[0]
    if outcome not in [1, 2]:
        continue

    # NOTE(review): columns 1-4 are taken as the recruitment block and columns
    # 6 onward as the daily block (column 5 is skipped). This depends on the
    # column order of the CSV export - confirm it holds for the infectious columns.
    recru_values = patient_rows.iloc[:, 1:5].drop(columns=non_event_cols, errors='ignore').values
    dly_values = patient_rows.iloc[:, 6:].drop(columns=non_event_cols, errors='ignore').values

    temp = [0, 0, 0]
    for code, column in code_to_column.items():
        # Recruitment contributes at most one per category (presence only).
        if code in recru_values:
            temp[column] += 1
        # Daily occurrences are added to the recruitment tally; assigning here
        # would overwrite it. NaN cells never compare equal to a code.
        temp[column] += int(np.count_nonzero(dly_values == code))

    # 'No sepsis' applies only when nothing was counted. Testing for the value 1
    # would also mark patients whose counts are all 0 or >= 2 (e.g. [0, 3, 0]).
    if not any(temp):
        temp[0] = 1

    if outcome == 1:
        alive += temp
    else:
        dead += temp


print(alive, dead)

sns.heatmap([alive, dead],
            annot=True, fmt='.4g')

plt.xticks(np.arange(0.5, 3.5, 1), sepsis_labels)
plt.yticks(np.arange(0.5, 2.5, 1), ['Alive', 'Died'])
plt.title('Patient outcome vs. sepsis')
plt.show()
