In [None]:
# Imports
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [None]:
# Demo heatmap: seaborn's example "flights" dataset reshaped to month x year
flights = sns.load_dataset("flights")
# DataFrame.pivot takes keyword-only arguments in pandas 2.x; the positional
# form raises TypeError there, so name index/columns/values explicitly.
flights = flights.pivot(index="month", columns="year", values="passengers")
ax = sns.heatmap(flights, cmap="YlGnBu")

In [None]:
# Display the pivoted flights table (months as rows, years as columns,
# passenger counts as values)
flights

In [None]:
# Lookup table: dataset column name -> short outcome description.
# Insertion order is kept, so it also fixes the order used when iterating.
outcomes = dict(
    mtopd="death",
    cnstrokp="stroke",
    crenfail="renal_failure",
    cpvntlng="prolong_vent",
    deepsterninf="sternal_inf",
    reop="reoperation",
    anymorbidity="any_morbidity",
    llos="long_stay",
)

In [None]:
# Load data from CSV into dataframe.
# The file location can be overridden with the STS_DATA_CSV environment
# variable so the notebook is not tied to a single machine; when the variable
# is unset the original absolute path is used unchanged.
fpath = os.environ.get(
    "STS_DATA_CSV",
    "/Users/erik/Dropbox (Partners HealthCare)/sts-data/mgh-all-features-labels.csv",
)
df = pd.read_csv(fpath)

In [None]:
# Restrict the frame to the columns listed as keys of `outcomes`
# (all rows kept, columns in the dict's order)
df = df.loc[:, list(outcomes)]
df

In [None]:
# Count how often each distinct value occurs in every outcome column.
# Result: {outcome column: {value: count}}; missing values are tallied under
# the string key 'nan'.
unique_outcome_counts = {}
for outcome in outcomes:
    column = df[outcome]
    counts_this_outcome = {}
    # unique() returns values in order of first appearance
    unique_values_this_outcome = column.unique()
    for unique_value in unique_values_this_outcome:
        if pd.isna(unique_value):
            # pd.isna accepts any scalar, whereas np.isnan raises TypeError on
            # non-numeric values such as strings. NaN never compares equal to
            # itself, so missing entries are counted with isna() rather than ==.
            counts_this_outcome['nan'] = int(column.isna().sum())
        else:
            # Vectorised count instead of summing element by element in Python
            counts_this_outcome[unique_value] = int((column == unique_value).sum())
    unique_outcome_counts[outcome] = counts_this_outcome

In [None]:
# Display the nested {outcome column: {value: count}} mapping
unique_outcome_counts

In [None]:
# Tabulate the per-outcome value counts: after the transpose each outcome
# column is a row and each observed value is a column.
df_unique = pd.DataFrame(unique_outcome_counts).T
# Row-wise sum of all value counts, appended as a 'total' column
df_unique = df_unique.assign(total=df_unique.sum(axis=1))
# Persist the table (row labels included) next to the notebook
df_unique.to_csv('mgh-all-labels-unique-counts.csv', index=True)
df_unique

In [None]:
# Prepare an outcome x outcome grid of empty placeholders:
# outcome_counts[outer][inner] is filled in later with a count.
outcome_counts = {}
for outcome_outer in outcomes:
    outcome_counts[outcome_outer] = {outcome_inner: {} for outcome_inner in outcomes}

In [None]:
# For every outcome, count how many of the patients who had it also had each
# of the outcomes (including itself). Results are written into the
# pre-initialised nested dict outcome_counts[outer][inner].
for outcome in outcomes:
    # A patient has the outcome only when the label is present and non-zero.
    # NaN != 0 evaluates to True, so missing labels are excluded explicitly;
    # otherwise they would be counted as positives.
    has_outcome = df[outcome].notna() & (df[outcome] != 0)
    df_this_outcome = df[has_outcome]
    print(f"outcome outer: {outcome} / counts = {df_this_outcome.shape[0]}")
    for outcome_inner in outcomes:
        inner_labels = df_this_outcome[outcome_inner]
        # Same rule for the inner outcome; vectorised sum instead of a Python loop
        counts = int((inner_labels.notna() & (inner_labels != 0)).sum())
        print(f"    outcome inner: {outcome_inner} / counts = {counts}")
        outcome_counts[outcome][outcome_inner] = counts

In [None]:
# Convert the nested counts into an integer table, then replace the raw column
# names on both axes with the descriptions from `outcomes`.
df_counts = (
    pd.DataFrame(outcome_counts)
    .astype(int)
    .rename(columns=outcomes, index=outcomes)
)
df_counts

In [None]:
# Plot heatmap of counts
sns.set_context("talk")
fig, ax = plt.subplots(figsize=(13, 10))
sns.heatmap(ax=ax, data=df_counts, annot=True, fmt="d", cmap="YlGnBu")
ax.set_title("Correlation between MGH STS outcomes: counts")
plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
         rotation_mode="anchor")
# Compute the layout only after the title and rotated tick labels exist, so
# the saved image makes room for them instead of clipping them.
fig.tight_layout()
fig.savefig("sts_outcomes_counts.png")

In [None]:
# Plot heatmap of percent: each count expressed as a percentage of the number
# of rows in df, rounded to two decimals
df_percentage = df_counts.div(df.shape[0]).multiply(100).round(2)
sns.set_context("talk")
fig, ax = plt.subplots(figsize=(13, 10))
sns.heatmap(ax=ax, data=df_percentage, annot=True, fmt=".2f", cmap="YlGnBu")
ax.set_title("Correlation between MGH STS outcomes: percentage")
plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
         rotation_mode="anchor")
# Compute the layout only after the title and rotated tick labels exist, so
# the saved image makes room for them instead of clipping them.
fig.tight_layout()
fig.savefig("sts_outcomes_percentage.png")