In [14]:
import pickle as pkl
import os
import pandas as pd

# !pip install matplotlib altair
import matplotlib.pyplot as plt
import altair as alt

In [15]:
# Directory layout
# os.path.dirname("") evaluates to "", so the relative paths below are
# resolved against the current working directory by os.path.abspath.
pwd = os.path.dirname("")

# Default locations, derived from the repository structure
data_dir = os.path.abspath(os.path.join(pwd, "..", "data", "data", ""))
output_dir = os.path.abspath(os.path.join(pwd, "..", "output", ""))
pkl_dir = os.path.join(output_dir, "pkl")


def _read_pickle(file_name):
    """Return the object unpickled from `file_name` inside `pkl_dir`."""
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(os.path.join(pkl_dir, file_name), "rb") as handle:
        return pkl.load(handle)


# Pickled artefacts
inputs = _read_pickle("trimmed_seqs.pkl")
vocab = _read_pickle("all_ftrs_dict.pkl")
all_feats = _read_pickle("feature_lookup.pkl")
demog_lookup = _read_pickle("demog_dict.pkl")

# Cohort table
cohort = pd.read_csv(os.path.join(output_dir, "cohort.csv"))

In [16]:
# Long-format copy of `cohort`: one output row per original row and outcome
# column, with the outcome codes and 0/1 flags replaced by display labels.
cohort_melt = (
    cohort
    .melt(
        id_vars=["key", "age", "length"],
        value_vars=["misa_pt", "icu", "death"],
    )
    .assign(
        variable=lambda frame: frame["variable"].map(
            {"icu": "ICU", "death": "Death", "misa_pt": "HS"}
        ),
        value=lambda frame: frame["value"].map({0: "No", 1: "Yes"}),
    )
)

In [17]:
# Sanity check: `inputs` must hold exactly one entry per row of `cohort`.
# The message reports both sizes so a mismatch can be diagnosed from the
# traceback alone.
assert len(inputs) == cohort.shape[0], (
    f"inputs has {len(inputs)} entries but cohort has {cohort.shape[0]} rows"
)

In [18]:
# One row per entry of `inputs`: "pD" is the length of the entry's first
# element and "labels" is its third element; the middle element is unused here.
length_records = [
    (len(sequence), label)
    for sequence, _, label in inputs
]
lengths = pd.DataFrame(length_records, columns=["pD", "labels"])

In [19]:
# Expand the per-row "labels" entries into one column per outcome, then
# reshape to long format for plotting.
# NOTE(review): assumes each label is ordered (icu, death, misa_pt) — confirm
# against the code that produced trimmed_seqs.pkl.
label_cols = ["icu", "death", "misa_pt"]

# Guarded so the cell can be re-run: once "labels" has been expanded and
# dropped, a second execution skips straight to the melt instead of failing
# with KeyError: 'labels'.
if "labels" in lengths.columns:
    expanded_labels = pd.DataFrame(
        lengths["labels"].tolist(),
        index=lengths.index,
        columns=label_cols,
    )
    lengths = pd.concat([lengths.drop(columns="labels"), expanded_labels], axis=1)

lengths_melt = lengths.melt(id_vars="pD", value_vars=label_cols)

In [20]:
# Replace internal codes with display labels for the charts.
# A plain `Series.map(dict)` converts every value that is not a key into NaN,
# so running this cell a second time would blank out the already-relabelled
# columns. Falling back to the current value keeps the cell idempotent.
outcome_names = {"icu": "ICU", "death": "Death", "misa_pt": "HS"}
yes_no = {0: "No", 1: "Yes"}

lengths_melt["variable"] = lengths_melt["variable"].map(
    lambda code: outcome_names.get(code, code)
)
lengths_melt["value"] = lengths_melt["value"].map(
    lambda flag: yes_no.get(flag, flag)
)

## Days of Observation

In [21]:
# Use Altair's 'json' data transformer (per the Altair docs it writes the
# chart data to a JSON file and references it by URL rather than embedding it).
alt.data_transformers.enable('json')

# Box plots of "pD" (days of follow-up) by Yes/No status, one facet row per
# outcome. Outliers are drawn here, which stretches the x-axis enough to make
# the boxes themselves hard to read.
(
    alt.Chart(lengths_melt)
    .mark_boxplot(outliers=True)
    .encode(
        y=alt.Y("value:O", title = ""),
        x=alt.X("pD:Q", title="Days of Follow-up"),
        color=alt.Color('variable:N', title = "Outcome"),
    )
    # Facet rows are described with the `alt.Row` channel; `alt.X` is the
    # positional x-axis channel and does not belong here.
    .facet(row=alt.Row('variable:N', title = ""))
)

In [22]:
# Box plots of "pD" (days of follow-up) by Yes/No status, one facet row per
# outcome, with outlier points suppressed so the boxes are readable.
(
    alt.Chart(lengths_melt)
    .mark_boxplot(outliers=False)
    .encode(
        y=alt.Y("value:O", title = ""),
        x=alt.X("pD:Q", title="Days of Follow-up"),
        color=alt.Color('variable:N', title = "Outcome"),
    )
    # Facet rows are described with the `alt.Row` channel; `alt.X` is the
    # positional x-axis channel and does not belong here.
    .facet(row=alt.Row('variable:N', title = ""))
)

## Age

In [23]:
# Use Altair's 'json' data transformer (per the Altair docs it writes the
# chart data to a JSON file and references it by URL rather than embedding it).
alt.data_transformers.enable('json')

# Box plots of age by Yes/No status, one facet row per outcome, outliers shown.
(
    alt.Chart(cohort_melt)
    .mark_boxplot(outliers=True)
    .encode(
        y=alt.Y("value:O", title = ""),
        x=alt.X("age:Q", title="Age"),
        color=alt.Color('variable:N', title = "Outcome"),
    )
    # Facet rows are described with the `alt.Row` channel; `alt.X` is the
    # positional x-axis channel and does not belong here.
    .facet(row=alt.Row('variable:N', title = ""))
)