# Revision

## Change Venn diagram

### Prepare data for Venn plot

In [93]:
import os
import json
import pandas as pd
from mousechd.utils.analyzer import (load_metadata,
                                     get_kingdom_df)

# Directory roots, both resolved relative to the user's HOME directory.
HOME = os.getenv("HOME")
DATADIR = f"{HOME}/DATA/INCEPTION_2020-CHD/Mice/DATA/CTs"
EVALDIR = f"{HOME}/DATA/INCEPTION_2020-CHD/Mice/PAPER/METADATA"

# Metadata tables for the three cohorts used by the cells below.
initial_df = load_metadata(f"{DATADIR}/processed/Imagine/metadata_20210203.csv")
prospective_df = pd.read_csv(f"{EVALDIR}/prospective.csv")
divergent_df = pd.read_csv(f"{EVALDIR}/divergent.csv")

# Terminology table (semicolon-separated).
terms = pd.read_csv(
    f"{DATADIR}/raw/Imagine/metadata/terminology_20201217.csv",
    sep=";",
)

# Rename two kingdoms. `map` yields NaN for every value missing from the
# dictionary, so those entries are filled back in from the original column.
kingdom_renames = {
    "Septal Defects": "Septal defects",
    "Atrial isomerism": "Atrial situs defects",
}
renamed_kingdoms = terms["Kingdom"].map(kingdom_renames)
terms["Kingdom"] = renamed_kingdoms.fillna(terms["Kingdom"])

### Initial cohort

In [100]:
# Build the kingdom table for the initial cohort from the terminology table
# and the initial metadata. The code below treats every column as a 0/1
# indicator -- TODO confirm against get_kingdom_df.
kingdom_df = get_kingdom_df(terms, initial_df)
# Keep only rows whose "Normal heart" value is 0, then drop that column.
kingdom_df = kingdom_df[kingdom_df["Normal heart"] == 0].drop("Normal heart", axis=1)
# Discard rows whose remaining columns sum to zero.
kingdom_df = kingdom_df[kingdom_df.sum(axis=1) != 0]
# Remaining column names; the prospective and divergent cohort cells reuse them.
diseases = kingdom_df.columns.tolist()

# One entry per column, keyed "(<1-based position>) <column name>", holding
# the index labels of the rows where that column equals 1.
result = {
    f"({rank}) {disease}": kingdom_df[kingdom_df[disease] == 1].index.tolist()
    for rank, disease in enumerate(diseases, start=1)
}

# Write through a context manager so the file handle is flushed and closed
# deterministically instead of being left to garbage collection.
with open(f"{EVALDIR}/VennData_initial_cohort.json", "w") as fh:
    json.dump(result, fh, indent=4)


### Prospective cohort

In [105]:
# Restrict the prospective metadata to the columns listed in `diseases`
# (computed in the initial cohort cell), indexed by heart name.
kingdom_df = prospective_df[["heart_name"] + diseases].set_index("heart_name")

# One entry per column, keyed "(<1-based position>) <column name>", holding
# the heart names of the rows where that column equals 1.
result = {
    f"({rank}) {disease}": kingdom_df[kingdom_df[disease] == 1].index.tolist()
    for rank, disease in enumerate(diseases, start=1)
}

# Write through a context manager so the file handle is flushed and closed
# deterministically instead of being left to garbage collection.
with open(f"{EVALDIR}/VennData_prospective_cohort.json", "w") as fh:
    json.dump(result, fh, indent=4)

### Divergent cohort

In [112]:
# Restrict the divergent metadata to the columns listed in `diseases`
# (computed in the initial cohort cell) plus "Situs inversus totalis",
# indexed by heart name.
kingdom_df = divergent_df[["heart_name"] + diseases + ["Situs inversus totalis"]].set_index("heart_name")

# Iterate over the frame's own columns (not `diseases`) so the extra
# "Situs inversus totalis" column is included. Keys are
# "(<1-based position>) <column name>"; values are the heart names of the
# rows where that column equals 1.
result = {
    f"({rank}) {column}": kingdom_df[kingdom_df[column] == 1].index.tolist()
    for rank, column in enumerate(kingdom_df.columns, start=1)
}

# Write through a context manager so the file handle is flushed and closed
# deterministically instead of being left to garbage collection.
with open(f"{EVALDIR}/VennData_divergent_cohort.json", "w") as fh:
    json.dump(result, fh, indent=4)