In [None]:
%load_ext autoreload
%autoreload 2

import sys
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

sys.path.append("../")

from superr_venn import supervenn, create_error_profile, oddsratio_venn, create_oddratio_profile, create_stratified_error_profile

# Small fixed example

In [None]:
# Three small hand-written sets with partial overlaps ({3, 4} and {1} are
# shared), small enough to verify the resulting diagram by hand.
sets = [
    {1, 2, 3, 4},
    {3, 4, 5},
    {1, 6, 7, 8},
]

# Open a fresh figure before plotting.
plt.figure()
venn = supervenn(sets, square_cell=True)

# Random example

In [None]:
# Draw `models` independent random subsets of {0, ..., set_range - 1} and plot
# them against the full universe.
models = 5
set_range = 150
max_set_size = int(set_range * 1 / 5)  # exclusive upper bound on a set's size

# Fixed seed so that Restart & Run All reproduces the same figure.
rng = np.random.default_rng(42)

sets = [
    set(
        rng.choice(
            set_range,
            size=rng.integers(1, max_set_size),
            # Sample without replacement so the set really has the drawn size;
            # with replacement, duplicates would be collapsed by set().
            replace=False,
        )
    )
    for _ in range(models)
]
universe = set(range(set_range))
names = [f"Model {i + 1}" for i in range(models)]

plt.figure()
venn = supervenn(
    sets,
    set_annotations=names,
    log_color=True,
    square_cell=True,
    universe=universe,
)

# Random example with dependent sets

In [None]:
# Build `models` random sets that are correlated with each other: the first is
# a plain random subset of {0, ..., set_range - 1}; every later set re-uses
# part of a randomly chosen earlier set and adds freshly drawn elements.
models = 5
set_range = 150

# Fixed seed so that Restart & Run All reproduces the same figure.
rng = np.random.default_rng(42)

sets = []
for _ in range(models):
    if not sets:
        # First set: between 10% and 20% of the universe.
        members = set(
            rng.choice(
                set_range,
                size=rng.integers(int(set_range * 0.1), int(set_range * 0.2)),
                replace=False,
            )
        )
    else:
        prev_set = sets[rng.integers(low=0, high=len(sets))]
        # Keep at least 75% (rounded down) but never all of the parent's
        # elements. The truncation is applied to the scaled length so that the
        # lower bound is an integer rather than a float product.
        kept = rng.choice(
            sorted(prev_set),  # sorted: sampling input has a well-defined order
            size=rng.integers(int(len(prev_set) * 0.75), len(prev_set)),
            replace=False,
        )
        # Fresh elements: between 5% and 15% of the universe.
        fresh = rng.choice(
            set_range,
            size=rng.integers(int(set_range * 0.05), int(set_range * 0.15)),
            replace=False,
        )
        members = set(kept) | set(fresh)
    sets.append(members)

universe = set(range(set_range))
names = [f"Model {i + 1}" for i in range(models)]

plt.figure()
venn = supervenn(
    sets,
    set_annotations=names,
    log_color=True,
    square_cell=True,
    universe=universe,
)

# From a DataFrame

## Generate example data

In [None]:
# Generate example data: `models` correlated random sets over
# {0, ..., set_range - 1}, then a DataFrame with one 0/1 indicator column per
# model, a 1-based "Patient ID" column and a random "Subgroup" label.
models = 5
set_range = 150

# Fixed seed so that Restart & Run All reproduces the same data.
rng = np.random.default_rng(42)

sets = []
for _ in range(models):
    if not sets:
        # First set: between 10% and 20% of the universe.
        members = set(
            rng.choice(
                set_range,
                size=rng.integers(int(set_range * 0.1), int(set_range * 0.2)),
                replace=False,
            )
        )
    else:
        prev_set = sets[rng.integers(low=0, high=len(sets))]
        # Keep at least 75% (rounded down) but never all of the parent's
        # elements. The truncation is applied to the scaled length so that the
        # lower bound is an integer rather than a float product.
        kept = rng.choice(
            sorted(prev_set),  # sorted: sampling input has a well-defined order
            size=rng.integers(int(len(prev_set) * 0.75), len(prev_set)),
            replace=False,
        )
        # Fresh elements: between 5% and 15% of the universe.
        fresh = rng.choice(
            set_range,
            size=rng.integers(int(set_range * 0.05), int(set_range * 0.15)),
            replace=False,
        )
        members = set(kept) | set(fresh)
    sets.append(members)

# Row position p (0-based) corresponds to element p of the universe; a 1 in a
# model's column means p is a member of that model's set.
model_names = [f"Model {i + 1}" for i in range(models)]
positions = np.arange(set_range)
df = pd.DataFrame(
    {
        name: np.isin(positions, list(members)).astype(int)
        for name, members in zip(model_names, sets)
    }
)
df["Patient ID"] = range(1, set_range + 1)
df["Subgroup"] = rng.choice(["A", "B"], size=set_range, replace=True)

In [None]:
# Preview only the first rows instead of requesting the whole frame
# (it has one row per patient ID).
df.head()

In [None]:
# Build the error profile from all rows of `df`, using the per-model columns.
model_columns = [f"Model {k}" for k in range(1, models + 1)]
venn = create_error_profile(
    df, id="Patient ID", columns=model_columns, incorrect_value=1
)
plt.show()

In [None]:
# One error profile per subgroup label. The labels are sorted so that the
# position of each profile in `subgroup_venns` is fixed: Series.unique()
# returns labels in order of first appearance, which depends on the row order
# of the (random) data, and later cells index this list by position.
subgroup_venns = []
for subgroup in sorted(df["Subgroup"].unique()):
    print(subgroup)
    subgroup_df = df[df["Subgroup"] == subgroup]
    subgroup_venn = create_error_profile(
        subgroup_df,
        id="Patient ID",
        columns=[f"Model {i + 1}" for i in range(models)],
        incorrect_value=1,
    )
    subgroup_venns.append(subgroup_venn)

In [None]:
# Pass the overall profile together with the first two subgroup profiles to
# oddsratio_venn; the call stays the last expression so its result is shown.
first_subgroup_venn, second_subgroup_venn = subgroup_venns[0], subgroup_venns[1]
oddsratio_venn(
    venn,
    first_subgroup_venn,
    second_subgroup_venn,
    square_cell=True,
    set_annotations=[f"Model {k}" for k in range(1, models + 1)],
)

In [None]:
# Call create_oddratio_profile on the full frame, with "Subgroup" as the second
# positional argument; the call stays the last expression so its result is shown.
profile_options = dict(
    id="Patient ID",
    columns=[f"Model {k}" for k in range(1, models + 1)],
    incorrect_value=1,
)
create_oddratio_profile(df, "Subgroup", **profile_options)

In [None]:
# Call create_stratified_error_profile on the full frame, with "Subgroup" as
# the second positional argument, then render the figure(s).
model_columns = [f"Model {k}" for k in range(1, models + 1)]
create_stratified_error_profile(
    df, "Subgroup", id="Patient ID", columns=model_columns, incorrect_value=1
)
plt.show()