In [None]:
from speos.utils.config import Config
from speos.preprocessing.handler import InputHandler
import os
# Step up one directory from wherever the kernel started, so that the relative
# paths used further down ("./extensions/...", "./hsps/...", "pc_genes.txt")
# resolve against that parent directory.
# The starting directory is remembered on first execution: a bare
# os.chdir("..") would climb one level higher every time this cell is re-run.
if "_NOTEBOOK_START_DIR" not in globals():
    _NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(_NOTEBOOK_START_DIR, ".."))


In [None]:
import json
from extensions.preprocessing import preprocess_labels
import pandas as pd
def get_coregenes(trait: str, background, results_dir: str = "/mnt/storage/speos/results"):
    """Collect the gene groups for one trait, relative to a background gene set.

    Args:
        trait: Short trait key; must be one of "uc", "cad", "scz", "ad", "ra"
            (any other value raises ``KeyError``).
        background: Iterable of gene identifiers that defines the universe of
            genes considered for the core / non-core split.
        results_dir: Directory containing the
            ``<name>_film_nohetioouter_results.json`` result files. Defaults to
            the location that was previously hard-coded.

    Returns:
        A 4-tuple, in this order:

        * ``allcore`` (set): candidates whose score in the results file is
          exactly 11, plus the labels returned by ``preprocess_labels``,
          restricted to ``background``.
        * ``other_coregenes`` (list): candidates with any other score
          (not restricted to ``background``).
        * ``hsps`` (list): first column of ``./hsps/<trait>.txt``.
        * ``noncore`` (set): ``background`` minus ``allcore`` minus
          ``other_coregenes``. Note that ``hsps`` are not removed from it.
    """
    # Trait key -> file-name stem used by the label and result files.
    trait2name = {"uc": "uc",
                  "cad": "cad_really",
                  "scz": "scz",
                  "ad": "alz",
                  "ra": "ra"}
    name = trait2name[trait]

    mendelians = preprocess_labels("./extensions/{}_only_genes.tsv".format(name))

    # Note: this file is keyed by the short trait key, not by the file-name stem.
    hsps = pd.read_csv("./hsps/{}.txt".format(trait), header=None, index_col=None).iloc[:, 0].tolist()

    results_path = "{}/{}_film_nohetioouter_results.json".format(results_dir, name)
    with open(results_path, "r") as file:
        # The first element of the JSON document maps candidate gene -> score.
        candidate2cs = json.load(file)[0]

    # NOTE(review): 11 is presumably the maximum attainable consensus score - confirm.
    coregenes = [key for key, value in candidate2cs.items() if value == 11]
    other_coregenes = [key for key, value in candidate2cs.items() if value != 11]

    # Materialise the background once; callers may pass a dict view or a list.
    background = set(background)

    allcore = (set(coregenes) | set(mendelians)) & background
    noncore = background - allcore - set(other_coregenes)

    return allcore, other_coregenes, hsps, noncore

In [None]:
import pandas as pd

# Per-gene table: tab-separated, first row holds the column names, no index column.
df = pd.read_csv(
    "pc_genes.txt",
    sep="\t",
    header=0,
    index_col=None,
)

In [None]:
from speos.visualization.settings import *
import seaborn as sns
import matplotlib.pyplot as plt
# One row of three panels (one per trait) with a shared y-axis.
# `cm` and `full_width` are presumably provided by the star import from
# speos.visualization.settings - confirm.
fig, axs = plt.subplots(
    nrows=1,
    ncols=3,
    sharey=True,
    figsize=(cm*full_width*0.33, 6*cm),
)
from matplotlib import ticker

def pval_to_string(pval):
    """Translate a p-value into a significance marker for figure annotation.

    Args:
        pval: The p-value to classify.

    Returns:
        "***" for pval < 0.001, "**" for pval < 0.01, "*" for pval < 0.05,
        and "n.s." otherwise (including NaN, for which every comparison is
        False).
    """
    if pval < 1e-3:
        return "***"
    elif pval < 1e-2:
        return "**"
    elif pval < 5e-2:
        return "*"
    else:
        return "n.s."

from scipy.stats import mannwhitneyu

# Gene symbol -> promoter count. The mapping does not depend on the trait, so it
# is built once instead of being rebuilt row by row inside the loop.
hgnc2tss = dict(zip(df["gene"], df["promoter_count"]))

for ax, trait in zip(axs, ["uc", "cad", "scz"]):
    # get_coregenes returns (core, other candidates, HSPs, non-core) in that
    # order; only the core and non-core sets are used for this figure.
    coregenes, weakcore, hsps, noncore = get_coregenes(trait, background=hgnc2tss.keys())

    core_counts = [hgnc2tss[hgnc] for hgnc in coregenes]
    noncore_counts = [hgnc2tss[hgnc] for hgnc in noncore]

    # Two-sided Mann-Whitney U test of core vs. non-core promoter counts.
    stat, p = mannwhitneyu(core_counts, noncore_counts)

    # The group labels double as x tick labels and as palette keys.
    core_label = "Core Genes\n(n={})".format(len(coregenes))
    noncore_label = "Peripherals\n(n={})".format(len(noncore))

    df2 = pd.DataFrame({"TSS Count": core_counts + noncore_counts,
                        "Group": [core_label] * len(coregenes) + [noncore_label] * len(noncore)})

    sns.boxplot(df2, x="Group", y="TSS Count", ax=ax, showfliers=False,
                palette={core_label: "#01016f",
                         noncore_label: "#5a5a5a"},
                linewidth=0.5,
                width=0.5,
                medianprops=dict(color="white"))

    # The y-axis is shared, so only the left-most panel keeps its label.
    if trait != "uc":
        ax.set_ylabel("")
    ax.set_xticks((0, 1))
    ax.set_xticklabels([core_label, noncore_label], rotation=90)
    ax.set_xlabel("")

    # Trait name and significance marker, centred between the two boxes
    # near the top of the fixed y-range.
    ax.text(x=0.5, y=19, s=trait.upper(), ha="center", va="center", fontsize=8)
    ax.text(x=0.5, y=17, s=pval_to_string(p), ha="center", va="center", fontsize=8)
    ax.set_ylim(-0.1, 20)
    ax.yaxis.set_major_locator(ticker.MaxNLocator(integer=True))

plt.tight_layout()
plt.savefig("promoter_count.svg", bbox_inches="tight")

In [None]:
from speos.visualization.settings import *
import seaborn as sns
import matplotlib.pyplot as plt
# Fresh figure for the enhancer counts: one row of three panels (one per trait)
# with a shared y-axis. `cm` and `full_width` are presumably provided by the
# star import from speos.visualization.settings - confirm.
fig, axs = plt.subplots(
    nrows=1,
    ncols=3,
    sharey=True,
    figsize=(cm*full_width*0.33, 6*cm),
)

# `ticker` is needed for the integer tick locator below; import it here so this
# cell does not depend on an earlier cell having imported it.
from matplotlib import ticker
from scipy.stats import mannwhitneyu

# Gene symbol -> value of the "Roadmap_count" column (plotted as enhancer count).
# The mapping does not depend on the trait, so it is built once instead of
# being rebuilt row by row inside the loop.
hgnc2tss = dict(zip(df["gene"], df["Roadmap_count"]))

for ax, trait in zip(axs, ["uc", "cad", "scz"]):
    # get_coregenes returns (core, other candidates, HSPs, non-core) in that
    # order; only the core and non-core sets are used for this figure.
    coregenes, weakcore, hsps, noncore = get_coregenes(trait, background=hgnc2tss.keys())

    core_counts = [hgnc2tss[hgnc] for hgnc in coregenes]
    noncore_counts = [hgnc2tss[hgnc] for hgnc in noncore]

    # Two-sided Mann-Whitney U test of core vs. non-core enhancer counts.
    stat, p = mannwhitneyu(core_counts, noncore_counts)

    # The group labels double as x tick labels and as palette keys.
    core_label = "Core Genes\n(n={})".format(len(coregenes))
    noncore_label = "Peripherals\n(n={})".format(len(noncore))

    df2 = pd.DataFrame({"Enhancer Count": core_counts + noncore_counts,
                        "Group": [core_label] * len(coregenes) + [noncore_label] * len(noncore)})

    sns.boxplot(df2, x="Group", y="Enhancer Count", ax=ax, showfliers=False,
                palette={core_label: "#01016f",
                         noncore_label: "#5a5a5a"},
                linewidth=0.5,
                width=0.5,
                medianprops=dict(color="white"))

    # The y-axis is shared, so only the left-most panel keeps its label.
    if trait != "uc":
        ax.set_ylabel("")
    ax.set_xticks((0, 1))
    ax.set_xticklabels([core_label, noncore_label], rotation=90)
    ax.set_xlabel("")

    # Trait name and significance marker, centred between the two boxes
    # near the top of the fixed y-range.
    ax.text(x=0.5, y=122, s=trait.upper(), ha="center", va="center", fontsize=8)
    ax.text(x=0.5, y=110, s=pval_to_string(p), ha="center", va="center", fontsize=8)
    ax.set_ylim(-0.1, 130)
    ax.yaxis.set_major_locator(ticker.MaxNLocator(integer=True))

plt.tight_layout()
plt.savefig("enhancer_count.svg", bbox_inches="tight")