In [None]:
import os
os.chdir(os.path.expanduser("~/speos/"))
from speos.utils.datahandlers import ResultsHandler
from speos.postprocessing.postprocessor import PostProcessor
from speos.utils.config import Config
from speos.visualization.settings import *

import numpy as np
import matplotlib as plt

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import ticker
from speos.preprocessing.handler import InputHandler
import pandas as pd

def get_translation_table(path="data/hgnc_official_list.tsv", sep="\t") -> pd.DataFrame:
    """Load the gene-identifier translation table.

    Only the columns ``symbol``, ``entrez_id`` and ``ensembl_gene_id`` are
    read from the file; the first row is treated as the header.

    Args:
        path: Location of the delimited table.
        sep: Field delimiter used in the file.

    Returns:
        DataFrame holding the three identifier columns.
    """
    wanted_columns = ["symbol", "entrez_id", "ensembl_gene_id"]
    return pd.read_csv(path, sep=sep, header=0, usecols=wanted_columns)

def load_drugtarget_graph(path_to_graph):
    """Build a compound -> gene multigraph from a tab-separated edge list.

    The file is read without a header as the three columns ``Gene``, ``edge``
    and ``Compound``. Only rows whose ``edge`` value contains ``DRUGBANK`` are
    kept.

    Args:
        path_to_graph: Path of the tab-separated edge list.

    Returns:
        networkx.MultiDiGraph with edges pointing from ``Compound`` to
        ``Gene`` and the ``edge`` column stored as edge attribute.
    """
    import pandas as pd
    import networkx as nx

    edge_table = pd.read_table(path_to_graph, sep="\t", names=["Gene", "edge", "Compound"])
    is_drugbank_edge = edge_table["edge"].str.contains("DRUGBANK")
    drugbank_edges = edge_table[is_drugbank_edge]

    return nx.from_pandas_edgelist(
        drugbank_edges,
        source="Compound",
        target="Gene",
        edge_attr="edge",
        create_using=nx.MultiDiGraph,
    )
    
    
def get_drugtarget_dict(
    path_to_graph="data/drkg/cgi.tsv",
    targets_path="/mnt/storage/speos/data/drugbank/all_targets.csv",
    experimental_path="/mnt/storage/speos/data/drugbank/experimental.csv",
) -> dict:
    """Count, per human gene, the non-experimental compounds that target it.

    Both tables are restricted to rows whose ``Species`` is ``"Humans"``.
    Every compound ID found in the ``Drug IDs`` column of the experimental
    table is excluded from the count.

    Args:
        path_to_graph: Unused. Kept so existing callers that pass it keep
            working (an earlier, graph-based implementation read this file).
        targets_path: CSV with the columns ``Gene Name``, ``Species`` and
            ``Drug IDs`` (``;``-separated compound IDs).
        experimental_path: CSV with the columns ``Species`` and ``Drug IDs``
            listing the compounds to exclude.

    Returns:
        dict mapping gene name to the number of non-excluded compound
        entries. Genes whose compounds are all excluded are absent.
    """
    drugbank = pd.read_csv(targets_path)
    drugbank = drugbank[drugbank["Species"] == "Humans"]

    experimental = pd.read_csv(experimental_path)
    experimental = experimental[experimental["Species"] == "Humans"]

    expdrugs = set()
    for druglist in experimental["Drug IDs"]:
        expdrugs.update(drug.strip() for drug in druglist.split(";"))

    # The previous version also parsed "drugbank_vocabulary.csv" into a
    # mapping that was never used; that read has been dropped.
    hgnc2degree = {}
    for gene, compounds in zip(drugbank["Gene Name"], drugbank["Drug IDs"]):
        n_kept = sum(1 for compound in compounds.split(";") if compound.strip() not in expdrugs)
        if n_kept:
            hgnc2degree[gene] = hgnc2degree.get(gene, 0) + n_kept

    return hgnc2degree

def get_drugtargets(self, path_to_graph="data/drkg/cgi.tsv") -> set:
    """Return the gene names that appear as keys of ``get_drugtarget_dict``.

    Args:
        path_to_graph: Path relative to ``self.config.input.main_dir``; the
            joined path is handed to ``get_drugtarget_dict``.

    Returns:
        set of gene names.
    """
    full_path = os.path.join(self.config.input.main_dir, path_to_graph)
    return set(get_drugtarget_dict(full_path))

def drugtarget(core_genes: set, peripheral_genes: set) -> tuple:
    """Compare the drug-target status of core genes against peripheral genes.

    Two tests are run:

    * Fisher's exact test on the table returned by
      ``pp.make_contingency_table`` for the union of both gene sets, the core
      genes, and the drug targets that lie inside that union.
    * A Mann-Whitney U test on the per-gene compound counts of the drug
      targets among the core genes versus those among the peripheral genes.

    The function relies on two module-level names: ``get_drugtarget_dict``
    (gene name -> compound count) and ``pp``, which must be bound before the
    call and provide ``_return_only_valid`` and ``make_contingency_table``.

    Args:
        core_genes (set): Gene names of the group of interest.
        peripheral_genes (set): Gene names of the comparison group.

    Returns:
        tuple: ``(drug_target_results, pvals, (not_predicted_degrees,
        predicted_degrees), df)`` where ``drug_target_results`` is the result
        of ``fisher_exact``, ``pvals`` is a one-element list holding the
        Mann-Whitney p-value, the two degree lists are the compound counts of
        peripheral and core drug targets, and ``df`` is a two-row summary
        DataFrame (core genes first). The FDR columns of ``df`` are left
        unfilled.
    """
    from scipy.stats import fisher_exact, mannwhitneyu

    df = pd.DataFrame(columns=["Group Name", "Group N", "N Drug Targets", "OR DT", "pval DT unadjusted", "pval DT adjusted (FDR)", "Median # of DT", "xDC"," ", "Pairwse Comparison", "pval xDC unadjusted", "pval xDC adjusted (FDR)", "U-Stat"],
                      index=range(2))
    df["Group Name"] = ["Core Genes", "Peripheral Genes"]
    df[" "] = [" "] * 2

    hgnc2degree = get_drugtarget_dict()
    drug_targets = set(hgnc2degree.keys())

    df["Group N"] = [len(core_genes), len(peripheral_genes)]

    peripheral_drug_targets = pp._return_only_valid(drug_targets, peripheral_genes)
    core_drug_targets = pp._return_only_valid(drug_targets, core_genes)
    df["N Drug Targets"] = [len(core_drug_targets), len(peripheral_drug_targets)]

    # Enrichment of drug targets among core genes, with both groups together
    # serving as the background.
    background = peripheral_genes.union(core_genes)
    array = pp.make_contingency_table(background, core_genes, drug_targets.intersection(background))
    drug_target_results = fisher_exact(array)

    df["OR DT"] = [drug_target_results[0], np.nan]
    df["pval DT unadjusted"] = [drug_target_results[1], np.nan]

    # Compound counts of the drug targets inside each group.
    predicted_degrees = [hgnc2degree[hgnc] for hgnc in core_genes.intersection(drug_targets)]
    not_predicted_degrees = [hgnc2degree[hgnc] for hgnc in peripheral_genes.intersection(drug_targets)]

    median_predicted = np.median(predicted_degrees)
    median_not_predicted = np.median(not_predicted_degrees)
    df["Median # of DT"] = [median_predicted, median_not_predicted]
    df["xDC"] = [median_predicted / median_not_predicted, 1]
    df["Pairwse Comparison"] = ["Core Gene vs Peripheral Gene", "None"]

    drug_degree_result = mannwhitneyu(predicted_degrees, not_predicted_degrees)
    pvals = [drug_degree_result[1]]
    u_stats = [drug_degree_result[0]]

    # The peripheral row is the reference group and receives 0 as filler.
    df["pval xDC unadjusted"] = pvals + [0]
    df["U-Stat"] = u_stats + [0]

    return drug_target_results, pvals, (not_predicted_degrees, predicted_degrees), df


from statsmodels.stats.multitest import fdrcorrection as fdr

# --- analysis configuration -------------------------------------------------
phenotypes = ["Ulcerative Colitis", "Coronary Artery Disease", "Schizophrenia", "Rheumathoid Arthtritis", "Alzheimer's Disease"]
methods = ["film"]
pretty_methods = ["Candidates"]
groups = ["Peripheral Genes", "Core Genes"]
categories = ["DT", "#DC", "Dr", "Dr-"]

# --- plot styling -----------------------------------------------------------
tick_size = 12
label_size = 14
marker_size = 10
width_ratios = [10, 1] * 2

crimson = "#6f0000"
navy = "#02055a"
jungle = "#1e5631"
tan = "#c24e00"
petrol = "#005f6a"
grey = "#bbbbbb"
lightgrey = "#dddddd"
colors_ = [crimson, jungle, tan, navy, petrol]

# --- accumulators filled by the per-phenotype loop --------------------------
core_gene_sets = {}
weak_core_gene_sets = {}
only_mendelians = {}
only_candidates = {}
results_dict_values = {}
results_dict_pvals = {}
outer_dfs = []
all_all_genes = set()
global_core_genes = set()
# Starts as every symbol of the translation table and is narrowed in the loop.
global_peripheral_genes = set(get_translation_table()["symbol"].tolist())
import json

# The symbol universe does not depend on the phenotype, so it is read once
# instead of once per iteration.
all_genes = set(get_translation_table()["symbol"].tolist())

for phenotype in phenotypes:
    plot_df_values = pd.DataFrame(columns=categories, index=["Mendelians"] + pretty_methods)
    plot_df_pvals = pd.DataFrame(columns=categories, index=["Mendelians"] + pretty_methods)
    need_to_set_mendelians = True
    dfs = []
    for method, pretty_method in zip(methods, pretty_methods):

        config_paths = {"Ulcerative Colitis": "config_uc_only_nohetio_{}_newstorage.yaml".format(method),
                        "Coronary Artery Disease": "config_cad_really_only_nohetio_{}_newstorage.yaml".format(method),
                        "Schizophrenia": "config_scz_only_nohetio_{}_newstorage.yaml".format(method),
                        "Rheumathoid Arthtritis": "config_ra_only_nohetio_{}_newstorage.yaml".format(method),
                        "Alzheimer's Disease": "config_alz_only_nohetio_{}_newstorage.yaml".format(method),
                        }

        config = Config()
        config.parse_yaml(config_paths[phenotype])
        prepro = InputHandler(config).get_preprocessor()
        prepro.build_graph(adjacency=False)
        mendelians = set([prepro.id2hgnc[_id] for _id in prepro.pos_idx])
        core_genes = set()
        core_genes.update(mendelians)
        only_mendelians[phenotype] = mendelians

        # Running intersection of the gene universes of all phenotypes.
        # (Previously the else-branch intersected with `all_genes`, which
        # discarded the intersection accumulated so far.)
        if len(all_all_genes) == 0:
            all_all_genes = set(prepro.id2hgnc.values())
        else:
            all_all_genes = all_all_genes.intersection(set(prepro.id2hgnc.values()))

        pp = PostProcessor(config)

        outer_results_paths = {"Ulcerative Colitis": "/mnt/storage/speos/results/uc_{}_nohetioouter_results.json".format(method),
                        "Coronary Artery Disease": "/mnt/storage/speos/results/cad_really_{}_nohetioouter_results.json".format(method),
                        "Schizophrenia": "/mnt/storage/speos/results/scz_{}_nohetioouter_results.json".format(method),
                        "Rheumathoid Arthtritis": "/mnt/storage/speos/results/ra_{}_nohetioouter_results.json".format(method),
                        "Alzheimer's Disease": "/mnt/storage/speos/results/alz_{}_nohetioouter_results.json".format(method),
                        }

        results_files = {"Ulcerative Colitis": "/mnt/storage/speos/results/uc_{}_nohetio_outer_0_fold_1.tsv".format(method),
                        "Coronary Artery Disease": "/mnt/storage/speos/results/cad_really_{}_nohetio_outer_0_fold_1.tsv".format(method),
                        "Schizophrenia": "/mnt/storage/speos/results/scz_{}_nohetio_outer_0_fold_1.tsv".format(method),
                        "Rheumathoid Arthtritis": "/mnt/storage/speos/results/ra_{}_nohetio_outer_0_fold_1.tsv".format(method),
                        "Alzheimer's Disease": "/mnt/storage/speos/results/alz_{}_nohetio_outer_0_fold_1.tsv".format(method)
                        }

        # The first element of the results JSON maps gene -> score.
        # Score >= 11 marks a candidate, 1..10 a weak core gene.
        with open(outer_results_paths[phenotype], "r") as file:
            gene2score = json.load(file)[0]

        candidates = set([key for key, value in gene2score.items() if value >= 11])
        core_genes.update(candidates)
        only_candidates[phenotype] = candidates

        # Weak core genes now come from the phenotype's own results file.
        # (Previously the schizophrenia/film file was read for every
        # phenotype.)
        weak_core_genes = set([key for key, value in gene2score.items() if value >= 1 and value < 11])

        weak_core_gene_sets[phenotype] = weak_core_genes

        core_gene_sets[phenotype] = core_genes

        global_core_genes = global_core_genes.union(core_genes)
        peripheral_genes = all_genes.difference(weak_core_genes.union(core_genes))
        global_peripheral_genes = global_peripheral_genes.intersection(peripheral_genes)
        print("Number of global peripheral genes: {}".format(len(global_peripheral_genes)))

        drug_target_results, pvals, (not_predicted_degrees, predicted_degrees), df = drugtarget(core_genes, peripheral_genes)

        print(drug_target_results)
        print(df)
        continue

        # NOTE(review): everything below in this inner loop is unreachable
        # because of the `continue` above. It is kept as found; it references
        # `positive_degrees`, which is not defined in this section, and
        # indexes `drug_target_results` two levels deep, which does not match
        # what `drugtarget` returns.
        total_druggable, leftover_druggable, df2 = pp.druggable(results_files[phenotype])
        df2.columns = [cname + " " if cname in ["Group Name", "Group N"] else cname for cname in df2.columns]
        df["  "] = [" "] * 3
        df = pd.concat([df, df2], axis=1, join="inner", )
        dfs.append(df)

        if need_to_set_mendelians:
            plot_df_values.loc["Mendelians", "DT"] = drug_target_results[0][0]
            plot_df_pvals.loc["Mendelians", "DT"] = drug_target_results[0][1]
            plot_df_values.loc["Mendelians", "Dr"] = total_druggable[0][0]
            plot_df_pvals.loc["Mendelians", "Dr"] = total_druggable[0][1]
            plot_df_values.loc["Mendelians", "Dr-"] = leftover_druggable[0][0]
            plot_df_pvals.loc["Mendelians", "Dr-"] = leftover_druggable[0][1]
            plot_df_values.loc["Mendelians", "#DC"] = np.median(positive_degrees) / np.median(not_predicted_degrees)
            plot_df_pvals.loc["Mendelians", "#DC"] = pvals[1]
            need_to_set_mendelians = False

        plot_df_values.loc[pretty_method, "DT"] = drug_target_results[1][0]
        plot_df_pvals.loc[pretty_method, "DT"] = drug_target_results[1][1]
        plot_df_values.loc[pretty_method, "Dr"] = total_druggable[1][0]
        plot_df_pvals.loc[pretty_method, "Dr"] = total_druggable[1][1]
        plot_df_values.loc[pretty_method, "Dr-"] = leftover_druggable[1][0]
        plot_df_pvals.loc[pretty_method, "Dr-"] = leftover_druggable[1][1]
        plot_df_values.loc[pretty_method, "#DC"] = np.median(predicted_degrees) / np.median(not_predicted_degrees)
        plot_df_pvals.loc[pretty_method, "#DC"] = pvals[0]

    continue
    # NOTE(review): unreachable because of the `continue` above, so
    # `outer_dfs`, `results_dict_values` and `results_dict_pvals` stay empty.
    dfs = pd.concat(dfs, keys = pretty_methods)
    dfs.reset_index(inplace=True)
    dfs.drop("level_1", inplace=True, axis="columns")
    dfs.columns = ["Method"] + dfs.columns[1:].tolist()

    for column in dfs.columns:
        if "OR" in column:
            dfs[column] = dfs[column].round(3)
        if "N " in column:
            dfs[column] = dfs[column].astype(int)
    outer_dfs.append(dfs)
    results_dict_values.update({phenotype: plot_df_values})
    results_dict_pvals.update({phenotype: plot_df_pvals})


# Repeat the drug-target comparison on the gene sets pooled over all phenotypes:
# the union of all core genes versus the genes that are peripheral everywhere.
drug_target_results, pvals, (not_predicted_degrees, predicted_degrees), df = drugtarget(global_core_genes, global_peripheral_genes)
        
print(drug_target_results)
print(df)

# Collect the unadjusted p-values of every per-phenotype table so the FDR
# correction is applied across all phenotypes at once.
# NOTE(review): `outer_dfs` is only appended to by code that sits behind a
# `continue` in the per-phenotype loop, so it appears to be empty here. With
# zero tables `np.array_split(..., n_dfs)` below is expected to raise, and the
# final print would fail on the empty `results_dict_values` - confirm before
# relying on this part.
dt_pvals = []
xdc_pvals = []
dr_pvals = []
drminus_pvals = []

for df in outer_dfs:
    dt_pvals.extend(df["pval DT unadjusted"][~np.isnan(df["pval DT unadjusted"])].tolist())
    xdc_pvals.extend(df["pval xDC unadjusted"].tolist())
    dr_pvals.extend(df["pval Dr unadjusted"][~np.isnan(df["pval Dr unadjusted"])].tolist())
    drminus_pvals.extend(df["pval Dr- unadjusted"][~np.isnan(df["pval Dr- unadjusted"])].tolist())

# `fdr` returns (rejected, corrected p-values); only the corrected values are kept.
dt_pvals_adjusted = fdr(dt_pvals)[1]
xdc_pvals_adjusted = fdr(xdc_pvals)[1]
dr_pvals_adjusted = fdr(dr_pvals)[1]
drminus_pvals_adjusted = fdr(drminus_pvals)[1]

n_dfs = len(outer_dfs)

# Split the corrected values back into one chunk per table and write them into
# the matching rows. This assumes every table contributed the same number of
# p-values, in the order of `phenotypes`.
for df, df_dt_pvals_adj, df_xdc_pvals_adj, df_dr_pvals_adj, drminus_pvals_adj, phenotype in \
    zip(outer_dfs,  np.array_split(dt_pvals_adjusted, n_dfs), np.array_split(xdc_pvals_adjusted, n_dfs), \
    np.array_split(dr_pvals_adjusted, n_dfs), np.array_split(drminus_pvals_adjusted, n_dfs), phenotypes):

    df.loc[~np.isnan(df["pval DT unadjusted"]), "pval DT adjusted (FDR)"] = df_dt_pvals_adj
    df["pval xDC adjusted (FDR)"] = df_xdc_pvals_adj
    df.loc[~np.isnan(df["pval Dr unadjusted"]), "pval Dr adjusted (FDR)"] = df_dr_pvals_adj
    df.loc[~np.isnan(df["pval Dr- unadjusted"]), "pval Dr- adjusted (FDR)"] = drminus_pvals_adj
    df.to_csv("statistical_dump/{}_drugtarget.tsv".format(phenotype), sep="\t", index=False)
    plot_df_pvals = results_dict_pvals[phenotype].copy()
    need_to_set_mendelians = True
    # NOTE(review): the rows are selected by the group names "Mendelian" and
    # "Candidate Gene", while `drugtarget` labels its rows "Core Genes" and
    # "Peripheral Genes" - these selections may come back empty; verify.
    for method in pretty_methods:
        if need_to_set_mendelians:
            plot_df_pvals.loc["Mendelians", "DT"] = df.loc[(df["Method"] == method) & (df["Group Name"] == "Mendelian"), ["pval DT adjusted (FDR)"]].values
            plot_df_pvals.loc["Mendelians", "#DC"] = df.loc[(df["Method"] == method) & (df["Group Name"] == "Mendelian"), ["pval xDC adjusted (FDR)"]].values
            plot_df_pvals.loc["Mendelians", "Dr"] = df.loc[(df["Method"] == method) & (df["Group Name"] == "Mendelian"), ["pval Dr adjusted (FDR)"]].values
            plot_df_pvals.loc["Mendelians", "Dr-"] = df.loc[(df["Method"] == method) & (df["Group Name"] == "Mendelian"), ["pval Dr- adjusted (FDR)"]].values
            need_to_set_mendelians = False

        plot_df_pvals.loc[method, "DT"] = df.loc[(df["Method"] == method) & (df["Group Name"] == "Candidate Gene"), ["pval DT adjusted (FDR)"]].values
        plot_df_pvals.loc[method, "#DC"] = df.loc[(df["Method"] == method) & (df["Group Name"] == "Candidate Gene"), ["pval xDC adjusted (FDR)"]].values
        plot_df_pvals.loc[method, "Dr"] = df.loc[(df["Method"] == method) & (df["Group Name"] == "Candidate Gene"), ["pval Dr adjusted (FDR)"]].values
        plot_df_pvals.loc[method, "Dr-"] = df.loc[(df["Method"] == method) & (df["Group Name"] == "Candidate Gene"), ["pval Dr- adjusted (FDR)"]].values
    
    results_dict_pvals.update({phenotype: plot_df_pvals})

print(results_dict_values['Ulcerative Colitis'])

In [None]:
# Spot check: is JAK3 part of the core gene set collected for ulcerative colitis?
"JAK3" in core_gene_sets["Ulcerative Colitis"]

In [None]:
# Pairwise overlap of the core gene sets, shown as a heatmap.
keys = ["Ulcerative Colitis","Rheumathoid Arthtritis", "Coronary Artery Disease", "Alzheimer's Disease", "Schizophrenia"]

# Phenotypes with an empty gene set are left out (their union could be empty).
genes = [core_gene_sets[key] for key in keys if len(core_gene_sets[key]) > 0]
names = [key for key in keys if len(core_gene_sets[key]) > 0]
import numpy as np
matrix = np.empty((len(genes), len(genes)))

for i, geneset_i in enumerate(genes):
    for j, geneset_j in enumerate(genes):
        # |intersection| / |union|, expressed in percent.
        matrix[i, j] = len(geneset_i.intersection(geneset_j))
        matrix[i, j] /= len(geneset_i.union(geneset_j))
        matrix[i, j] *= 100

print(matrix)
import matplotlib.pyplot as plt
import matplotlib as mpl

# `mpl.cm.get_cmap` is deprecated and absent from recent matplotlib releases;
# the colormap registry is the supported lookup.
cmap_reversed = mpl.colormaps['viridis_r']

fig, ax = plt.subplots(figsize=(5,5))
im = ax.imshow(matrix, cmap=cmap_reversed)
ax.tick_params(top=True, labeltop=True, bottom=False, labelbottom=False)
# Show all ticks and label them with the phenotype names.
colnames = names[:]
ax.set_xticks(np.arange(len(names)), labels=colnames, fontsize=8)
ax.set_yticks(np.arange(len(names)), labels=names, fontsize=8)


# Annotate every cell with its percentage, truncated to an integer.
for i in range(len(names)):
    for j in range(len(names)):
        text = ax.text(j, i, "{}%".format(int(matrix[i, j])),
                       ha="center", va="center", color="w")

plt.xticks(rotation=90, ha='center')
fig.tight_layout()
plt.show()

In [None]:
# Pairwise overlap of the Mendelian gene sets, shown as a heatmap.
keys = ["Ulcerative Colitis","Rheumathoid Arthtritis", "Coronary Artery Disease", "Alzheimer's Disease", "Schizophrenia"]

# Phenotypes with an empty gene set are left out (their union could be empty).
genes = [only_mendelians[key] for key in keys if len(only_mendelians[key]) > 0]
names = [key for key in keys if len(only_mendelians[key]) > 0]
import numpy as np
matrix = np.empty((len(genes), len(genes)))

for i, geneset_i in enumerate(genes):
    for j, geneset_j in enumerate(genes):
        # |intersection| / |union|, expressed in percent.
        matrix[i, j] = len(geneset_i.intersection(geneset_j))
        matrix[i, j] /= len(geneset_i.union(geneset_j))
        matrix[i, j] *= 100

print(matrix)
import matplotlib.pyplot as plt
import matplotlib as mpl

# `mpl.cm.get_cmap` is deprecated and absent from recent matplotlib releases;
# the colormap registry is the supported lookup.
cmap_reversed = mpl.colormaps['viridis_r']

fig, ax = plt.subplots(figsize=(5,5))
im = ax.imshow(matrix, cmap=cmap_reversed)
ax.tick_params(top=True, labeltop=True, bottom=False, labelbottom=False)
# Show all ticks and label them with the phenotype names.
colnames = names[:]
ax.set_xticks(np.arange(len(names)), labels=colnames, fontsize=8)
ax.set_yticks(np.arange(len(names)), labels=names, fontsize=8)


# Annotate every cell with its percentage, truncated to an integer.
for i in range(len(names)):
    for j in range(len(names)):
        text = ax.text(j, i, "{}%".format(int(matrix[i, j])),
                       ha="center", va="center", color="w")

plt.xticks(rotation=90, ha='center')
fig.tight_layout()
plt.show()

In [None]:
# Pairwise overlap of the candidate gene sets, shown as a heatmap.
keys = ["Ulcerative Colitis","Rheumathoid Arthtritis", "Coronary Artery Disease", "Alzheimer's Disease", "Schizophrenia"]

# Phenotypes with an empty gene set are left out (their union could be empty).
genes = [only_candidates[key] for key in keys if len(only_candidates[key]) > 0]
names = [key for key in keys if len(only_candidates[key]) > 0]
import numpy as np
matrix = np.empty((len(genes), len(genes)))

for i, geneset_i in enumerate(genes):
    for j, geneset_j in enumerate(genes):
        # |intersection| / |union|, expressed in percent.
        matrix[i, j] = len(geneset_i.intersection(geneset_j))
        matrix[i, j] /= len(geneset_i.union(geneset_j))
        matrix[i, j] *= 100

print(matrix)
import matplotlib.pyplot as plt
import matplotlib as mpl

# `mpl.cm.get_cmap` is deprecated and absent from recent matplotlib releases;
# the colormap registry is the supported lookup.
cmap_reversed = mpl.colormaps['viridis_r']

fig, ax = plt.subplots(figsize=(5,5))
im = ax.imshow(matrix, cmap=cmap_reversed)
ax.tick_params(top=True, labeltop=True, bottom=False, labelbottom=False)
# Show all ticks and label them with the phenotype names.
colnames = names[:]
ax.set_xticks(np.arange(len(names)), labels=colnames, fontsize=8)
ax.set_yticks(np.arange(len(names)), labels=names, fontsize=8)


# Annotate every cell with its percentage, truncated to an integer.
for i in range(len(names)):
    for j in range(len(names)):
        text = ax.text(j, i, "{}%".format(int(matrix[i, j])),
                       ha="center", va="center", color="w")

plt.xticks(rotation=90, ha='center')
fig.tight_layout()
plt.show()

In [None]:
# Invert core_gene_sets: gene -> list of traits for which it is a core gene.
gene2trait = {}
for trait, genes in core_gene_sets.items():
    for gene in genes:
        gene2trait.setdefault(gene, []).append(trait)

# Genes that are peripheral for every trait get the placeholder trait "None".
gene2nonetrait = {gene: ["None"] for gene in global_peripheral_genes}

In [None]:
import networkx as nx

# Compound -> gene multigraph restricted to edges whose type contains "DRUGBANK".
df = pd.read_table("data/drkg/cgi.tsv", sep="\t", names=["Gene", "edge", "Compound"])
df = df[df["edge"].str.contains("DRUGBANK")]
graph = nx.from_pandas_edgelist(df, source="Compound", target="Gene", edge_attr="edge", create_using=nx.MultiDiGraph)

# Gene nodes are named "Gene::<id>"; strip the prefix to obtain the bare id.
node2entrez = {node: "".join(node.split("::")[1:]) for node in graph.nodes if node.startswith("Gene")}

# Entrez id (as string) -> HGNC symbol, skipping rows without an Entrez id.
translation_table = get_translation_table()
entrez2hgnc = {
    str(int(entrez_id)): symbol
    for symbol, entrez_id in zip(translation_table["symbol"], translation_table["entrez_id"])
    if not np.isnan(entrez_id)
}
node2hgnc = {
    node: "Gene::" + entrez2hgnc[entrez]
    for node, entrez in node2entrez.items()
    if entrez in entrez2hgnc
}
graph = nx.relabel_nodes(graph, node2hgnc)

# Rename compound nodes from their DrugBank ID to their common name.
vocabulary = pd.read_csv("drugbank_vocabulary.csv", header=0)
compounds2name = {
    "Compound::" + drugbank_id: "Compound::" + common_name
    for drugbank_id, common_name in zip(vocabulary["DrugBank ID"], vocabulary["Common name"])
}
graph = nx.relabel_nodes(graph, compounds2name)

In [None]:
# Edge source (compound) -> list of edge targets (genes); parallel edges of
# the multigraph each add their own entry.
compound2gene = {}
for edge in graph.edges:
    compound2gene.setdefault(edge[0], []).append(edge[1])

In [None]:
# Inspect the target genes recorded for budesonide.
compound2gene["Compound::Budesonide"]

In [None]:
# For every compound collect the traits reached through its target genes:
#   compound2gene2trait : compound -> {gene node -> traits of that gene}
#   compound2trait      : compound -> set of traits
#   compound2traitlist  : compound -> traits, one entry per (gene, trait) hit
#   compound2nontrait   : compound -> {"None"}; set as soon as one target gene
#                         is in gene2nonetrait and not in gene2trait, even if
#                         other targets of the compound do carry a trait
compound2gene2trait = {}
compound2trait = {}
compound2traitlist = {}
compound2nontrait = {}

for compound, genes in compound2gene.items():
    genes = genes[:]
    for gene in genes:
        gene_symbol = gene.split("::")[1]
        gene_traits = gene2trait.get(gene_symbol)
        if gene_traits is not None:
            if compound in compound2gene2trait:
                compound2gene2trait[compound][gene] = gene_traits
                compound2trait[compound].update(set(gene_traits))
                compound2traitlist[compound] += gene_traits
            else:
                # First trait-carrying gene of this compound: start with copies.
                compound2gene2trait[compound] = {gene: gene_traits[:]}
                compound2trait[compound] = set(gene_traits)
                compound2traitlist[compound] = gene_traits[:]
        elif gene_symbol in gene2nonetrait:
            compound2nontrait[compound] = set(gene2nonetrait[gene_symbol])
compound2nontrait

In [None]:
# Rebuild the three compound -> trait mappings from scratch:
#   compound2gene2trait : compound -> {gene node -> traits of that gene}
#   compound2trait      : compound -> set of traits
#   compound2traitlist  : compound -> traits, one entry per (gene, trait) hit
# Target genes without an entry in gene2trait are skipped.
compound2gene2trait = {}
compound2trait = {}
compound2traitlist = {}

for compound, genes in compound2gene.items():
    genes = genes[:]
    for gene in genes:
        gene_traits = gene2trait.get(gene.split("::")[1])
        if gene_traits is None:
            continue
        if compound in compound2gene2trait:
            compound2gene2trait[compound][gene] = gene_traits
            compound2trait[compound].update(set(gene_traits))
            compound2traitlist[compound] += gene_traits
        else:
            # First trait-carrying gene of this compound: start with copies.
            compound2gene2trait[compound] = {gene: gene_traits[:]}
            compound2trait[compound] = set(gene_traits)
            compound2traitlist[compound] = gene_traits[:]


In [None]:
# Inspect which traits each target gene of budesonide is a core gene for.
compound2gene2trait["Compound::Budesonide"]

In [None]:
# Bucket the compounds by the number of distinct traits they reach (1 to 5).
compounds_by_trait_count = {
    n_traits: {compound: traits for compound, traits in compound2trait.items() if len(traits) == n_traits}
    for n_traits in range(1, 6)
}
singletons = compounds_by_trait_count[1]
doubles = compounds_by_trait_count[2]
triples = compounds_by_trait_count[3]
quadruples = compounds_by_trait_count[4]
quintuples = compounds_by_trait_count[5]

In [None]:
# Group sizes, one per line: compounds flagged as non-trait, then compounds
# reaching exactly one, two, three, four and five traits.
for compound_group in (compound2nontrait, singletons, doubles, triples, quadruples, quintuples):
    print(len(compound_group))

In [None]:
# Invert compound2trait: trait -> list of compounds reaching that trait.
# The key order is fixed here because it is reused for plotting.
trait_order = ["Ulcerative Colitis", 'Rheumathoid Arthtritis', "Coronary Artery Disease", "Alzheimer's Disease", "Schizophrenia"]
trait2compound = {trait_name: [] for trait_name in trait_order}

for compound, traits in compound2trait.items():
    for trait in traits:
        trait2compound[trait].append(compound)

In [None]:
import venn

# Five-set Venn diagram of the compounds reaching each trait.
venn_names = list(trait2compound.keys())
venn_sets = list(trait2compound.values())
labels = venn.get_labels(venn_sets)
venn.venn5(labels, names=venn_names)

In [None]:
import pandas as pd

# Pharmacologic class id -> class name, taken from the first two columns of
# the node table. The columns are addressed by position with `.iloc`: the
# table has a header row, so its labels are strings, and the former `row[0]`
# relied on the deprecated positional fallback of `Series.__getitem__`.
hetionet_nodes = pd.read_csv("/mnt/storage/speos/data/drkg/hetionet-v1.0-nodes.tsv", header=0, sep="\t")
class2name = {
    node_id: node_name
    for node_id, node_name in zip(hetionet_nodes.iloc[:, 0], hetionet_nodes.iloc[:, 1])
    if node_id.startswith("Pharmacologic Class")
}

# Compound name -> list of pharmacologic class names. The file has no header;
# column 0 holds the class id and column 2 the compound id.
drug2class = pd.read_csv("/mnt/storage/speos/data/drkg/drug2class.tsv", header=None, sep="\t")
compound2class = {}
for class_id, compound_id in zip(drug2class[0], drug2class[2]):
    compound_name = compounds2name[compound_id]
    class_name = class2name[class_id]
    compound2class.setdefault(compound_name, []).append(class_name)

In [None]:
# Display the compound -> pharmacologic class mapping.
compound2class

In [None]:
# Group the compounds that reach all five traits by pharmacologic class;
# compounds without a class annotation are collected under "Others".
quint_classes = {"Others": []}
for compound in quintuples.keys():
    if compound not in compound2class:
        quint_classes["Others"].append(compound)
        continue
    for _class in compound2class[compound]:
        quint_classes.setdefault(_class, []).append(compound)


In [None]:
# Display the class grouping of the five-trait compounds.
quint_classes

In [None]:
# Display the compounds that reach exactly one trait.
singletons

In [None]:
# Build both directions of the compound <-> MeSH term relation in one pass
# over the table (it used to be parsed twice and iterated row by row).
# The file has no header; column 0 is the compound id, column 2 the MeSH term.
treats = pd.read_csv("/mnt/storage/speos/data/drkg/drugbank_treats.tsv", sep="\t", header=None)

compound2mesh = {}   # compound name -> MeSH term (the last row wins)
mesh2compound = {}   # MeSH term -> list of compound names
for compound_id, mesh_term in zip(treats[0], treats[2]):
    compound_name = compounds2name[compound_id]
    compound2mesh[compound_name] = mesh_term
    mesh2compound.setdefault(mesh_term, []).append(compound_name)

In [None]:
from collections import Counter

# compound2traitcounter    : compound -> Counter of trait hits, with every
#                            phenotype present (0 when not hit)
# compound2nontraitcounter : compound without any trait hit -> number of its
#                            recorded target genes
compound2traitcounter = {}
compound2nontraitcounter = {}

for compound in graph.nodes:
    if not compound.startswith("Compound"):
        continue
    if compound in compound2traitlist:
        counter = Counter(dict.fromkeys(phenotypes, 0))
        counter.update(compound2traitlist[compound])
        compound2traitcounter[compound] = counter
    elif compound in compound2gene:
        compound2nontraitcounter[compound] = len(compound2gene[compound])

In [None]:
# Table of core-gene hits per trait for the compounds listed under MeSH term
# D003093 (used in this notebook for ulcerative colitis).
UCcounters = {}

rows = []
index = []
for compound in mesh2compound["Disease::MESH:D003093"]:
    if compound in compound2traitcounter:
        UCcounters[compound] = sorted(compound2traitcounter[compound].items())
    elif compound in compound2nontrait:
        # Compound whose targets are only peripheral genes: all-zero row.
        rows.append([0] * 5)
        index.append(compound.split("::")[1])

# Every counter holds exactly the phenotypes as keys and is stored sorted by
# trait, so the column order is known even when no compound has a counter
# (previously `traitlist` was undefined or stale in that case).
traitlist = sorted(phenotypes)
for compound, traits in UCcounters.items():
    rows.append([number for trait, number in traits])
uc_df = pd.DataFrame(index=index  + [key.split("::")[1] for key in UCcounters.keys()], data=rows, columns=traitlist)
uc_df = uc_df[["Ulcerative Colitis", "Rheumathoid Arthtritis", "Coronary Artery Disease", "Alzheimer's Disease", "Schizophrenia"]]
uc_df

In [None]:
# Keep compounds with at least one core-gene hit, drop Bupivacaine by hand,
# then average the hits per trait.
uc_has_hits = uc_df.sum(axis=1) > 0
uc_df_cut = uc_df[uc_has_hits].drop("Bupivacaine")

uc_means = uc_df_cut.mean(axis=0)
uc_means

In [None]:
# Table of core-gene hits per trait for the compounds listed under MeSH term
# D003324 (used in this notebook for coronary artery disease).
CADcounters = {}

rows = []
index = []
for compound in mesh2compound["Disease::MESH:D003324"]:
    if compound in compound2traitcounter:
        CADcounters[compound] = sorted(compound2traitcounter[compound].items())
    elif compound in compound2nontrait:
        # Compound whose targets are only peripheral genes: all-zero row.
        rows.append([0] * 5)
        index.append(compound.split("::")[1])

# Every counter holds exactly the phenotypes as keys and is stored sorted by
# trait, so the column order is known even when no compound has a counter
# (previously `traitlist` was undefined or stale in that case).
traitlist = sorted(phenotypes)
for compound, traits in CADcounters.items():
    rows.append([number for trait, number in traits])
cad_df = pd.DataFrame(index=index +[key.split("::")[1] for key in CADcounters.keys()], data=rows, columns=traitlist)
cad_df = cad_df[["Ulcerative Colitis", "Rheumathoid Arthtritis", "Coronary Artery Disease", "Alzheimer's Disease", "Schizophrenia"]]
cad_df

In [None]:
# Keep compounds with at least one core-gene hit, drop the hand-picked
# compounds below, then average the hits per trait.
cad_has_hits = cad_df.sum(axis=1) > 0
cad_excluded = ["Rubidium Rb-82", "Ammonia N-13", "Fludeoxyglucose (18F)", "Pravastatin"]
cad_df_cut = cad_df[cad_has_hits].drop(cad_excluded)

cad_means = cad_df_cut.mean(axis=0)
cad_means

In [None]:
# Table of core-gene hits per trait for the compounds listed under MeSH term
# D012559 (used in this notebook for schizophrenia).
SCZcounters = {}

rows = []
index = []
for compound in mesh2compound["Disease::MESH:D012559"]:
    if compound in compound2traitcounter:
        SCZcounters[compound] = sorted(compound2traitcounter[compound].items())
    elif compound in compound2nontrait:
        # Compound whose targets are only peripheral genes: all-zero row.
        rows.append([0] * 5)
        index.append(compound.split("::")[1])

# Every counter holds exactly the phenotypes as keys and is stored sorted by
# trait, so the column order is known even when no compound has a counter
# (previously `traitlist` was undefined or stale in that case).
traitlist = sorted(phenotypes)
for compound, traits in SCZcounters.items():
    rows.append([number for trait, number in traits])
scz_df = pd.DataFrame(index=index + [key.split("::")[1] for key in SCZcounters.keys()], data=rows, columns=traitlist)
scz_df = scz_df[["Ulcerative Colitis", "Rheumathoid Arthtritis", "Coronary Artery Disease", "Alzheimer's Disease", "Schizophrenia"]]
scz_df

In [None]:
# Keep compounds with at least one core-gene hit, then average the hits per trait.
scz_has_hits = scz_df.sum(axis=1) > 0
scz_df_cut = scz_df[scz_has_hits]

scz_means = scz_df_cut.mean(axis=0)

In [None]:
# Table of core-gene hits per trait for the compounds listed under MeSH term
# D000544 (used in this notebook for Alzheimer's disease).
ADcounters = {}

rows = []
index = []
for compound in mesh2compound["Disease::MESH:D000544"]:
    if compound in compound2traitcounter:
        ADcounters[compound] = sorted(compound2traitcounter[compound].items())
    elif compound in compound2nontrait:
        # Compound whose targets are only peripheral genes: all-zero row.
        rows.append([0] * 5)
        index.append(compound.split("::")[1])

# Every counter holds exactly the phenotypes as keys and is stored sorted by
# trait, so the column order is known even when no compound has a counter
# (previously `traitlist` was undefined or stale in that case).
traitlist = sorted(phenotypes)
for compound, traits in ADcounters.items():
    rows.append([number for trait, number in traits])
ad_df = pd.DataFrame(index=index + [key.split("::")[1] for key in ADcounters.keys()], data=rows, columns=traitlist)
ad_df = ad_df[["Ulcerative Colitis", "Rheumathoid Arthtritis", "Coronary Artery Disease", "Alzheimer's Disease", "Schizophrenia"]]
ad_df


In [None]:
# Keep compounds with at least one core-gene hit, drop the hand-picked
# compounds below, then average the hits per trait.
ad_has_hits = ad_df.sum(axis=1) > 0
ad_excluded = ["Flutemetamol (18F)", "Florbetapir (18F)", "Florbetaben F-18"]
ad_df_cut = ad_df[ad_has_hits].drop(ad_excluded)

ad_means = ad_df_cut.mean(axis=0)

In [None]:
# Table of core-gene hits per trait for the compounds listed under MeSH term
# D001172 (used in this notebook for rheumatoid arthritis).
RAcounters = {}

rows = []
index = []
for compound in mesh2compound["Disease::MESH:D001172"]:
    if compound in compound2traitcounter:
        RAcounters[compound] = sorted(compound2traitcounter[compound].items())
    elif compound in compound2nontrait:
        # Compound whose targets are only peripheral genes: all-zero row.
        rows.append([0] * 5)
        index.append(compound.split("::")[1])

# Every counter holds exactly the phenotypes as keys and is stored sorted by
# trait, so the column order is known even when no compound has a counter
# (previously `traitlist` was undefined or stale in that case).
traitlist = sorted(phenotypes)
for compound, traits in RAcounters.items():
    rows.append([number for trait, number in traits])
ra_df = pd.DataFrame(index=index + [key.split("::")[1] for key in RAcounters.keys()], data=rows, columns=traitlist)
ra_df = ra_df[["Ulcerative Colitis", "Rheumathoid Arthtritis", "Coronary Artery Disease", "Alzheimer's Disease", "Schizophrenia"]]
ra_df

In [None]:
# Keep compounds with at least one core-gene hit, drop the hand-picked
# compounds below, then average the hits per trait.
ra_has_hits = ra_df.sum(axis=1) > 0
ra_excluded = ["Bupivacaine", "Ranitidine"]
ra_df_cut = ra_df[ra_has_hits].drop(ra_excluded)
ra_means = ra_df_cut.mean(axis=0)
ra_means

In [None]:
from scipy.stats import mannwhitneyu
from statsmodels.stats.multitest import fdrcorrection

# Figure with one bar panel per indication (UC, RA, CAD, AD, SCZ). Each panel
# shows the mean per-trait counts of the compounds in that indication's frame;
# a bar is coloured when a Mann-Whitney U test against all remaining compounds
# in compound2traitcounter is significant after FDR correction (red: higher
# mean than the background, blue: lower), otherwise light gray.
fig, axs = plt.subplots(nrows=5, figsize=(full_width*cm*0.5, full_width*cm*0.5), sharex=False)

short2long = {"UC": "Ulcerative Colitis",
              "RA": 'Rheumathoid Arthtritis',
              "CAD": 'Coronary Artery Disease',
              "AD": "Alzheimer's Disease",
              "SCZ": 'Schizophrenia'}

shorthands = ["UC", "RA", "CAD", "AD", "SCZ"]

pvals = []
meandiffs = []
# ad_df_cut (not ad_df) is used so that the tests run on the same compounds
# that ad_means, plotted below, was computed from.
for df in [uc_df_cut, ra_df_cut, cad_df_cut, ad_df_cut, scz_df_cut]:
    # Background: per-trait counts of every compound that is not a row of df.
    trait2coregenetargets = {label: [] for label in shorthands}
    for compound in compound2traitcounter:
        if compound.split("::")[1] not in df.index:
            counter = compound2traitcounter[compound]
            for inner_shorthand in shorthands:
                trait2coregenetargets[inner_shorthand].append(counter[short2long[inner_shorthand]])
    for inner_shorthand in shorthands:
        trait = short2long[inner_shorthand]
        background = trait2coregenetargets[inner_shorthand]
        meandiffs.append(df[trait].mean() - np.mean(background))
        pvals.append(mannwhitneyu(df[trait], background)[1])

# Correct all 25 tests together; index [1] is the array of adjusted p-values.
pvals = fdrcorrection(pvals)[1]
#pvals = np.asarray(pvals)

for ax, _pvals, _meandiffs, means, label in zip(axs, np.split(pvals, 5), np.split(np.asarray(meandiffs), 5), [uc_means, ra_means, cad_means, ad_means, scz_means], shorthands):

    color = []
    for pval, meandiff in zip(_pvals, _meandiffs):
        if pval > 0.05:
            color.append("lightgray")
        elif meandiff > 0:
            color.append("red")
        else:
            color.append("blue")
    ax.bar(range(len(means)), means, color=color)
    ax.xaxis.tick_top()
    ax.set_xticks([])
    ax.set_ylabel(label)
    ax.spines[['right', 'top']].set_visible(False)

# Only the top panel carries the shared x tick labels.
axs[0].set_xticks(range(5))
axs[0].set_xticklabels(["UC", "RA", "CAD", "AD", "SCZ"], fontsize=8)
axs[0].set_xlabel("Affected Core Genes for Trait")
axs[0].xaxis.set_label_position('top')

axs[2].text(x=-2, y=0.5, s="Genes affect by Drugs for ", rotation=90, va="center")

plt.savefig("drug_target_selectivity.svg")


In [None]:
from scipy.stats import mannwhitneyu
from statsmodels.stats.multitest import fdrcorrection

# Variant of the selectivity figure in which the background of each indication
# holds only the counts for that indication's own trait.
fig, axs = plt.subplots(nrows=5, figsize=(full_width*cm*0.5, full_width*cm*0.5), sharex=False)

short2long = {"UC": "Ulcerative Colitis",
              "RA": 'Rheumathoid Arthtritis',
              "CAD": 'Coronary Artery Disease',
              "AD": "Alzheimer's Disease",
              "SCZ": 'Schizophrenia'}

shorthands = ["UC", "RA", "CAD", "AD", "SCZ"]

pvals = []
meandiffs = []
trait2coregenetargets = {label: [] for label in shorthands}
# ad_df_cut (not ad_df) is used so that the tests run on the same compounds
# that ad_means, plotted below, was computed from.
for df, shorthand in zip([uc_df_cut, ra_df_cut, cad_df_cut, ad_df_cut, scz_df_cut], shorthands):
    for compound in compound2traitcounter:
        if compound.split("::")[1] not in df.index:
            trait2coregenetargets[shorthand].append(compound2traitcounter[compound][short2long[shorthand]])
    # NOTE(review): every inner trait is compared against the background of
    # the frame's own trait (`shorthand`), not of the inner trait - confirm
    # this is intended.
    background = trait2coregenetargets[shorthand]
    for inner_shorthand in shorthands:
        trait = short2long[inner_shorthand]
        meandiffs.append(df[trait].mean() - np.mean(background))
        pvals.append(mannwhitneyu(df[trait], background)[1])

# Correct all 25 tests together; index [1] is the array of adjusted p-values.
pvals = fdrcorrection(pvals)[1]
#pvals = np.asarray(pvals)

for ax, _pvals, _meandiffs, means, label in zip(axs, np.split(pvals, 5), np.split(np.asarray(meandiffs), 5), [uc_means, ra_means, cad_means, ad_means, scz_means], shorthands):

    color = []
    for pval, meandiff in zip(_pvals, _meandiffs):
        if pval > 0.05:
            color.append("lightgray")
        elif meandiff > 0:
            color.append("red")
        else:
            color.append("blue")
    ax.bar(range(len(means)), means, color=color)
    ax.xaxis.tick_top()
    ax.set_xticks([])
    ax.set_ylabel(label)
    ax.spines[['right', 'top']].set_visible(False)

# Only the top panel carries the shared x tick labels.
axs[0].set_xticks(range(5))
axs[0].set_xticklabels(["UC", "RA", "CAD", "AD", "SCZ"], fontsize=8)
axs[0].set_xlabel("Affected Core Genes for Trait")
axs[0].xaxis.set_label_position('top')

axs[2].text(x=-2, y=0.5, s="Genes affect by Drugs for ", rotation=90, va="center")

plt.savefig("drug_target_selectivity.svg")


In [None]:
# Assemble all_df: one row per compound in compound2gene2trait, one column per
# trait (sorted by trait name), filled from compound2traitcounter.
rows = []
index = []
for compound in compound2gene2trait:
    by_trait = sorted(compound2traitcounter[compound].items(), key=lambda item: item[0])
    # The column labels are taken from the most recently processed compound.
    columns = [trait for trait, _ in by_trait]
    rows.append([count for _, count in by_trait])
    index.append(compound.split("::")[1])

all_df = pd.DataFrame(index=index, data=rows, columns=columns)

all_df

In [None]:
from scipy.stats import mannwhitneyu
from statsmodels.stats.multitest import fdrcorrection

# Selectivity figure using all_df as the universe: for every indication frame
# and every trait, the frame's counts are compared (Mann-Whitney U) with the
# counts of all rows of all_df that are not in the frame.
fig, axs = plt.subplots(nrows=5, figsize=(full_width*cm*0.5, full_width*cm*0.5), sharex=False)

short2long = {"UC": "Ulcerative Colitis",
              "RA": 'Rheumathoid Arthtritis',
              "CAD": 'Coronary Artery Disease',
              "AD": "Alzheimer's Disease",
              "SCZ": 'Schizophrenia'}

shorthands = ["UC", "RA", "CAD", "AD", "SCZ"]

pvals = []
meandiffs = []
stats = []
trait2coregenetargets = {label: [] for label in shorthands}
# ad_df_cut (not ad_df) is used so that the tests run on the same compounds
# that ad_means was computed from. Like the other *_df_cut frames it was
# filtered to rows with a non-zero sum before being cut.
for df in [uc_df_cut, ra_df_cut, cad_df_cut, ad_df_cut, scz_df_cut]:
    for trait in short2long.values():
        background = all_df.drop(df.index)[trait]
        targets = df[trait]
        meandiffs.append(targets.mean() - background.mean())
        # Run the test once and keep both the statistic and the p-value.
        statistic, pvalue = mannwhitneyu(background, targets)[:2]
        pvals.append(pvalue)
        stats.append(statistic)

# Correct all 25 tests together; index [1] is the array of adjusted p-values.
pvals = fdrcorrection(pvals)[1]
#pvals = np.asarray(pvals)

# 8-bit RGB values are scaled by 255 so that they map onto matplotlib's
# 0-1 float range (the previous divisor of 265 darkened both colours).
higher_color = (208/255, 36/255, 36/255)
lower_color = (73/255, 90/255, 176/255)

for ax, _pvals, _meandiffs, means, label in zip(axs, np.split(pvals, 5), np.split(np.asarray(meandiffs), 5), [uc_means, ra_means, cad_means, ad_means, scz_means], shorthands):

    color = []
    for pval, meandiff in zip(_pvals, _meandiffs):
        if pval > 0.05:
            color.append("lightgray")
        elif meandiff > 0:
            color.append(higher_color)
        else:
            color.append(lower_color)
    ax.bar(range(len(means)), means, color=color)
    ax.xaxis.tick_top()
    ax.set_xticks([])
    ax.set_ylabel(label)
    ax.spines[['right', 'top']].set_visible(False)

# Only the top panel carries the shared x tick labels.
axs[0].set_xticks(range(5))
axs[0].set_xticklabels(["UC", "RA", "CAD", "AD", "SCZ"], fontsize=8)
axs[0].set_xlabel("Affected Core Genes for Trait")
axs[0].xaxis.set_label_position('top')

axs[2].text(x=-2, y=0.5, s="Genes affect by Drugs for ", rotation=90, va="center")

plt.savefig("drug_target_selectivity.svg")


In [None]:
all_df.drop(ad_df_cut.index)["Schizophrenia"].mean()

In [None]:
ad_df_cut["Schizophrenia"].mean()

In [None]:
mannwhitneyu(all_df.drop(ad_df_cut.index)["Schizophrenia"], ad_df_cut["Schizophrenia"])

In [None]:
all_df.drop(ad_df_cut.index)["Ulcerative Colitis"].mean()

In [None]:
ad_df_cut["Ulcerative Colitis"].mean()

In [None]:
mannwhitneyu(all_df.drop(ad_df_cut.index)["Ulcerative Colitis"], ad_df_cut["Ulcerative Colitis"])

In [None]:
mannwhitneyu(all_df.drop(ra_df_cut.index)["Ulcerative Colitis"], ra_df_cut["Ulcerative Colitis"])

In [None]:
len(trait2coregenetargets["UC"])

In [None]:
len(trait2coregenetargets["RA"])

In [None]:
len(trait2coregenetargets["AD"])

# Now try including all compounds, including those that do not target any trait's core genes

In [None]:
# Rebuild the per-indication frames without the non-zero-sum filter: only
# compounds named explicitly are dropped. Each *_means is the column-wise mean
# of the matching *_df_cut frame.
uc_df_cut = uc_df.drop("Bupivacaine")
uc_means = uc_df_cut.mean(axis=0)

cad_df_cut = cad_df.drop(["Rubidium Rb-82", "Ammonia N-13", "Fludeoxyglucose (18F)", "Pravastatin"])
cad_means = cad_df_cut.mean(axis=0)

# Nothing is dropped for schizophrenia.
scz_df_cut = scz_df
scz_means = scz_df_cut.mean(axis=0)

ad_df_cut = ad_df.drop(["Flutemetamol (18F)", "Florbetapir (18F)", "Florbetaben F-18"])
ad_means = ad_df_cut.mean(axis=0)

ra_df_cut = ra_df.drop(["Bupivacaine", "Ranitidine"])
# Mean over the cut frame, as for every other trait (previously taken from
# ra_df, which still contained the two dropped compounds).
ra_means = ra_df_cut.mean(axis=0)

uc_df

In [None]:
from scipy.stats import mannwhitneyu
from statsmodels.stats.multitest import fdrcorrection

# Selectivity figure over all compound nodes of `graph`: each indication frame
# is tested against every compound node that is not one of its rows.
fig, axs = plt.subplots(nrows=5, figsize=(full_width*cm*0.5, full_width*cm*0.4), sharex=False)

short2long = {"UC": "Ulcerative Colitis",
              "RA": 'Rheumathoid Arthtritis',
              "CAD": 'Coronary Artery Disease',
              "AD": "Alzheimer's Disease",
              "SCZ": 'Schizophrenia'}

shorthands = ["UC", "RA", "CAD", "AD", "SCZ"]


def _bar_color(pval, meandiff):
    """Gray when not significant, else red for a positive and blue for a non-positive mean difference."""
    if pval > 0.05:
        return "lightgray"
    return "red" if meandiff > 0 else "blue"


compound_nodes = [node for node in graph.nodes if node.startswith("Compound")]

pvals = []
meandiffs = []
for df in [uc_df_cut, ra_df_cut, cad_df_cut, ad_df_cut, scz_df_cut]:
    trait2coregenetargets = {label: [] for label in shorthands}
    for compound in compound_nodes:
        if compound.split("::")[1] in df.index:
            continue
        for inner_shorthand in shorthands:
            try:
                value = compound2traitcounter[compound][short2long[inner_shorthand]]
            except KeyError:
                # NOTE(review): the fallback is whatever compound2nontraitcounter
                # stores for the compound - confirm it is meant as a
                # core-gene count.
                value = compound2nontraitcounter[compound]
            trait2coregenetargets[inner_shorthand].append(value)
    for inner_shorthand in shorthands:
        trait = short2long[inner_shorthand]
        background = trait2coregenetargets[inner_shorthand]
        meandiffs.append(df[trait].mean() - np.mean(background))
        pvals.append(mannwhitneyu(df[trait], background)[1])

# Adjusted p-values, mean differences and bar heights as flat arrays of 25
# values, five consecutive values per panel.
adjusted = fdrcorrection(pvals)[1]
pvals = adjusted.reshape((-1, 5)).flatten()
meandiffs = np.asarray(meandiffs).reshape((-1, 5)).flatten()
means = np.asarray([uc_means, ra_means, cad_means, ad_means, scz_means]).flatten()

panels = zip(axs, np.split(pvals, 5), np.split(meandiffs, 5), np.split(means, 5), shorthands)
for ax, _pvals, _meandiffs, _means, label in panels:
    color = [_bar_color(pval, meandiff) for pval, meandiff in zip(_pvals, _meandiffs)]
    ax.bar(range(len(_means)), _means, color=color)
    ax.xaxis.tick_top()
    ax.set_xticks([])
    ax.set_ylabel(label)
    ax.spines[['right', 'top']].set_visible(False)

top_axis = axs[0]
top_axis.set_xticks(range(5))
top_axis.set_xticklabels(["UC", "RA", "CAD", "AD", "SCZ"], fontsize=8)
top_axis.set_xlabel("Drugs indicated for")
top_axis.xaxis.set_label_position('top')

axs[2].text(x=-2, y=0.5, s="Core Genes afffected", rotation=90, va="center")

plt.savefig("drug_target_selectivity_all_compounds.svg")


In [None]:
# Invert `singletons`: map the first trait of each entry to the list of
# compounds that carry it.
singletonsbytrait = {}

for compound, trait in singletons.items():
    first_trait = list(trait)[0]
    singletonsbytrait.setdefault(first_trait, []).append(compound)

singletonsbytrait

In [None]:
# Score every compound by (share of its genes that are RA core genes) times
# (number of such genes), then list the compounds from highest to lowest score.
rheumadrugs2genes = {}
for compounds, genes in compound2gene.items():
    of_core_genes = []
    for gene in genes:
        if gene.split("::")[1] in core_gene_sets["Rheumathoid Arthtritis"]:
            of_core_genes.append(gene)
    n_core = len(of_core_genes)
    rheumadrugs2genes[compounds] = (n_core / len(genes)) * n_core

# Ascending stable sort followed by a reversal.
rheumadrugs2genes = list(reversed(sorted(rheumadrugs2genes.items(), key=lambda pair: pair[1])))
rheumadrugs2genes

In [None]:
# Score every compound by how strongly it targets core genes that belong to
# rheumatoid arthritis only, i.e. RA core genes that are not a core gene of
# any other trait in core_gene_sets. The score mirrors rheumadrugs2genes:
# (share of the compound's genes that are exclusive RA core genes) times
# (number of such genes).
ra_trait = "Rheumathoid Arthtritis"
ra_core_genes = core_gene_sets[ra_trait]

# Union of the core genes of every trait except RA.
other_core_genes = set()
for trait, coregenes in core_gene_sets.items():
    if trait != ra_trait:
        other_core_genes.update(coregenes)

rheumadrugs2exclusivegenes = {}
for compounds in compound2gene.keys():
    genes = compound2gene[compounds]
    # Gene entries look like "<prefix>::<symbol>"; the core-gene sets are
    # matched on the part after the first "::".
    of_core_genes = [gene for gene in genes if gene.split("::")[1] in ra_core_genes]
    exclusive_core_genes = [gene for gene in of_core_genes if gene.split("::")[1] not in other_core_genes]
    if genes:
        score = (len(exclusive_core_genes) / len(genes)) * len(exclusive_core_genes)
    else:
        # A compound without genes cannot target anything.
        score = 0.0
    rheumadrugs2exclusivegenes[compounds] = score

# Highest score first (ascending stable sort, then reversed).
rheumadrugs2exclusivegenes = sorted(rheumadrugs2exclusivegenes.items(), key=lambda x: x[1])[::-1]
rheumadrugs2exclusivegenes

In [None]:
compound2gene2trait["Compound::Andrographolide"]

In [None]:
# RA core genes that are not core genes of any of the other four traits,
# intersected with the first column of the druggable-genome table.
other_traits = ["Ulcerative Colitis", "Coronary Artery Disease", "Alzheimer's Disease", "Schizophrenia"]
exclusive_ra_genes = core_gene_sets["Rheumathoid Arthtritis"].difference(
    *[core_gene_sets[trait] for trait in other_traits]
)

druggable_table = pd.read_csv("/mnt/storage/speos/data/dgidb/druggable_genome.tsv", sep="\t", header=None)
druggable_genes = druggable_table.iloc[:, 0].tolist()

ra_druggable = exclusive_ra_genes.intersection(druggable_genes)
len(ra_druggable)

In [None]:
ra_druggable

In [None]:
[gene for gene in core_gene_sets["Ulcerative Colitis"] if gene.startswith("CASP")]

In [None]:
# Group each trait's single-trait compounds by the classes listed for them in
# compound2class; compounds without an entry there go to "Others".
single_classes = {key: {"Others": []} for key in singletonsbytrait.keys()}
for trait, compounds in singletonsbytrait.items():
    by_class = single_classes[trait]
    for compound in compounds:
        try:
            classes = compound2class[compound]
        except KeyError:
            by_class["Others"].append(compound)
            continue
        for _class in classes:
            by_class.setdefault(_class, []).append(compound)
compound2class

In [None]:
compound2gene2trait["Compound::Donepezil"]

In [None]:
compound2gene2trait["Compound::Memantine"]

In [None]:
compound2gene2trait["Compound::Galantamine"]

In [None]:
compound2gene2trait["Compound::Aripiprazole"]

In [None]:
compound2gene2trait["Compound::Promethazine"]

In [None]:
compound2gene2trait["Compound::Methotrimeprazine"]

In [None]:
compound2gene2trait["Compound::Etanercept"]

In [None]:
# For every trait: (class, number of compounds) pairs, ordered by ascending
# compound count.
single_classes_quantitatively = {}

for trait, classes in single_classes.items():
    class_sizes = [(_class, len(compounds)) for _class, compounds in classes.items()]
    single_classes_quantitatively[trait] = sorted(class_sizes, key=lambda pair: pair[1])

In [None]:
single_classes_quantitatively

In [None]:
# Number of single-trait compounds per trait.
trait2num_singletons = {trait: len(compounds) for trait, compounds in singletonsbytrait.items()}
trait2num_singletons

In [None]:
# Hand-curated grouping of compound names into classes; the keys are the class
# labels drawn next to the stacked bar of the "drug_groups" figure.
# NOTE(review): "Nutrients And Metabolites" ends with an empty-string entry,
# and "N-Formylmethionine" appears in both "Nutrients And Metabolites" and
# "Others"; any code that uses len() of these lists counts both - confirm
# whether that is intended.
quint_real_classes = {"Vitamins": ["Pyridoxal phosphate", "Tetrahydrofolic acid","Vitamin E", "Menadione", "alpha-Tocopherol succinate", "D-alpha-Tocopherol acetate"],
                     "Nutrients And Metabolites": ["N-Formylmethionine", "Omega-3-carboxylic acids", "Spermine", "Glutamic acid", "Glucosamine", "Iron", "Zinc", "alpha-D-glucose 6-phosphate", "Beta-D-Glucose", "Palmitic Acid", "Copper", "Polaprezinc", "Fish oil", "Zinc acetate", "Ferrous gluconate", "Ferrous succinate", "Ferrous ascorbate", "Ferrous fumarate", "Ferrous glycine sulfate", "Zinc chloride", ""],
                     "Recreational Drugs": ["Ethanol", "Cannabidiol", "Medical Cannabis", "Nabiximols"],
                     "Antioxidants": ["Glutathione", "NADH", "Genistein", "Resveratrol"],
                     "Cancer Drugs": ["Sunitinib", "Regorafenib", "Doxorubicin", "Fluorouracil", "Sorafenib", "Imatinib", "Tamoxifen", "Dasatinib", "XL999", "XL820", "GEM-231", "Enzastaurin", "Midostaurin", "Nintedanib", "Erdafitinib", "Pexidartinib", "Pazopanib"],
                     "Hormones": ["Diethylstilbestrol", "Liothyronine", "Levothyroxine"],
                     "Tricyclic Antidepressants": ["Amitriptyline", "Imipramine", "Nortriptyline", "Desipramine"],
                     "SSRIs/SNRIs": ["Sertraline", "Bupropion"],
                     "Primary Indication": ["Promethazine", "Olsalazine", "Loperamide", "Carvedilol", "Trapidil",  "Asenapine", "Olanzapine","Aripiprazole","Bepridil", "Verapamil", "Ranolazine"], # IBD, antiphsychotic, antianginal
                     "Second Messengers": ["(Rp)-cAMPS", "Cyclic GMP", "cAMP"],
                     "Others": ["Permethrin", "Tetracycline", "Meperidine", "Valproic acid", "Lamotrigine", "Phenytoin", "Amiodarone", "Emapalumab", "Bioallethrin", "Fostamatinib", "Artenimol", "Foreskin keratinocyte (neonatal)", "Foreskin fibroblast (neonatal)", "Pimagedine", "Fontolizumab", "VIR201", "Becaplermin", "Cisapride", "Hexachlorophene", "Diazoxide", "2,6,8-Trimethyl-3-Amino-9-Benzyl-9-Methoxynonanoic Acid", "L-erythro-7,8-dihydrobiopterin", "D-norleucine", "N-Formylmethionine"]
                     }

#quint_real_classes = sorted(quint_real_classes.items(), key=lambda x: len(x[1]))[::-1]
quint_real_classes

In [None]:
class ColorCycler:
    """Hand out the entries of a color sequence one by one, wrapping around at the end."""

    def __init__(self, colors):
        # Position of the color that the next call to next() returns.
        self.state = 0
        self.colors = colors

    def next(self):
        """Return the current color and move on, restarting after the last one."""
        current = self.colors[self.state]
        at_last = self.state == len(self.colors) - 1
        self.state = 0 if at_last else self.state + 1
        return current

In [None]:
# Two-panel figure: the top panel shows how many compounds target 1..5 traits,
# the bottom panel breaks the five-trait compounds down by hand-curated class.
cycler = ColorCycler(["#01016f", "#89006b", "#d00053", "#f85732", "#ffa600"])
good_sorting = ["Nutrients And Metabolites", "Vitamins", "Antioxidants", "Hormones", "Second Messengers", "Others", "Cancer Drugs", "Primary Indication", "Tricyclic Antidepressants", "Recreational Drugs", "SSRIs/SNRIs"]


fig, (ax1, ax2) = plt.subplots(nrows=2, figsize=(full_width*cm*0.5, full_width*cm*0.5), gridspec_kw={"height_ratios":(1,4)})

ax1.bar(range(5), (len(singletons), len(doubles), len(triples), len(quadruples), len(quintuples)), zorder=1)

# Overlay the first bar with one stacked segment per trait.
previous = 0
# 8-bit RGB values scaled by 255 onto matplotlib's 0-1 float range (the
# channels were previously divided by a mix of 256 and 265).
colors = [(235/255, 125/255, 91/255),
          (254/255, 210/255, 63/255),
          (181/255, 211/255, 61/255),
          (108/255, 162/255, 234/255),
          (68/255, 34/255, 136/255)]
for num_singletons, color in zip(trait2num_singletons.values(), colors):
    ax1.bar([0], (num_singletons), bottom=previous, color=color)
    previous += num_singletons

# Overlay the second bar with the two trait pairs of interest.
autoinflammatory = len([compound for compound, traits in doubles.items() if 'Rheumathoid Arthtritis' in traits and 'Ulcerative Colitis' in traits])
psychiatric = len([compound for compound, traits in doubles.items() if "Alzheimer's Disease" in traits and 'Schizophrenia' in traits])
ax1.bar([1], autoinflammatory, color="black")
ax1.bar([1], psychiatric, bottom=autoinflammatory, color="white")


ax1.set_xticks(range(5))
ax1.set_xticklabels(range(1,6))

ax1.yaxis.tick_right()
ax1.yaxis.set_label_position('right')
ax1.set_ylabel("n Compounds")
ax1.xaxis.tick_top()
ax1.xaxis.set_label_position('top')
ax1.set_xlabel("Traits targeted by Compound")


ax2.bar((1), (len(quintuples)), width=0.6, color="black")
ax2.set_xlim((-0.5,1.35))

previous = 0
# Every compound name is counted once: empty names are skipped and a name
# listed in several classes counts only for the first class in good_sorting.
# With the current lists the stack then tops out at 97, the same value at
# which the hand-placed bracket below ends.
counted = set()
for text in good_sorting:
    members = [drug for drug in dict.fromkeys(quint_real_classes[text]) if drug and drug not in counted]
    counted.update(members)
    height = len(members)
    color = cycler.next()
    ax2.bar((1), (height), width=0.6, bottom=previous, color=color)
    ax2.text(x=0.65, ha="right", y=previous + (height/2), s=text, fontsize=7)
    previous += height

ax2.set_xticks([])
ax2.yaxis.tick_right()
ax2.yaxis.set_label_position('right')
ax2.set_ylabel("Number of Compounds")
ax2.spines[['left', 'top', "bottom"]].set_visible(False)
# Hand-placed brackets and captions for the two groups of classes.
ax2.vlines(-0.2, 0, 36, linewidth=0.5, color="black")
ax2.vlines(-0.2, 39, 97, linewidth=0.5, color="black")
ax2.text(s="Constitutively\nPresent     ", y= 20, x=-0.19, ha="right", va="center", rotation=90, fontsize=8)
ax2.text(s="Drugs\n ", y= 70, x=-0.19, ha="right", va="center", rotation=90, fontsize=8)
#plt.tight_layout()
plt.savefig("drug_groups.svg", bbox_inches="tight")


In [None]:
len(singletons)

In [None]:
colors

In [None]:
for key in quint_real_classes.keys():
    print(key, key in good_sorting)

In [None]:
all_df

In [None]:
# Number of genes per compound, grouped by how many traits the compound
# targets (0..5); values above 51 are capped at 51.
compound_groups = [compound2nontrait.keys(), singletons, doubles, triples, quadruples, quintuples]
group2numgenes = {group: [] for group in range(6)}

for group, compounds in enumerate(compound_groups):
    for compound in compounds:
        group2numgenes[group].append(min(len(compound2gene[compound]), 51))


In [None]:
# One histogram of gene counts per group, stacked vertically on a shared x axis.
fig, axs = plt.subplots(nrows=6, figsize=(full_width*cm*0.5, full_width*cm*0.5), sharex=True)

for ax, group in zip(axs, group2numgenes):
    ax.hist(group2numgenes[group], density=False, bins=10, range=(0, 55))
    ax.set_ylabel("{} Trait\nDrugs".format(group), fontsize=7)
    # Dotted guide lines every 100 counts, up to the current upper y limit.
    upper = int(ax.get_ylim()[1])
    for y in range(0, upper, 100):
        ax.axhline(y, color="lightgray", linestyle=":", linewidth=1, zorder=-5)

axs[-1].set_xlabel("Number of Target Genes")

# Directly Using Drugbank Info

In [None]:
def _read_human_rows(path):
    """Read a CSV file and keep the rows whose "Species" column equals "Humans"."""
    table = pd.read_csv(path)
    return table[table["Species"] == "Humans"]


drugbank = _read_human_rows("/mnt/storage/speos/data/drugbank/all_targets.csv")

experimental = _read_human_rows("/mnt/storage/speos/data/drugbank/experimental.csv")
experimental_drugs = experimental["Drug IDs"].tolist()

# Every drug ID that occurs in the experimental table ("Drug IDs" holds
# ";"-separated lists).
expdrugs = {drug.strip() for druglist in experimental_drugs for drug in druglist.split(";")}

vocabulary = pd.read_csv("drugbank_vocabulary.csv", header=0)
compounds2name = dict(zip(vocabulary["DrugBank ID"], vocabulary["Common name"]))


# Map compound name -> list of genes, restricted to genes in all_all_genes
# and to drug IDs that are not in expdrugs.
compound2gene = {}
for gene, drug_ids in zip(drugbank["Gene Name"], drugbank["Drug IDs"]):
    if gene not in all_all_genes:
        continue
    for drug_id in [entry.strip() for entry in drug_ids.split(";")]:
        if drug_id in expdrugs:
            continue
        compound2gene.setdefault(compounds2name[drug_id], []).append(gene)


In [None]:
# For every compound collect, over its genes that have an entry in gene2trait:
#   compound2gene2trait: gene -> copy of that gene's trait list
#   compound2trait:      set of all traits reached through any gene
#   compound2traitlist:  concatenation of the trait lists (one entry per
#                        gene/trait pair, so traits can repeat)
# Compounds none of whose genes are in gene2trait get no entry at all.
compound2gene2trait = {}
compound2trait = {}
compound2traitlist = {}

for compound, genes in compound2gene.items():
    for gene in genes:
        try:
            traits = gene2trait[gene]
        except KeyError:
            # Gene without any trait annotation: nothing to record.
            continue
        # Always store a copy so that later edits to these dicts cannot
        # change gene2trait (previously only the first gene was copied).
        compound2gene2trait.setdefault(compound, {})[gene] = traits[:]
        compound2trait.setdefault(compound, set()).update(traits)
        compound2traitlist.setdefault(compound, []).extend(traits)


In [None]:
# Split compound2trait by the number of traits (1 to 5) each compound reaches.
singletons, doubles, triples, quadruples, quintuples = [
    {compound: traits for compound, traits in compound2trait.items() if len(traits) == n_traits}
    for n_traits in range(1, 6)
]

In [None]:
#print(len(compound2nontrait.keys()))
# Sizes of the 1-, 2-, 3-, 4- and 5-trait groups, one per line.
for trait_group in (singletons, doubles, triples, quadruples, quintuples):
    print(len(trait_group))

In [None]:
# Invert `singletons`: map the first trait of each entry to the list of
# compounds that carry it.
singletonsbytrait = {}

for compound, trait in singletons.items():
    first_trait = list(trait)[0]
    singletonsbytrait.setdefault(first_trait, []).append(compound)

singletonsbytrait

In [None]:
# Number of single-trait compounds per trait.
trait2num_singletons = {trait: len(compounds) for trait, compounds in singletonsbytrait.items()}
trait2num_singletons

In [None]:
list(quintuples.keys())

In [None]:
# Hand-curated grouping of compound names into classes; the keys are the class
# labels drawn next to the stacked bar of the "drug_groups" figure.
# NOTE(review): "Nutrients And Metabolites" ends with an empty-string entry,
# "Lamotrigine" is listed twice under "Antiepileptics", and
# "N-Formylmethionine" appears in both "Nutrients And Metabolites" and
# "Others"; code that counts list entries will count the repeats - confirm
# whether that is intended.
quint_real_classes = {"Vitamins": ["Ergocalciferol", "Pyridoxal phosphate", "Tetrahydrofolic acid","Vitamin E", "Menadione", "alpha-Tocopherol succinate", "D-alpha-Tocopherol acetate"],
                     "Nutrients And Metabolites": ["Magnesium sulfate", "N-Formylmethionine", "Omega-3-carboxylic acids", "Spermine", "Glutamic acid", "Glucosamine", "Iron", "Zinc", "alpha-D-glucose 6-phosphate", "Beta-D-Glucose", "Palmitic Acid", "Copper", "Polaprezinc", "Fish oil", "Zinc acetate", "Ferrous gluconate", "Ferrous succinate", "Ferrous ascorbate", "Ferrous fumarate", "Ferrous glycine sulfate", "Zinc chloride", ""],
                     "Recreational Drugs": ["Ethanol", "Cannabidiol", "Medical Cannabis", "Nabiximols","Caffeine"],
                     "Antioxidants": ["Glutathione", "NADH", "Genistein", "Resveratrol"],
                     "Cancer Drugs": ["Nirogacestat","Ripretinib", "Pralsetinib", "Tivozanib", "Sunitinib", "Regorafenib", "Doxorubicin", "Fluorouracil", "Sorafenib", "Imatinib", "Tamoxifen", "Dasatinib", "XL999", "XL820", "GEM-231", "Enzastaurin", "Midostaurin", "Nintedanib", "Erdafitinib", "Pexidartinib", "Pazopanib"],
                     "Hormones": ["Diethylstilbestrol", "Liothyronine", "Levothyroxine"],
                     "Tricyclic Antidepressants": ["Amitriptyline", "Imipramine", "Nortriptyline", "Desipramine"],
                     "SSRIs/SNRIs": ["Sertraline", "Bupropion"],
                     "Primary Indication": ["Fontolizumab","Promethazine", "Olsalazine", "Carvedilol", "Trapidil",  "Asenapine", "Olanzapine","Aripiprazole","Bepridil", "Verapamil", "Ranolazine"], # IBD, antiphsychotic, antianginal
                     "Second Messengers": ["(Rp)-cAMPS", "Cyclic GMP", "cAMP"],
                     "Antiepileptics": [ "Topiramate","Stiripentol","Lamotrigine", "Valproic acid", "Lamotrigine", "Phenytoin"], 
                     "Analgetics": ["Isoflurane", "Enflurane","Meperidine"],
                     "Antihypertensives": ["Lacidipine", "Cilnidipine", "Nitrendipine", "Nicardipine", "Manidipine"],
                     "Others": [ "Loperamide","Tiludronic acid", "Drotaverine", "Butamben", "Levomenthol", "Yohimbine", "Spironolactone",  "Trimebutine", "Acetylsalicylic acid", "Permethrin", "Tetracycline", "Amiodarone", "Emapalumab", "Bioallethrin", "Fostamatinib", "Artenimol", "Foreskin keratinocyte (neonatal)", "Foreskin fibroblast (neonatal)", "Pimagedine",  "VIR201", "Becaplermin", "Cisapride", "Hexachlorophene", "Diazoxide", "2,6,8-Trimethyl-3-Amino-9-Benzyl-9-Methoxynonanoic Acid", "L-erythro-7,8-dihydrobiopterin", "D-norleucine", "N-Formylmethionine"]
                     }

#quint_real_classes = sorted(quint_real_classes.items(), key=lambda x: len(x[1]))[::-1]
quint_real_classes

In [None]:
class ColorCycler:
    """Hand out the entries of a color sequence one by one, wrapping around at the end."""

    def __init__(self, colors):
        # Position of the color that the next call to next() returns.
        self.state = 0
        self.colors = colors

    def next(self):
        """Return the current color and move on, restarting after the last one."""
        picked = self.colors[self.state]
        if self.state != len(self.colors) - 1:
            self.state += 1
        else:
            self.state = 0
        return picked

In [None]:
# Two-panel figure: the top panel shows how many compounds target 1..5 traits,
# the bottom panel breaks the five-trait compounds down by hand-curated class.
cycler = ColorCycler(["#01016f", "#89006b", "#d00053", "#f85732", "#ffa600"])
good_sorting = ["Nutrients And Metabolites", "Vitamins", "Antioxidants", "Hormones", "Second Messengers", "Others", "Cancer Drugs", "Primary Indication", "Antiepileptics", "Antihypertensives",   "Recreational Drugs","Tricyclic Antidepressants", "Analgetics","SSRIs/SNRIs"]


fig, (ax1, ax2) = plt.subplots(nrows=2, figsize=(full_width*cm*0.5, full_width*cm*0.5), gridspec_kw={"height_ratios":(1,4)})

ax1.bar(range(5), (len(singletons), len(doubles), len(triples), len(quadruples), len(quintuples)), zorder=1, color= "#8c92Ac") #"#a9afcb")

# Overlay the first bar with one stacked segment per trait.
previous = 0
# 8-bit RGB values scaled by 255 onto matplotlib's 0-1 float range (the
# channels were previously divided by a mix of 256 and 265).
colors = [(235/255, 125/255, 91/255),
          (254/255, 210/255, 63/255),
          (181/255, 211/255, 61/255),
          (108/255, 162/255, 234/255),
          (68/255, 34/255, 136/255)]
for num_singletons, color in zip(trait2num_singletons.values(), colors):
    ax1.bar([0], (num_singletons), bottom=previous, color=color)
    previous += num_singletons

# Overlay the second bar with the two trait pairs of interest.
autoinflammatory = len([compound for compound, traits in doubles.items() if 'Rheumathoid Arthtritis' in traits and 'Ulcerative Colitis' in traits])
psychiatric = len([compound for compound, traits in doubles.items() if "Alzheimer's Disease" in traits and 'Schizophrenia' in traits])
ax1.bar([1], autoinflammatory, color="black")
ax1.bar([1], psychiatric, bottom=autoinflammatory, color="white")


ax1.set_xticks(range(5))
ax1.set_xticklabels(range(1,6))

ax1.yaxis.tick_right()
ax1.yaxis.set_label_position('right')
ax1.set_ylabel("n Compounds")
ax1.xaxis.tick_top()
ax1.xaxis.set_label_position('top')
ax1.set_xlabel("Traits targeted by Compound")


# Black backdrop with the total number of five-trait compounds; the coloured
# class segments are stacked in front of it.
ax2.bar((1), (len(quintuples)), width=0.6, color="black", zorder=-1)
ax2.set_xlim((-0.5,1.35))

previous = 0
# Every five-trait compound is counted once: a name repeated within a class,
# or listed in several classes, counts only for its first occurrence in
# good_sorting order, so the stack cannot outgrow the backdrop through
# double counting.
counted = set()
for text in good_sorting:
    members = [drug for drug in dict.fromkeys(quint_real_classes[text]) if drug in quintuples and drug not in counted]
    counted.update(members)
    height = len(members)
    if height == 0:
        # Classes without any five-trait compound get no segment and do not
        # consume a color.
        continue
    color = cycler.next()
    ax2.bar((1), (height), width=0.6, bottom=previous, color=color, zorder=1)
    ax2.text(x=0.65, ha="right", va="center", y=previous + (height/2), s=text, fontsize=7)
    previous += height

ax2.set_xticks([])
ax2.yaxis.tick_right()
ax2.yaxis.set_label_position('right')
ax2.set_ylabel("Number of Compounds")
ax2.spines[['left', 'top', "bottom"]].set_visible(False)
# Hand-placed brackets and captions for the two groups of classes.
# NOTE(review): the extents are hard-coded; re-check them against the
# segment heights whenever the class lists change.
ax2.vlines(-0.2, 0, 20, linewidth=0.5, color="black")
ax2.vlines(-0.2, 22, 83, linewidth=0.5, color="black")
ax2.text(s="Constitutively\nPresent     ", y= 10, x=-0.19, ha="right", va="center", rotation=90, fontsize=8)
ax2.text(s="Drugs\n ", y= 50, x=-0.19, ha="right", va="center", rotation=90, fontsize=8)
#plt.tight_layout()
plt.savefig("drug_groups.svg", bbox_inches="tight")


In [None]:
#compound2mesh =  {compounds2name[row[0].split("::")[1]]: row[2].split("::")[1] for i, row in pd.read_csv("/mnt/storage/speos/data/drkg/drugbank_treats.tsv", sep="\t", header=None).iterrows()}

# Map each MeSH term (third column, part after "::") to the names of the
# compounds listed for it (first column, translated through compounds2name).
treats = pd.read_csv("/mnt/storage/speos/data/drkg/drugbank_treats.tsv", sep="\t", header=None)

mesh2compound = {}
for _, row in treats.iterrows():
    mesh_id = row[2].split("::")[1]
    compound_name = compounds2name[row[0].split("::")[1]]
    mesh2compound.setdefault(mesh_id, []).append(compound_name)

In [None]:
from collections import Counter

# compound2traitcounter:    compound -> Counter over all phenotypes (zero for
#                           traits the compound never reaches), filled from
#                           compound2traitlist.
# compound2nontraitcounter: compound -> number of genes, for compounds that
#                           have no entry in compound2traitlist.
compound2traitcounter = {}
compound2nontraitcounter = {}

for compound, genes in compound2gene.items():
    if compound in compound2traitlist:
        # Start every phenotype at zero so that all counters share the same keys.
        counter = Counter({key: 0 for key in phenotypes})
        counter.update(compound2traitlist[compound])
        compound2traitcounter[compound] = counter
    else:
        compound2nontraitcounter[compound] = len(genes)

In [None]:
# Parallel lists: the per-disease loop further down zips them, so entry i of
# mesh_terms is processed together with entry i of disease_terms.
mesh_terms = ["MESH:D003093", "MESH:D001172", "MESH:D003324","MESH:D000544", "MESH:D012559"]
disease_terms = ["Ulcerative Colitis", "Rheumathoid Arthtritis", "Coronary Artery Disease", "Alzheimer's Disease", "Schizophrenia"]

In [None]:
mesh2compound["MESH:D003093"]

In [None]:
compound2gene["Upadacitinib"]

In [None]:
"TNF" in core_gene_sets["Ulcerative Colitis"]

In [None]:
# Compound names to exclude; the per-disease loop in the next cell skips any
# compound for which `compound in banlist` holds.
# NOTE(review): "Dexamethasone" is listed twice, and "Melixocam" looks like a
# misspelling of "Meloxicam" (which is also present) - confirm.
banlist = ["Hydrocortisone butyrate", "Hydrocortisone cypionate","Hydrocortisone phosphate", "Hydrocortisone probutate", "Hydrocortisone valerate", "Amlodipine", "Pravastatin", "Fludeoxyglucose (18F)", "Rubidium Rb-82", "Ammonia N-13", "Technetium Tc-99m sestamibi", "Meloxicam", "Choline magnesium trisalicylate", "Corticotropin", "Tetracosactide", "Dexamethasone", "Ketoprofen", "Acetylsalicylic acid", "Meclofenamic acid", "Etodolac", "Sulindac", "Valdecoxib", "Fenoprofen", "Celecoxib", "Indomethacin", "Tiaprofenic acid", "Oxaprozin", "Diflunisal", "Flurbiprofen", "Diclofenac", "Piroxicam", "Tolmetin", "Tenoxicam", "Nabumetone", "Ibuprofen", "Prednisone", "Melixocam", "Naproxen", "Bupivacaine", "Betamethasone", "Triamcinolone", "Hydrocortisone", "Prednisolone", "Methylprednisolone", "Budesonide", "Cortisone acetate", "Hydrocortisone acetate", "Dexamethasone"]


In [None]:
from scipy.stats import mannwhitneyu
from statsmodels.stats.multitest import fdrcorrection

# Compound names skipped by the loop below (`if compound in banlist`).
# Entries and their order are kept exactly as given, including repeats.
banlist = [
    "Hydrocortisone butyrate", "Hydrocortisone cypionate", "Hydrocortisone phosphate",
    "Hydrocortisone probutate", "Hydrocortisone valerate", "Amlodipine", "Pravastatin",
    "Fludeoxyglucose (18F)", "Rubidium Rb-82", "Ammonia N-13", "Technetium Tc-99m sestamibi",
    "Meloxicam", "Choline magnesium trisalicylate", "Corticotropin", "Tetracosactide",
    "Dexamethasone", "Ketoprofen", "Acetylsalicylic acid", "Meclofenamic acid", "Etodolac",
    "Sulindac", "Valdecoxib", "Fenoprofen", "Celecoxib", "Indomethacin", "Tiaprofenic acid",
    "Oxaprozin", "Diflunisal", "Flurbiprofen", "Diclofenac", "Piroxicam", "Tolmetin",
    "Tenoxicam", "Nabumetone", "Ibuprofen", "Prednisone", "Melixocam", "Naproxen",
    "Bupivacaine", "Betamethasone", "Triamcinolone", "Hydrocortisone", "Prednisolone",
    "Methylprednisolone", "Budesonide", "Cortisone acetate", "Hydrocortisone acetate",
    "Dexamethasone",
]


# Per disease: compare the per-trait counts of compounds linked to the
# disease's MeSH term against all remaining compounds with known targets.
pvals = []
stats = []
meandiffs = []
means = []
dfs = {}

# Column order of every table built here (and of the bars plotted from them).
_trait_columns = ["Ulcerative Colitis", "Rheumathoid Arthtritis", "Coronary Artery Disease", "Alzheimer's Disease", "Schizophrenia"]


def _trait_count_table(compounds):
    """Return a compound-by-trait DataFrame read from ``compound2traitcounter``.

    Each distinct compound yields exactly one row, even if it is listed more
    than once. Compounds without an entry in ``compound2traitcounter`` get an
    all-zero row; those rows come first, followed by the counted ones.
    """
    zero_rows = {}
    counted_rows = {}
    for compound in compounds:
        if compound in compound2traitcounter:
            counter = compound2traitcounter[compound]
            counted_rows[compound] = [counter[trait] for trait in _trait_columns]
        else:
            zero_rows[compound] = [0] * len(_trait_columns)
    rows = {**zero_rows, **counted_rows}
    return pd.DataFrame(data=list(rows.values()), index=list(rows), columns=_trait_columns)


for mesh, disease in zip(mesh_terms, disease_terms):

    indicated = [compound for compound in mesh2compound[mesh] if compound not in banlist]
    trait_df = _trait_count_table(indicated)
    dfs[disease] = trait_df

    # The background is built from scratch, so compounds already in trait_df
    # cannot leak into it and the two groups stay disjoint.
    background = [compound for compound in compound2gene if compound not in trait_df.index]
    counter_df = _trait_count_table(background)

    trait_means = trait_df.mean(axis=0)
    meandiffs.append(trait_means - counter_df.mean(axis=0))
    means.append(trait_means)

    # One column-wise test per trait; run once and keep both outputs.
    statistic, pvalue = mannwhitneyu(trait_df, counter_df)
    pvals.append(pvalue)
    stats.append(statistic)


# Correct all collected p-values in one pass, then draw one bar panel per
# disease showing the mean number of targeted core genes per trait.
pvals = np.asarray(pvals)
oldshape  = pvals.shape
pvals = np.asarray(fdrcorrection(pvals.flatten())[1])

fig, axs = plt.subplots(nrows=5, figsize=(full_width*cm*0.5, full_width*cm*0.5), sharex=False)

short2long = {"UC": "Ulcerative Colitis",
              "RA": 'Rheumathoid Arthtritis',
              "CAD": 'Coronary Artery Disease',
              "AD": "Alzheimer's Disease",
              "SCZ": 'Schizophrenia'}

shorthands = ["UC", "RA", "CAD", "AD", "SCZ"]

for ax, _pvals, _meandiffs, _means, label in zip(axs, np.split(pvals, 5), meandiffs, means, shorthands):

    # Gray: not significant after correction; red/blue: indicated drugs have a
    # higher/lower mean than the background.
    color = [
        "lightgray" if pval > 0.05 else ("red" if meandiff > 0 else "blue")
        for pval, meandiff in zip(_pvals, _meandiffs)
    ]
    ax.bar(range(len(_means)), _means, color=color)
    ax.xaxis.tick_top()
    ax.set_xticks([])
    ax.set_ylabel(label)
    ax.spines[['right', 'top']].set_visible(False)
    ax.set_ylim(0,2)

# Only the top panel carries the trait tick labels.
axs[0].set_xticks(range(5))
axs[0].set_xticklabels(["UC", "RA", "CAD", "AD", "SCZ"], fontsize=8)
axs[0].set_xlabel("Targeted Core Genes for Trait")
axs[0].xaxis.set_label_position('top') 

# Shared vertical caption; spelling of "targeted" corrected.
axs[2].text(x=-2, y=0.5, s="Genes targeted by Drugs for ", rotation=90, va="center")

plt.savefig("drug_target_selectivity_new.svg")


In [None]:
# Show the table currently stored in dfs under the Rheumathoid Arthtritis key.
dfs["Rheumathoid Arthtritis"]

In [None]:
# Show the compound2gene2trait entry for Galantamine.
compound2gene2trait["Galantamine"]

In [None]:
# Hand-curated disease -> drug-name lists. Later cells keep only names that
# are keys of compound2gene, so a misspelled name is dropped silently; the
# spellings below were corrected for that reason (Aducanumab, Brexpiprazole,
# Cariprazine, Meloxicam).
# NOTE(review): presumably compound2gene keys follow DrugBank naming - confirm.
centerwatch_mappings = {
    #https://www.centerwatch.com/directories/1067-fda-approved-drugs/topic/270-ulcerative-colitis
    "Ulcerative Colitis": ["Olsalazine", "Mesalazine", "Balsalazide", "Vedolizumab", "Adalimumab", "Mirikizumab", "Upadacitinib", "Golimumab", "Ustekinumab", "Etrasimod", "Tofacitinib", "Ozanimod"],
    "Coronary Artery Disease": ["Cangrelor", "Rivaroxaban"],
    "Alzheimer's Disease": ["Aducanumab", "Donepezil", "Rivastigmine", "Lecanemab", "Memantine", "Galantamine"],
    "Schizophrenia": ["Aripiprazole", "Lumateperone", "Iloperidone", "Ziprasidone", "Dexmedetomidine", "Paliperidone", "Lurasidone", "Olanzapine", "Samidorphan", "Brexpiprazole", "Risperidone", "Asenapine", "Quetiapine", "Cariprazine"],
    "Rheumathoid Arthtritis": ["Tocilizumab", "Sulfasalazine", "Ibuprofen", "Etanercept", "Adalimumab", "Sarilumab", "Anakinra", "Meloxicam", "Naproxen", "Baricitinib", "Abatacept", "Prednisone", "Infliximab", "Upadacitinib", "Rituximab", "Golimumab", "Tofacitinib"]
}

#banlist = ["Ibuprofen", "Prednisone", "Melixocam", "Naproxen"]

In [None]:
from scipy.stats import mannwhitneyu
from statsmodels.stats.multitest import fdrcorrection

# Per disease: compare the per-trait counts of the CenterWatch-listed drugs
# against all remaining compounds with known targets.
pvals = []
stats = []
meandiffs = []
means = []
dfs = {}

# Drugs left out of the indicated group for the given disease.
weakcoregene_drugs = {"Ulcerative Colitis": ["Upadacitinib", "Golimumab", "Filgotinib"],
                      "Rheumathoid Arthtritis": ["Anakinra", "Infliximab", "Upadacitinib", "Golimumab", "Filgotinib"],
                      "Coronary Artery Disease": [],
                      "Schizophrenia": [],
                      "Alzheimer's Disease": []}

# Column order of every table built here (and of the bars plotted from them).
_trait_columns = ["Ulcerative Colitis", "Rheumathoid Arthtritis", "Coronary Artery Disease", "Alzheimer's Disease", "Schizophrenia"]


def _trait_count_table(compounds):
    """Return a compound-by-trait DataFrame read from ``compound2traitcounter``.

    Each distinct compound yields exactly one row, even if it is listed more
    than once. Compounds without an entry in ``compound2traitcounter`` get an
    all-zero row; those rows come first, followed by the counted ones.
    """
    zero_rows = {}
    counted_rows = {}
    for compound in compounds:
        if compound in compound2traitcounter:
            counter = compound2traitcounter[compound]
            counted_rows[compound] = [counter[trait] for trait in _trait_columns]
        else:
            zero_rows[compound] = [0] * len(_trait_columns)
    rows = {**zero_rows, **counted_rows}
    return pd.DataFrame(data=list(rows.values()), index=list(rows), columns=_trait_columns)


for disease in disease_terms:

    indicated = [
        compound
        for compound in centerwatch_mappings[disease]
        if compound not in banlist
        and compound in compound2gene
        and compound not in weakcoregene_drugs[disease]
    ]
    trait_df = _trait_count_table(indicated)
    dfs[disease] = trait_df

    # The background is built from scratch, so compounds already in trait_df
    # cannot leak into it and the two groups stay disjoint.
    background = [compound for compound in compound2gene if compound not in trait_df.index]
    counter_df = _trait_count_table(background)

    trait_means = trait_df.mean(axis=0)
    meandiffs.append(trait_means - counter_df.mean(axis=0))
    means.append(trait_means)

    # One column-wise test per trait; run once and keep both outputs.
    statistic, pvalue = mannwhitneyu(trait_df, counter_df)
    pvals.append(pvalue)
    stats.append(statistic)


# Adjust every collected p-value in a single FDR correction, then draw one bar
# panel per disease with the mean per-trait counts of its drugs.
pvals = np.asarray(pvals)
oldshape = pvals.shape
pvals = np.asarray(fdrcorrection(pvals.flatten())[1])

fig, axs = plt.subplots(nrows=5, figsize=(full_width*cm*0.5, full_width*cm*0.5), sharex=False)

short2long = {
    "UC": "Ulcerative Colitis",
    "RA": 'Rheumathoid Arthtritis',
    "CAD": 'Coronary Artery Disease',
    "AD": "Alzheimer's Disease",
    "SCZ": 'Schizophrenia',
}

shorthands = ["UC", "RA", "CAD", "AD", "SCZ"]

pvals_per_disease = np.split(pvals, 5)
for ax, _pvals, _meandiffs, _means, label in zip(axs, pvals_per_disease, meandiffs, means, shorthands):
    # Gray when p > 0.05; otherwise red for a positive mean difference and
    # blue for the rest.
    color = [
        "lightgray" if pval > 0.05 else ("red" if meandiff > 0 else "blue")
        for pval, meandiff in zip(_pvals, _meandiffs)
    ]
    ax.bar(range(len(_means)), _means, color=color)
    ax.xaxis.tick_top()
    ax.set_xticks([])
    ax.set_ylabel(label)
    ax.spines[['right', 'top']].set_visible(False)
    ax.set_ylim(0, 3)

# Trait tick labels are shown on the first panel only.
top_ax = axs[0]
top_ax.set_xticks(range(5))
top_ax.set_xticklabels(["UC", "RA", "CAD", "AD", "SCZ"], fontsize=8)
top_ax.set_xlabel("Targeted Core Genes for Trait")
top_ax.xaxis.set_label_position('top')

plt.savefig("drug_target_selectivity_new.svg")


In [None]:
from scipy.stats import mannwhitneyu
from statsmodels.stats.multitest import fdrcorrection

# Compound names skipped by the loop below (`if compound in banlist`).
# Entries and their order are kept exactly as given, including repeats.
banlist = [
    "Hydrocortisone butyrate", "Hydrocortisone cypionate", "Hydrocortisone phosphate",
    "Hydrocortisone probutate", "Hydrocortisone valerate", "Amlodipine", "Pravastatin",
    "Fludeoxyglucose (18F)", "Rubidium Rb-82", "Ammonia N-13", "Technetium Tc-99m sestamibi",
    "Meloxicam", "Choline magnesium trisalicylate", "Corticotropin", "Tetracosactide",
    "Dexamethasone", "Ketoprofen", "Acetylsalicylic acid", "Meclofenamic acid", "Etodolac",
    "Sulindac", "Valdecoxib", "Fenoprofen", "Celecoxib", "Indomethacin", "Tiaprofenic acid",
    "Oxaprozin", "Diflunisal", "Flurbiprofen", "Diclofenac", "Piroxicam", "Tolmetin",
    "Tenoxicam", "Nabumetone", "Ibuprofen", "Prednisone", "Melixocam", "Naproxen",
    "Bupivacaine", "Betamethasone", "Triamcinolone", "Hydrocortisone", "Prednisolone",
    "Methylprednisolone", "Budesonide", "Cortisone acetate", "Hydrocortisone acetate",
    "Dexamethasone",
]

# Per disease: compare the per-trait counts of drugs named by either the MeSH
# mapping or the CenterWatch list against all remaining compounds.
pvals = []
stats = []
meandiffs = []
means = []
dfs = {}

# Column order of every table built here (and of the bars plotted from them).
_trait_columns = ["Ulcerative Colitis", "Rheumathoid Arthtritis", "Coronary Artery Disease", "Alzheimer's Disease", "Schizophrenia"]


def _trait_count_table(compounds):
    """Return a compound-by-trait DataFrame read from ``compound2traitcounter``.

    Each distinct compound yields exactly one row, even if it is listed more
    than once (a drug can occur in both source lists). Compounds without an
    entry in ``compound2traitcounter`` get an all-zero row; those rows come
    first, followed by the counted ones.
    """
    zero_rows = {}
    counted_rows = {}
    for compound in compounds:
        if compound in compound2traitcounter:
            counter = compound2traitcounter[compound]
            counted_rows[compound] = [counter[trait] for trait in _trait_columns]
        else:
            zero_rows[compound] = [0] * len(_trait_columns)
    rows = {**zero_rows, **counted_rows}
    return pd.DataFrame(data=list(rows.values()), index=list(rows), columns=_trait_columns)


for mesh, disease in zip(mesh_terms, disease_terms):

    indicated = [
        compound
        for compound in mesh2compound[mesh] + centerwatch_mappings[disease]
        if compound not in banlist
        and compound in compound2gene
        and compound not in weakcoregene_drugs[disease]
    ]
    trait_df = _trait_count_table(indicated)
    dfs[disease] = trait_df

    # The background is built from scratch, so compounds already in trait_df
    # cannot leak into it and the two groups stay disjoint.
    background = [compound for compound in compound2gene if compound not in trait_df.index]
    counter_df = _trait_count_table(background)

    trait_means = trait_df.mean(axis=0)
    meandiffs.append(trait_means - counter_df.mean(axis=0))
    means.append(trait_means)

    # One column-wise test per trait; run once and keep both outputs.
    statistic, pvalue = mannwhitneyu(trait_df, counter_df)
    pvals.append(pvalue)
    stats.append(statistic)

    # Mean over both groups together; the value left after the last iteration
    # is what the plotting code below reads.
    global_means = pd.concat((counter_df, trait_df)).mean(axis=0)


# Correct all collected p-values in one pass, then draw one bar panel per
# disease with the overall mean per trait as a dashed reference line.
pvals = np.asarray(pvals)
oldshape  = pvals.shape
pvals = np.asarray(fdrcorrection(pvals.flatten())[1])

fig, axs = plt.subplots(nrows=5, figsize=(full_width*cm*0.5, full_width*cm*0.5), sharex=False)

short2long = {"UC": "Ulcerative Colitis",
              "RA": 'Rheumathoid Arthtritis',
              "CAD": 'Coronary Artery Disease',
              "AD": "Alzheimer's Disease",
              "SCZ": 'Schizophrenia'}

shorthands = ["UC", "RA", "CAD", "AD", "SCZ"]

# Step line through the per-trait overall means. The last value is repeated so
# the final step spans the last bar; .iloc is used because positional [-1] on a
# label-indexed Series is deprecated in pandas.
reference_x = np.arange(len(global_means)+1)-0.5
reference_y = global_means.tolist() + [global_means.iloc[-1]]

for ax, _pvals, _meandiffs, _means, label in zip(axs, np.split(pvals, 5), meandiffs, means, shorthands):

    # Gray: not significant after correction; red/blue: indicated drugs have a
    # higher/lower mean than the background.
    color = [
        "lightgray" if pval > 0.05 else ("red" if meandiff > 0 else "blue")
        for pval, meandiff in zip(_pvals, _meandiffs)
    ]
    ax.bar(range(len(_means)), _means, color=color)
    ax.xaxis.tick_top()
    ax.set_xticks([])
    ax.set_ylabel(label)
    ax.spines[['right', 'top']].set_visible(False)
    ax.set_ylim(0,2)
    ax.step(x=reference_x, y=reference_y, where="post", c="black", linestyle="--", zorder=5, linewidth=0.5)


# Only the top panel carries the trait tick labels.
axs[0].set_xticks(range(5))
axs[0].set_xticklabels(["UC", "RA", "CAD", "AD", "SCZ"], fontsize=8)
axs[0].set_xlabel("Targeted Core Genes for Trait")
axs[0].xaxis.set_label_position('top') 

# Shared vertical caption; spelling of "targeted" corrected.
axs[2].text(x=-2, y=0.5, s="Genes targeted by Drugs for ", rotation=90, va="center")

plt.savefig("drug_target_selectivity_new.svg")


In [None]:
import xmltodict

# Parse the full DrugBank XML export into nested dicts. The encoding is pinned
# so decoding does not depend on the platform's default locale.
# NOTE(review): assumes the export is UTF-8 - confirm against its XML declaration.
with open("/mnt/storage/speos/data/drugbank/full database.xml", "r", encoding="utf-8") as file:
    python_dict=xmltodict.parse(file.read())


In [None]:
# Text fragments searched for in each DrugBank "indication" field; the order
# matches the disease keys of disease2query below.
queries = ["ulcerative colitis", "rheumatoid arthritis", "coronary artery disease", "Alzheimer", "schizophrenia"]

# disease -> names of the drugs whose indication text contains the query.
disease2query = {
    disease_name: []
    for disease_name in (
        "Ulcerative Colitis",
        "Rheumathoid Arthtritis",
        "Coronary Artery Disease",
        "Alzheimer's Disease",
        "Schizophrenia",
    )
}

for query, disease in zip(queries, disease2query.keys()):
    matches = disease2query[disease]
    for entry in python_dict["drugbank"]["drug"]:
        try:
            groups = entry["groups"]["group"]
            # Keep a drug when it is neither investigational nor experimental,
            # or when it is marked approved.
            only_trial_stage = "investigational" in groups or "experimental" in groups
            if only_trial_stage and 'approved' not in groups:
                continue
            if query in entry["indication"]:
                matches.append(entry["name"])
        except TypeError:
            # Entries with a field that cannot be subscripted or searched
            # (e.g. None) are skipped.
            continue


In [None]:
# Show the drug names collected for the Alzheimer's Disease key.
disease2query["Alzheimer's Disease"]

In [None]:
# Names of all DrugBank entries whose group field contains "nutraceutical".
nutraceuticals = {
    drug["name"]
    for drug in python_dict["drugbank"]["drug"]
    if "nutraceutical" in drug["groups"]["group"]
}

In [None]:
from scipy.stats import mannwhitneyu
from statsmodels.stats.multitest import fdrcorrection

# Compound names skipped by the loop below (`if compound in banlist`).
# Entries and their order are kept exactly as given, including repeats.
banlist = [
    "Atorvastatin", "Simvastatin", "Lovastatin", "Pitavastatin", "Rosuvastatin",
    "Isosorbide mononitrate", "Isosorbide dinitrate", "Nitroglycerin", "Methotrexate",
    "Betamethasone phosphate", "Acemetacin", "Alclofenac", "Droxicam", "Etoricoxib",
    "Salsalate", "Mefenamic acid", "Ketorolac", "Aceclofenac", "Glutamic acid",
    "Kappadione", "Tixocortol", "D-alpha-Tocopherol acetate", "NADH", "Phylloquinone",
    "Hydrocortisone butyrate", "Hydrocortisone cypionate", "Hydrocortisone phosphate",
    "Hydrocortisone probutate", "Hydrocortisone valerate", "Amlodipine", "Pravastatin",
    "Fludeoxyglucose (18F)", "Rubidium Rb-82", "Ammonia N-13", "Technetium Tc-99m sestamibi",
    "Meloxicam", "Choline magnesium trisalicylate", "Corticotropin", "Tetracosactide",
    "Dexamethasone", "Ketoprofen", "Acetylsalicylic acid", "Meclofenamic acid", "Etodolac",
    "Sulindac", "Valdecoxib", "Fenoprofen", "Celecoxib", "Indomethacin", "Tiaprofenic acid",
    "Oxaprozin", "Diflunisal", "Flurbiprofen", "Diclofenac", "Piroxicam", "Tolmetin",
    "Tenoxicam", "Nabumetone", "Ibuprofen", "Prednisone", "Meloxicam", "Naproxen",
    "Bupivacaine", "Betamethasone", "Triamcinolone", "Hydrocortisone", "Prednisolone",
    "Methylprednisolone", "Budesonide", "Cortisone acetate", "Hydrocortisone acetate",
    "Dexamethasone",
]

# Per disease: compare the per-trait counts of drugs whose DrugBank indication
# matched the disease query against all remaining compounds. Nutraceuticals
# are left out of both groups.
pvals = []
stats = []
meandiffs = []
means = []
dfs = {}

# Column order of every table built here (and of the bars plotted from them).
_trait_columns = ["Ulcerative Colitis", "Rheumathoid Arthtritis", "Coronary Artery Disease", "Alzheimer's Disease", "Schizophrenia"]


def _trait_count_table(compounds):
    """Return a compound-by-trait DataFrame read from ``compound2traitcounter``.

    Each distinct compound yields exactly one row, even if it is listed more
    than once. Compounds without an entry in ``compound2traitcounter`` get an
    all-zero row; those rows come first, followed by the counted ones.
    """
    zero_rows = {}
    counted_rows = {}
    for compound in compounds:
        if compound in compound2traitcounter:
            counter = compound2traitcounter[compound]
            counted_rows[compound] = [counter[trait] for trait in _trait_columns]
        else:
            zero_rows[compound] = [0] * len(_trait_columns)
    rows = {**zero_rows, **counted_rows}
    return pd.DataFrame(data=list(rows.values()), index=list(rows), columns=_trait_columns)


for mesh, disease in zip(mesh_terms, disease_terms):

    indicated = [
        compound
        for compound in disease2query[disease]
        if compound not in banlist
        and compound in compound2gene
        and compound not in weakcoregene_drugs[disease]
        and compound not in nutraceuticals
    ]
    trait_df = _trait_count_table(indicated)
    dfs[disease] = trait_df

    # The background is built from scratch, so compounds already in trait_df
    # cannot leak into it and the two groups stay disjoint.
    background = [
        compound
        for compound in compound2gene
        if compound not in nutraceuticals and compound not in trait_df.index
    ]
    counter_df = _trait_count_table(background)

    trait_means = trait_df.mean(axis=0)
    meandiffs.append(trait_means - counter_df.mean(axis=0))
    means.append(trait_means)

    # One column-wise test per trait; run once and keep both outputs.
    statistic, pvalue = mannwhitneyu(trait_df, counter_df)
    pvals.append(pvalue)
    stats.append(statistic)

    # Mean over both groups together; the value left after the last iteration
    # is what the plotting code below reads.
    global_means = pd.concat((counter_df, trait_df)).mean(axis=0)


# Correct all collected p-values in one pass, then draw one bar panel per
# disease (labelled with its number of drugs) with the overall mean per trait
# as a dashed reference line.
pvals = np.asarray(pvals)
oldshape  = pvals.shape
pvals = np.asarray(fdrcorrection(pvals.flatten())[1])

fig, axs = plt.subplots(nrows=5, figsize=(full_width*cm*0.5, full_width*cm*0.5), sharex=False)
short2long = {"UC": "Ulcerative Colitis",
              "RA": 'Rheumathoid Arthtritis',
              "CAD": 'Coronary Artery Disease',
              "AD": "Alzheimer's Disease",
              "SCZ": 'Schizophrenia'}

shorthands = ["UC", "RA", "CAD", "AD", "SCZ"]

# 8-bit RGB triples scaled to matplotlib's 0-1 range (divide by 255).
enriched_color = (208/255, 36/255, 36/255)
depleted_color = (73/255, 90/255, 176/255)

# Step line through the per-trait overall means. The last value is repeated so
# the final step spans the last bar; .iloc is used because positional [-1] on a
# label-indexed Series is deprecated in pandas.
reference_x = np.arange(len(global_means)+1)-0.5
reference_y = global_means.tolist() + [global_means.iloc[-1]]

for ax, _pvals, _meandiffs, _means, label in zip(axs, np.split(pvals, 5), meandiffs, means, shorthands):

    # Gray: not significant after correction; otherwise coloured by the sign
    # of the difference to the background mean.
    color = [
        "lightgray" if pval > 0.05 else (enriched_color if meandiff > 0 else depleted_color)
        for pval, meandiff in zip(_pvals, _meandiffs)
    ]
    ax.bar(range(len(_means)), _means, color=color)
    ax.xaxis.tick_top()
    ax.set_xticks([])
    # Second label line shows how many drugs are in this disease's table.
    n_drugs = len(dfs[short2long[label]])
    ax.set_ylabel(label + "\n" + "$\\regular{n=%s}$" % n_drugs)
    ax.spines[['right', 'top']].set_visible(False)
    ax.set_ylim(0,2)
    ax.step(x=reference_x, y=reference_y, where="post", c="black", linestyle="--", zorder=5, linewidth=0.5)

# Only the top panel carries the trait tick labels.
axs[0].set_xticks(range(5))
axs[0].set_xticklabels(["UC", "RA", "CAD", "AD", "SCZ"], fontsize=8)
axs[0].set_xlabel("Targeted Core Genes for Trait")
axs[0].xaxis.set_label_position('top') 

# Shared vertical caption; spelling of "targeted" corrected.
axs[2].text(x=-2.5, y=0.5, s="Genes targeted by Drugs for ", rotation=90, va="center")

plt.savefig("drug_target_selectivity_new.svg")


In [None]:
# Show the table currently stored in dfs under the Rheumathoid Arthtritis key.
dfs["Rheumathoid Arthtritis"]

In [None]:
from scipy.stats import mannwhitneyu
from statsmodels.stats.multitest import fdrcorrection

# Compound names skipped by the loop below (`if compound in banlist`).
# Entries and their order are kept exactly as given, including repeats.
banlist = [
    "Kappadione", "Tixocortol", "D-alpha-Tocopherol acetate", "NADH", "Phylloquinone",
    "Hydrocortisone butyrate", "Hydrocortisone cypionate", "Hydrocortisone phosphate",
    "Hydrocortisone probutate", "Hydrocortisone valerate", "Amlodipine", "Pravastatin",
    "Fludeoxyglucose (18F)", "Rubidium Rb-82", "Ammonia N-13", "Technetium Tc-99m sestamibi",
    "Meloxicam", "Choline magnesium trisalicylate", "Corticotropin", "Tetracosactide",
    "Dexamethasone", "Ketoprofen", "Acetylsalicylic acid", "Meclofenamic acid", "Etodolac",
    "Sulindac", "Valdecoxib", "Fenoprofen", "Celecoxib", "Indomethacin", "Tiaprofenic acid",
    "Oxaprozin", "Diflunisal", "Flurbiprofen", "Diclofenac", "Piroxicam", "Tolmetin",
    "Tenoxicam", "Nabumetone", "Ibuprofen", "Prednisone", "Meloxicam", "Naproxen",
    "Bupivacaine", "Betamethasone", "Triamcinolone", "Hydrocortisone", "Prednisolone",
    "Methylprednisolone", "Budesonide", "Cortisone acetate", "Hydrocortisone acetate",
    "Dexamethasone",
]

# Per disease: compare the per-trait counts of drugs whose DrugBank indication
# matched the disease query against all remaining compounds with known targets.
pvals = []
stats = []
meandiffs = []
means = []
dfs = {}

# Column order of every table built here (and of the boxes plotted from them).
_trait_columns = ["Ulcerative Colitis", "Rheumathoid Arthtritis", "Coronary Artery Disease", "Alzheimer's Disease", "Schizophrenia"]


def _trait_count_table(compounds):
    """Return a compound-by-trait DataFrame read from ``compound2traitcounter``.

    Each distinct compound yields exactly one row, even if it is listed more
    than once. Compounds without an entry in ``compound2traitcounter`` get an
    all-zero row; those rows come first, followed by the counted ones.
    """
    zero_rows = {}
    counted_rows = {}
    for compound in compounds:
        if compound in compound2traitcounter:
            counter = compound2traitcounter[compound]
            counted_rows[compound] = [counter[trait] for trait in _trait_columns]
        else:
            zero_rows[compound] = [0] * len(_trait_columns)
    rows = {**zero_rows, **counted_rows}
    return pd.DataFrame(data=list(rows.values()), index=list(rows), columns=_trait_columns)


for mesh, disease in zip(mesh_terms, disease_terms):

    indicated = [
        compound
        for compound in disease2query[disease]
        if compound not in banlist
        and compound in compound2gene
        and compound not in weakcoregene_drugs[disease]
    ]
    trait_df = _trait_count_table(indicated)
    dfs[disease] = trait_df

    # The background is built from scratch, so compounds already in trait_df
    # cannot leak into it and the two groups stay disjoint.
    background = [compound for compound in compound2gene if compound not in trait_df.index]
    counter_df = _trait_count_table(background)

    trait_means = trait_df.mean(axis=0)
    meandiffs.append(trait_means - counter_df.mean(axis=0))
    means.append(trait_means)

    # One column-wise test per trait; run once and keep both outputs.
    statistic, pvalue = mannwhitneyu(trait_df, counter_df)
    pvals.append(pvalue)
    stats.append(statistic)

    # Mean over both groups together; the value left after the last iteration
    # is what the plotting code below reads.
    global_means = pd.concat((counter_df, trait_df)).mean(axis=0)


# Correct all collected p-values in one pass (kept in `pvals` for inspection),
# then draw one box-plot panel per disease with the overall mean per trait as a
# dashed reference line.
pvals = np.asarray(pvals)
oldshape  = pvals.shape
pvals = np.asarray(fdrcorrection(pvals.flatten())[1])

fig, axs = plt.subplots(nrows=5, figsize=(full_width*cm*0.5, full_width*cm*0.5), sharex=False)

short2long = {"UC": "Ulcerative Colitis",
              "RA": 'Rheumathoid Arthtritis',
              "CAD": 'Coronary Artery Disease',
              "AD": "Alzheimer's Disease",
              "SCZ": 'Schizophrenia'}

shorthands = ["UC", "RA", "CAD", "AD", "SCZ"]

# Step line through the per-trait overall means. The last value is repeated so
# the final step spans the last box; .iloc is used because positional [-1] on a
# label-indexed Series is deprecated in pandas.
reference_x = np.arange(len(global_means)+1)-0.5
reference_y = global_means.tolist() + [global_means.iloc[-1]]

# The boxes are not coloured by significance, so no per-bar colours are
# computed here; dfs is iterated in insertion (disease) order.
for ax, _df, label in zip(axs, dfs.values(), shorthands):
    ax.boxplot(_df, positions = range(0,5))
    ax.xaxis.tick_top()
    ax.set_xticks([])
    ax.set_ylabel(label)
    ax.spines[['right', 'top']].set_visible(False)
    ax.set_ylim(0,8)
    ax.step(x=reference_x, y=reference_y, where="post", c="black", linestyle="--", zorder=5, linewidth=0.5)

# Only the top panel carries the trait tick labels.
axs[0].set_xticks(range(5))
axs[0].set_xticklabels(["UC", "RA", "CAD", "AD", "SCZ"], fontsize=8)
axs[0].set_xlabel("Targeted Core Genes for Trait")
axs[0].xaxis.set_label_position('top') 

# Shared vertical caption; spelling of "targeted" corrected.
axs[2].text(x=-2, y=0.5, s="Genes targeted by Drugs for ", rotation=90, va="center")

plt.savefig("drug_target_selectivity_new.svg")


In [None]:
# Show the table currently stored in dfs under the Ulcerative Colitis key.
dfs["Ulcerative Colitis"]

In [None]:
# Show the compound2gene entry for Kappadione.
compound2gene["Kappadione"]

In [None]:
# Show the compound2gene2trait entry for Baricitinib.
compound2gene2trait["Baricitinib"]

In [None]:
# Show the compound2gene2trait entry for Sarilumab.
compound2gene2trait["Sarilumab"]

In [None]:
# Show the compound2gene2trait entry for Etanercept.
compound2gene2trait["Etanercept"]

In [None]:
# Show the compound2gene2trait entry for Aripiprazole.
compound2gene2trait["Aripiprazole"]

In [None]:
# Show the whole compound2gene2trait mapping.
compound2gene2trait

In [None]:
# Show the Schizophrenia table with its rows sorted by index label.
dfs["Schizophrenia"].sort_index()

In [None]:
# Show the Coronary Artery Disease table with its rows sorted by index label.
dfs["Coronary Artery Disease"].sort_index()

In [None]:
# Show the Alzheimer's Disease table with its rows sorted by index label.
dfs["Alzheimer's Disease"].sort_index()

In [None]:
# Show the Rheumathoid Arthtritis table with its rows sorted by index label.
dfs["Rheumathoid Arthtritis"].sort_index()

In [None]:
# Hand-curated drug-name lists for the two antipsychotic classes.
# NOTE(review): "Brexpiprazole" and "Brexiprazole" both appear in the atypical
# list; the second looks like a misspelled repeat and counts towards
# len(atypical_antipsychotics) - confirm whether that is intended.
atypical_antipsychotics = [
    "Sulpiride", "Loxapine", "Remoxipride", "Methotrimeprazine", "Sertindole",
    "Brexpiprazole", "Zotepine", "Aripiprazole", "Aripiprazole lauroxil", "Lurasidone",
    "Quetiapine", "Cariprazine", "Brexiprazole", "Olanzapine", "Ziprasidone",
    "Asenapine", "Risperidone", "Paliperidone", "Lumateperone", "Iloperidone",
    "Pimavanserin", "Clozapine",
]

typical_antipsychotics = [
    "Trifluoperazine", "Thioproperazine", "Pimozide", "Fluphenazine", "Flupentixol",
    "Droperidol", "Perphenazine", "Molindone", "Promazine", "Periciazine",
    "Chlorprothixene", "Mesoridazine", "Levomepromazine", "Prochlorperazine",
    "Chlorpromazine", "Haloperidol", "Thioridazine", "Thiothixene", "Zuclopenthixol",
    "Fluspirilene",
]


In [None]:
# Redefine the typical set as every drug in the Schizophrenia table that is not
# listed as atypical; this replaces any previously assigned
# typical_antipsychotics list.
typical_antipsychotics = [drug for drug in dfs["Schizophrenia"].index if drug not in atypical_antipsychotics]

In [None]:

# Flat gene lists pooled over every trait except Schizophrenia: first from the
# weak core gene sets, then from the core gene sets, then both concatenated.
# A gene is repeated once per set it occurs in.
other_weakcoregenes = [
    gene
    for trait, genelist in weak_core_gene_sets.items()
    if trait != "Schizophrenia"
    for gene in genelist
]

other_strongcoregenes = [
    gene
    for trait, genelist in core_gene_sets.items()
    if trait != "Schizophrenia"
    for gene in genelist
]

other_coregenes = other_weakcoregenes + other_strongcoregenes


In [None]:


# Trait-count tables for atypical and typical antipsychotics, each extended
# with two per-compound target tallies.
_antipsychotic_trait_columns = ["Ulcerative Colitis", "Rheumathoid Arthtritis", "Coronary Artery Disease", "Alzheimer's Disease", "Schizophrenia"]


def _antipsychotic_trait_table(compounds):
    """Build the per-compound table for one antipsychotic class.

    Compounds are skipped when they are in ``banlist``, have no entry in
    ``compound2gene``, or are listed under ``weakcoregene_drugs["Schizophrenia"]``
    (named explicitly rather than through a leftover loop variable).

    Columns: the five trait counts from ``compound2traitcounter`` (all zero if
    the compound has no counter), then "Others" (number of the compound's
    targets found in ``global_peripheral_genes`` minus the sum of the five
    trait counts) and "Weak Core Genes" (number of its targets found in
    ``other_coregenes``).

    The tallies are stored per compound and looked up by row label, so they
    stay attached to the right row whatever the row order is.
    """
    zero_rows = {}
    counted_rows = {}
    peripheral_hits = {}
    other_core_hits = {}

    for compound in compounds:
        if compound in banlist or compound not in compound2gene:
            continue
        if compound in weakcoregene_drugs["Schizophrenia"]:
            continue

        if compound in compound2traitcounter:
            counter = compound2traitcounter[compound]
            counted_rows[compound] = [counter[trait] for trait in _antipsychotic_trait_columns]
        else:
            zero_rows[compound] = [0] * len(_antipsychotic_trait_columns)

        targets = compound2gene[compound]
        peripheral_hits[compound] = len([gene for gene in targets if gene in global_peripheral_genes])
        other_core_hits[compound] = len([gene for gene in targets if gene in other_coregenes])

    # Rows without a counter first, then rows with one.
    rows = {**zero_rows, **counted_rows}
    table = pd.DataFrame(data=list(rows.values()), index=list(rows), columns=_antipsychotic_trait_columns)

    trait_sum = table.sum(axis=1)
    table["Others"] = [peripheral_hits[compound] for compound in table.index] - trait_sum
    table["Weak Core Genes"] = [other_core_hits[compound] for compound in table.index]
    return table


atypical_df = _antipsychotic_trait_table(atypical_antipsychotics)
typical_df = _antipsychotic_trait_table(typical_antipsychotics)


In [None]:
# Column-wise means of the atypical antipsychotics table.
atypical_df.mean(axis=0)

In [None]:
# Column-wise means of the typical antipsychotics table.
typical_df.mean(axis=0)

In [None]:
# Mann-Whitney U test of the atypical table against the typical table.
mannwhitneyu(atypical_df, typical_df)

In [None]:
# Show the "Primary Indication" entry of quint_real_classes.
quint_real_classes["Primary Indication"]

In [None]:
# Show the compound2gene2trait entry for Ranolazine.
compound2gene2trait["Ranolazine"]

In [None]:
# Show the compound2gene2trait entry for Carvedilol.
compound2gene2trait["Carvedilol"]

In [None]:
# Show the compound2gene2trait entry for Aripiprazole.
compound2gene2trait["Aripiprazole"]

In [None]:
# Show the full atypical antipsychotics table.
atypical_df

In [None]:
def _pooled_target_counts(compounds):
    """Count how many of the given compounds list each target gene.

    Compounds are skipped when they are in ``banlist``, have no entry in
    ``compound2gene``, or are listed under ``weakcoregene_drugs["Schizophrenia"]``
    (named explicitly rather than through a leftover loop variable). The
    ``compound2gene`` entries of the remaining compounds are pooled and tallied.
    """
    pooled = []
    for compound in compounds:
        if compound in banlist or compound not in compound2gene:
            continue
        if compound in weakcoregene_drugs["Schizophrenia"]:
            continue
        pooled.extend(compound2gene[compound])
    return Counter(pooled)


# Gene -> number of atypical / typical antipsychotics that list it as a target.
atypical_targets = _pooled_target_counts(atypical_antipsychotics)

typical_targets = _pooled_target_counts(typical_antipsychotics)

In [None]:
# Fraction of typical antipsychotics that list each gene as a target.
# NOTE(review): the denominator is the length of the whole name list, including
# any names that were skipped when the targets were pooled - confirm.
n_typical_names = len(typical_antipsychotics)
typical_Targets_relative = {
    gene: num / n_typical_names for gene, num in typical_targets.items()
}

typical_Targets_relative


In [None]:
# Fraction of atypical antipsychotics that list each gene as a target.
# NOTE(review): the denominator is the length of the whole name list, including
# any names that were skipped when the targets were pooled - confirm.
n_atypical_names = len(atypical_antipsychotics)
atypical_Targets_relative = {
    gene: num / n_atypical_names for gene, num in atypical_targets.items()
}

atypical_Targets_relative

In [None]:
# Receptor class -> gene-symbol prefixes used to assign target genes to it.
recepter_types = {
    "Dopaminergic": ["DRD"],
    "Serotonergic": ["HTR"],
    "Adrenergic": ["ADR"],
    "Cholinergic": ["CHRM"],
    "GABAergic": ["GAB"],
    "Glutamergic": ["GRI"],
    "Histaminergic": ["HRH"],
    "Opioid": ["OPR"],
}

# Every gene targeted by at least one typical or atypical antipsychotic.
all_psych_targets = list(set([*typical_Targets_relative, *atypical_Targets_relative]))

genes = []
label2gene = {}

# First pass: genes whose symbol starts with one of a class's prefixes.
for label, prefices in recepter_types.items():
    class_prefixes = tuple(prefices)
    for gene in all_psych_targets:
        if not gene.startswith(class_prefixes):
            continue
        label2gene.setdefault(label, []).append(gene)
        genes.append(gene)

# Second pass: everything not claimed by a class goes to "Others".
for gene in all_psych_targets:
    if gene in genes:
        continue
    label2gene.setdefault("Others", []).append(gene)
    genes.append(gene)

# Alphabetical order within each class.
for label, members in label2gene.items():
    members.sort()


In [None]:
# Show the receptor-class -> sorted gene list mapping.
label2gene

In [None]:
# Bar chart of the fraction of typical antipsychotics hitting each pooled
# target gene, grouped and coloured by receptor class. Alongside, a 2x2 tally
# (targeted or not  x  core gene of another trait or not) is accumulated.
cycler = ColorCycler(["#01016f", "#89006b", "#d00053", "#f85732", "#ffa600"])

genes, values, colors, tokens = [], [], [], []

target_and_othercore = 0
target_not_othercore = 0
nontarget_and_othercore = 0
nontarget_not_othercore = 0

for label, labeled_genes in label2gene.items():
    color = cycler.next()  # one colour per receptor class
    for gene in labeled_genes:
        is_target = gene in typical_Targets_relative
        in_other_core = gene in other_coregenes
        percentage = typical_Targets_relative[gene] if is_target else 0

        if is_target and in_other_core:
            target_and_othercore += 1
        elif is_target:
            target_not_othercore += 1
        elif in_other_core:
            nontarget_and_othercore += 1
        else:
            nontarget_not_othercore += 1

        # Marker above the bar: "*" Schizophrenia core gene, "†" core gene of
        # another trait, "·" Schizophrenia weak core gene, blank otherwise.
        if gene in core_gene_sets["Schizophrenia"]:
            token = "*"
        elif in_other_core:
            token = "†"
        elif gene in weak_core_gene_sets["Schizophrenia"]:
            token = "·"
        else:
            token = " "

        genes.append(gene)
        values.append(percentage)
        colors.append(color)
        tokens.append(token)

color = cycler.next()

n_bars = len(values)
n_typical_names = len(typical_antipsychotics)

fig, ax = plt.subplots(figsize=(full_width*cm, 4*cm))
ax.bar(range(n_bars), values, width=1, color=colors, zorder=5)
ax.set_xticks(range(n_bars), genes, rotation=90, fontsize=5)
ax.grid(axis="y", linestyle="--", zorder=-5, color="lightgray")

# Secondary axis expressing the same bars as absolute drug counts.
ax1 = ax.twinx()
ax1.set_ylim(0, n_typical_names)
ax1.set_yticks([*range(0, n_typical_names, 3), n_typical_names])
ax1.set_ylabel("Target Count")

ax.set_ylim(0, 1)
ax.set_xlim(-0.5, n_bars - 0.5)
ax.set_ylabel("Target Fraction")

for position, (height, token) in enumerate(zip(values, tokens)):
    ax.text(x=position, y=height, s=token, va="bottom", ha="center", fontsize=5)

ax.text(x=n_bars / 2, y=0.95, s="Typical Antipsychotics", va="top", ha="center")

plt.savefig("typical_antipsychotics.svg")


In [None]:
from scipy.stats import fisher_exact

# Fisher's exact test on the 2x2 table of counters accumulated by the
# preceding plotting cell: rows = target / non-target,
# columns = in other_coregenes / not in other_coregenes.
target_row = [target_and_othercore, target_not_othercore]
nontarget_row = [nontarget_and_othercore, nontarget_not_othercore]
fisher_exact([target_row, nontarget_row])

In [None]:
# Display the raw 2x2 contingency counts that were passed to fisher_exact:
# rows = target / non-target, columns = other-core / not other-core.
[[target_and_othercore, target_not_othercore],
[nontarget_and_othercore, nontarget_not_othercore]]

In [None]:
# Atypical antipsychotics: collect one bar per labeled gene, count the 2x2
# target/other-core contingency table along the way, then draw and save the
# bar chart.
cycler = ColorCycler(["#01016f", "#89006b", "#d00053", "#f85732", "#ffa600"])

# Parallel per-bar lists, filled in label order.
genes, values, colors, tokens = [], [], [], []

# Contingency counters: (is a target of an atypical drug) x (is in other_coregenes).
target_and_othercore = target_not_othercore = 0
nontarget_and_othercore = nontarget_not_othercore = 0

for label, labeled_genes in label2gene.items():
    color = cycler.next()  # one color per label group
    for gene in labeled_genes:
        is_other_core = gene in other_coregenes
        try:
            percentage = atypical_Targets_relative[gene]
        except KeyError:
            # Gene has no entry in atypical_Targets_relative: draw an empty bar.
            percentage = 0
            if is_other_core:
                nontarget_and_othercore += 1
            else:
                nontarget_not_othercore += 1
        else:
            if is_other_core:
                target_and_othercore += 1
            else:
                target_not_othercore += 1

        # Marker drawn above the bar; the first matching gene set wins.
        if gene in core_gene_sets["Schizophrenia"]:
            marker = "*"
        elif is_other_core:
            marker = "†"
        elif gene in weak_core_gene_sets["Schizophrenia"]:
            marker = "·"
        else:
            marker = " "

        genes.append(gene)
        values.append(percentage)
        colors.append(color)
        tokens.append(marker)


fig, ax = plt.subplots(figsize=(full_width*cm, 4*cm))
bar_positions = range(len(values))
ax.bar(bar_positions, values, width=1, color=colors, zorder=5)
ax.set_xticks(bar_positions, genes, rotation=90, fontsize=5)
ax.set_ylim(0, 1)
ax.set_xlim(-0.5, len(values) - 0.5)
ax.set_ylabel("Target Fraction")
ax.grid(axis="y", linestyle="--", zorder=-5, color="lightgray")

# Right-hand axis shows the same bars as absolute drug counts.
n_atypical = len(atypical_antipsychotics)
ax1 = ax.twinx()
ax1.set_ylim(0, n_atypical)
ax1.set_yticks([*range(0, n_atypical, 3), n_atypical])
ax1.set_ylabel("Target Count")

# Annotate every bar with its gene-set marker.
for position, (height, marker) in enumerate(zip(values, tokens)):
    ax.text(x=position, y=height, s=marker, va="bottom", ha="center", fontsize=5)

ax.text(x=len(values)/2, y=0.95, s="Atypical Antipsychotics", va="top", ha="center")
plt.savefig("atypical_antipsychotics.svg")

In [None]:
from scipy.stats import fisher_exact

# Fisher's exact test on the 2x2 table of counters accumulated by the
# atypical-antipsychotics cell: rows = target / non-target,
# columns = in other_coregenes / not in other_coregenes.
target_row = [target_and_othercore, target_not_othercore]
nontarget_row = [nontarget_and_othercore, nontarget_not_othercore]
fisher_exact([target_row, nontarget_row])

In [None]:
# Display the raw 2x2 contingency counts that were passed to fisher_exact:
# rows = target / non-target, columns = other-core / not other-core.
[[target_and_othercore, target_not_othercore],
              [nontarget_and_othercore, nontarget_not_othercore]]

In [None]:
import scipy.stats as stats

# Two-sided z-test for a difference between two odds ratios:
#   z = (ln OR_typical - ln OR_atypical) / sqrt(Var[ln OR_typical] + Var[ln OR_atypical])
# with Var[ln OR] approximated by the sum of reciprocal cell counts.
#
# The 2x2 tables are hard-coded copies of notebook output.
# NOTE(review): presumably they are the [[target_and_othercore, target_not_othercore],
# [nontarget_and_othercore, nontarget_not_othercore]] tables printed by the
# typical/atypical cells -- confirm they are still current after re-running those cells.
typical_table = [[17, 58], [1, 11]]
atypical_table = [[4, 57], [14, 12]]

# fisher_exact returns (odds ratio, p-value); only the odds ratio is needed here.
odds_typical = stats.fisher_exact(typical_table)[0]
odds_atypical = stats.fisher_exact(atypical_table)[0]

log_odds_typical = np.log(odds_typical)
# Derived from the computed odds ratio rather than a pasted literal, so the
# point estimate always belongs to the same table as the variance below.
log_odds_atypical = np.log(odds_atypical)
delta = log_odds_typical - log_odds_atypical

# Flattened cell counts (row-major) used for the variance of each log odds ratio.
typical_array = [count for row in typical_table for count in row]
atypical_array = [count for row in atypical_table for count in row]

var_typical = np.sum([1 / value for value in typical_array])
var_atypical = np.sum([1 / value for value in atypical_array])

# The two tables are treated as independent, so their variances add.
se_delta = np.sqrt(var_typical + var_atypical)
zval = delta / se_delta
# Two-sided p-value under the standard normal.
pval = stats.norm.sf(np.abs(zval)) * 2

In [None]:
# Display the two-sided p-value of the z-test on the difference of log odds ratios.
pval

In [None]:
# Typical antipsychotics that have an entry in compound2gene and list ORM2 among their genes.
typical_with_targets = set(typical_antipsychotics) & set(compound2gene.keys())
[drug for drug in typical_with_targets if "ORM2" in compound2gene[drug]]

In [None]:
# Typical antipsychotics that have an entry in compound2gene and list ORM1 among their genes.
typical_with_targets = set(typical_antipsychotics) & set(compound2gene.keys())
[drug for drug in typical_with_targets if "ORM1" in compound2gene[drug]]

In [None]:
# Typical antipsychotics that have an entry in compound2gene and list CALM1 among their genes.
typical_with_targets = set(typical_antipsychotics) & set(compound2gene.keys())
[drug for drug in typical_with_targets if "CALM1" in compound2gene[drug]]

In [None]:
# Every compound in compound2gene (not only antipsychotics) that lists CALM1 among its genes.
[drug for drug, targets in compound2gene.items() if "CALM1" in targets]

In [None]:
# Display the two-sided p-value of the z-test on the difference of log odds ratios.
pval

In [None]:
# Inspect the compound2gene2trait entry for Aripiprazole.
# NOTE(review): presumably a gene -> trait mapping for this compound; verify where it is built.
compound2gene2trait["Aripiprazole"]

In [None]:
# Check whether Sertindole is contained in typical_antipsychotics.
"Sertindole" in typical_antipsychotics

In [None]:
# Check whether Ziprasidone is contained in typical_antipsychotics.
"Ziprasidone" in typical_antipsychotics

In [None]:
# Check whether HTRA1 is in the weak core gene set for Schizophrenia.
"HTRA1" in weak_core_gene_sets["Schizophrenia"]

In [None]:
# Inspect the compound2gene2trait entry for Samidorphan.
compound2gene2trait["Samidorphan"]

In [None]:
# Inspect the gene2trait entry for KCNH1.
gene2trait["KCNH1"]

In [None]:
# Inspect the gene2trait entry for ADRB1.
gene2trait["ADRB1"]

In [None]:
# Inspect the gene2trait entry for ORM2.
gene2trait["ORM2"]

In [None]:
# Inspect the compound2gene2trait entry for Aripiprazole.
compound2gene2trait["Aripiprazole"]

In [None]:
# Number of entries in new_values (defined outside this part of the notebook).
len(new_values)

In [None]:
# Rank every compound by how strongly its gene list overlaps the
# Alzheimer's Disease core genes:
#   score = (fraction of the compound's genes that are core genes)
#           * (number of the compound's genes that are core genes)
rheumadrugs2genes = {}
for compounds, genes in compound2gene.items():
    of_core_genes = [
        target for target in genes
        if target in core_gene_sets["Alzheimer's Disease"]
    ]
    n_core = len(of_core_genes)
    rheumadrugs2genes[compounds] = (n_core / len(genes)) * n_core

# Ascending sort by score, then reversed in place -> highest score first.
rheumadrugs2genes = sorted(rheumadrugs2genes.items(), key=lambda pair: pair[1])
rheumadrugs2genes.reverse()
rheumadrugs2genes

In [None]:
# Inspect the compound2gene2trait entry for Prenylamine.
compound2gene2trait["Prenylamine"]

In [None]:
# Inspect the compound2gene2trait entry for Sulpiride.
compound2gene2trait["Sulpiride"]

In [None]:
# Inspect the compound2gene2trait entry for Dexamethasone.
compound2gene2trait["Dexamethasone"]

In [None]:
# Inspect the genes recorded for Nicotine in compound2gene.
compound2gene["Nicotine"]

In [None]:
# Inspect the genes recorded for Tacrolimus in compound2gene.
compound2gene["Tacrolimus"]

In [None]:
# Display quintuples (defined outside this part of the notebook).
quintuples