In [1]:
# IMPORTS
# base
import pandas as pd
import numpy as np
import json
import ast
import plotly.graph_objects as go

# plots
import seaborn as sns
from matplotlib import pyplot as plt
import plotly.express as px

# importing from scripts
from src.scripts import targets
from src.scripts.targets import join_targets_and_diseases

import warnings

# Silence every warning for the rest of the session. Note that this also hides
# library deprecation notices (e.g. from pandas), so API changes will not be
# announced while this filter is active.
warnings.filterwarnings("ignore")

In [2]:
# Relative path to the tab-separated BindingDB file that is read into `df` below.
BINDING_DATASET = "../data/BindingDB_All.tsv"

In [3]:
# Columns to read from the BindingDB file. Only the record id, the target's
# source organism, the patent number and the UniProt target-chain descriptors
# are selected; the remaining BindingDB columns (ligand identifiers, affinity
# measurements such as Ki/IC50/Kd/EC50, assay conditions, publication metadata,
# cross-database ligand ids, chain sequences) are left out of the load.
usecols = [
    # record identifier
    "BindingDB Reactant_set_id",
    # target organism
    "Target Source Organism According to Curator or DataSource",
    # patent information
    "Patent Number",
    # UniProt (SwissProt) descriptors of the target chain
    "UniProt (SwissProt) Recommended Name of Target Chain",
    "UniProt (SwissProt) Entry Name of Target Chain",
    "UniProt (SwissProt) Primary ID of Target Chain",
    # UniProt (TrEMBL) descriptors of the target chain
    "UniProt (TrEMBL) Primary ID of Target Chain",
    "UniProt (TrEMBL) Submitted Name of Target Chain",
]

In [4]:
# Read only the selected columns from the tab-separated BindingDB file.
# on_bad_lines="skip" means malformed rows are dropped instead of raising,
# so the loaded frame may contain fewer rows than the file.
df = pd.read_csv(
    BINDING_DATASET,
    sep="\t",
    usecols=usecols,
    on_bad_lines="skip",
)

### Sankey diagram that links target class with disease class:

Linking diseases and target classes:

In [5]:
# Build the target/disease table from the loaded BindingDB columns.
# NOTE(review): join_targets_and_diseases is imported from src.scripts.targets;
# the cells below rely on its output having 'Target Classes' and
# 'Disease Classes' columns — confirm against the helper.
diseases_target_df = join_targets_and_diseases(df)

In [6]:
# Column produced by value_counts().reset_index() that holds the class label.
class_name_col = "UniProt (SwissProt) Recommended Name of Target Chain"

# Map the loaded records to target classes and count occurrences per class.
# NOTE(review): the exact return type of get_target_class is defined in
# src.scripts.targets — presumably a Series named after `class_name_col`; confirm.
mapped_names = targets.get_target_class(names_df=df)
temp = mapped_names.value_counts().reset_index()

# value_counts() sorts by descending frequency by default, so the first ten
# rows are the ten most frequent classes.
top_classes = temp.iloc[:10].sort_values("count", ascending=False)

# Keep only rows whose target class is among the top classes and attach the
# class count. Helper columns left over from a previous run of this cell are
# dropped first so that re-running the cell does not create suffixed
# duplicates ('count_x' / 'count_y') that would break the next cell.
diseases_target_df = (
    diseases_target_df
    .drop(columns=[class_name_col, "count"], errors="ignore")
    .merge(top_classes, left_on="Target Classes", right_on=class_name_col)
)

In [7]:
# Take the ten rows with the highest class count, then discard the two helper
# columns that were only needed for the ranking.
diseases_target_top10 = (
    diseases_target_df
    .sort_values(by="count", ascending=False)
    .head(10)
    .drop(
        columns=[
            "UniProt (SwissProt) Recommended Name of Target Chain",
            "count",
        ]
    )
)

In [8]:
# One row per (target class, disease class) pair, then count each pair.
# NOTE(review): explode() assumes 'Disease Classes' holds list-like values — confirm.
exploded = diseases_target_top10.explode('Disease Classes')
to_plot = exploded.groupby(['Target Classes', 'Disease Classes']).size().reset_index(name='Count')

# Keep at most the 10 most frequent disease classes per target class.
# sort + groupby().head() is used instead of groupby().apply(nlargest):
# apply() operating on the grouping column is deprecated in pandas 2.2, and
# once the grouping column is excluded the 'Target Classes' column would be
# lost after reset_index(drop=True).
to_plot = (
    to_plot
    .sort_values(['Target Classes', 'Count'], ascending=[True, False])
    .groupby('Target Classes')
    .head(10)
    .reset_index(drop=True)
)

# Sankey nodes are addressed by position in a single label list: target
# classes occupy indices 0..n-1 and disease classes follow, offset by n.
target_classes = to_plot["Target Classes"].unique()
disease_classes = to_plot["Disease Classes"].unique()
target_mapping = {name: idx for idx, name in enumerate(target_classes)}
disease_mapping = {name: idx + len(target_classes) for idx, name in enumerate(disease_classes)}

# Link endpoints and weights. The link destinations are deliberately NOT named
# `targets`: that name is the module imported from src.scripts and is still
# needed by the cell that calls targets.get_target_class().
sources = to_plot['Target Classes'].map(target_mapping)
link_targets = to_plot['Disease Classes'].map(disease_mapping)
values = to_plot['Count']

fig = go.Figure(go.Sankey(
    node=dict(
        pad=30,
        thickness=20,
        line=dict(color="black", width=0.5),
        # Order must match the index scheme above: targets first, then diseases.
        label=list(target_classes) + list(disease_classes)
    ),
    link=dict(
        source=sources,
        target=link_targets,
        value=values
    )
))
fig.update_layout(
    title_text="Target to Disease Classes Diagram",
    font_size=10,
    # width=1000,
    # height=800,
    title_x=0.5,
    plot_bgcolor="rgb(34, 37, 41)", paper_bgcolor="rgb(34, 37, 41)",
    template="plotly_dark"
)
fig.show()

In [9]:
# Optional export, currently disabled. When uncommented, this cell writes the
# Sankey figure to "Sankey_Targets_Diseases.html" with a dark page background
# injected into the <head>, and to "Sankey_Targets_Diseases.svg".
# custom_style = """
# <style>
#   body, html {
#     background-color: rgb(34, 37, 41);
#   }
# </style>
# """

# # Save the HTML file and inject the custom style
# html_content = fig.to_html(full_html=True, include_plotlyjs="cdn")
# # Insert the style in the <head>
# html_content = html_content.replace("</head>", f"{custom_style}</head>")

# # Write to a file
# with open("Sankey_Targets_Diseases.html", "w") as f:
#     f.write(html_content)

# fig.write_image("Sankey_Targets_Diseases.svg")