In [1]:
# IMPORTS
# base
import pandas as pd
import numpy as np
import json

# plots
import seaborn as sns
from matplotlib import pyplot as plt
import plotly.express as px

# importing from scripts
from src.scripts import targets

# importing from scripts
from src.scripts import targets

import warnings

warnings.filterwarnings("ignore")

In [2]:
# Load the citation records (later cells read the "doi" and "citation" keys of
# each item). JSON is UTF-8 by specification, so the encoding is pinned instead
# of relying on the platform default, which differs on Windows.
with open("../src/data/citations.json", "r", encoding="utf-8") as f:
    citations = json.load(f)

In [3]:
# Only the two target-name columns and the article DOI are read from the
# BindingDB export; every other column is skipped at parse time.
wanted_columns = [
    "UniProt (SwissProt) Recommended Name of Target Chain",
    "UniProt (TrEMBL) Submitted Name of Target Chain",
    "Article DOI",
]
names_df = pd.read_csv("../data/BindingDB_All.tsv", sep="\t", usecols=wanted_columns)

In [4]:
# Bar chart of the ten most frequent target classes.
colors = ['#357266']

# NOTE(review): get_target_class presumably returns one class label per row of
# names_df (it is merged back on the index further down) - confirm in src/scripts.
mapped_names = targets.get_target_class(names_df=names_df)

# value_counts() orders most-frequent first, so the first ten rows are the top
# ten; they are then re-sorted ascending by count.
plot_df = mapped_names.value_counts().reset_index()
top_classes = plot_df.head(10).sort_values(by="count")

bar_options = dict(
    x="count",
    y="UniProt (SwissProt) Recommended Name of Target Chain",
    orientation='h',
    title="Most Important Target Classes",
    log_x=True,
    color_discrete_sequence=colors,
)
fig = px.bar(top_classes, **bar_options)

# Dark theme with the plot and paper backgrounds set to the same colour.
layout_options = dict(
    template="plotly_dark",
    plot_bgcolor="rgb(34, 37, 41)",
    paper_bgcolor="rgb(34, 37, 41)",
    title_x=0.5,
    xaxis_title="Protein count",
    yaxis_title="Top 10 Target Classes",
)
fig.update_layout(**layout_options)
fig.update_yaxes(categoryorder="total ascending")
fig.show()

In [145]:
# Page-level CSS so the <body>/<html> background uses the same colour as the
# figure's own background.
custom_style = """
<style>
  body, html {
    background-color: rgb(34, 37, 41);
  }
</style>
"""

# Render the current figure as a standalone page (plotly.js loaded from the
# CDN) and inject the style block just before the closing </head> tag.
html_content = fig.to_html(full_html=True, include_plotlyjs="cdn")
html_content = html_content.replace("</head>", f"{custom_style}</head>")

# The generated page declares a UTF-8 charset, so it must be written as UTF-8;
# the platform default (e.g. cp1252 on Windows) can fail or corrupt non-ASCII labels.
with open("TargetClasses.html", "w", encoding="utf-8") as f:
    f.write(html_content)

# Static copy of the same figure.
fig.write_image("TargetClasses.svg")

In [5]:
# Attach the mapped target class to every BindingDB row (index-aligned merge)
# and keep only the rows that reference an article.
merged = names_df.merge(mapped_names, left_index=True, right_index=True)
merged = merged.dropna(subset=["Article DOI"])

# Both merge inputs carry a column with this name, so the mapped class ends up
# under the "_y" suffix.
class_column = "UniProt (SwissProt) Recommended Name of Target Chain_y"

# One row per target class holding the set of distinct DOIs that mention it.
target_dois = merged.groupby(class_column)["Article DOI"].apply(set).reset_index()

# DOI -> citation count lookup built from the loaded citation records.
citations_dict = {item["doi"]: item["citation"] for item in citations}

# Build one record per class. The DOI sets are iterated in sorted order because
# set iteration order of strings changes between kernel restarts, which made
# the resulting "Citations" lists (and their displayed preview) non-reproducible.
target_citations_exp = [
    {
        "Target Classes": target_class,
        "Citations": [
            # 0 if DOI not found
            {"DOI": doi, "Citations": citations_dict.get(doi, 0)}
            for doi in sorted(dois, key=str)
        ],
    }
    for target_class, dois in zip(target_dois[class_column], target_dois["Article DOI"])
]
target_citations_df = pd.DataFrame(target_citations_exp)

In [6]:
def _h_index(citation_counts):
    """Return the h-index of a collection of citation counts.

    The h-index is the largest h such that at least h entries have a count of
    h or more. With the counts ranked in descending order, the condition
    ``count >= rank`` holds for exactly the first h ranks, so counting the
    ranks that satisfy it yields h. An empty collection gives 0.
    """
    ranked = sorted(citation_counts, reverse=True)
    return sum(count >= rank for rank, count in enumerate(ranked, start=1))


# One H-Index per target class; entries whose citation count is None are ignored.
target_h_index_exp = [
    {
        "Target Classes": target_class,
        "H-Index": _h_index(
            [
                entry["Citations"]
                for entry in entries
                if entry["Citations"] is not None
            ]
        ),
    }
    for target_class, entries in zip(
        target_citations_df["Target Classes"], target_citations_df["Citations"]
    )
]

# Build and sort the frame once, after the loop. Previously this ran inside the
# loop body, rebuilding and re-sorting the whole frame on every iteration and
# leaving the name undefined when there were no rows. Explicit columns keep the
# sort valid for an empty input.
target_h_index_df = pd.DataFrame(
    target_h_index_exp, columns=["Target Classes", "H-Index"]
)
target_h_index_df = target_h_index_df.sort_values(by="H-Index", ascending=False)
target_h_index_df

Unnamed: 0,Target Classes,H-Index
2105,Neurotransmitter receptor,143
1554,Hormone Receptor,128
1422,Growth Factor Receptor,128
2107,Neurotransmitter transporter,123
2189,Other Protein Kinase,122
...,...,...
441,Beta-arrestin-2,1
2060,Natural resistance-associated macrophage prote...,1
0,(malaria parasite P. vivax) hypothetical protein,1
2658,RNA polymerase sigma factor SigB,0


In [7]:
# Horizontal bar chart of the first ten rows of the H-Index table (sorted
# descending by H-Index where it is built), on a log-scaled x axis.
colors = ['#357266']

bar_options = dict(
    x="H-Index",
    y="Target Classes",
    orientation='h',
    title="H-Index per Target Class",
    log_x=True,
    color_discrete_sequence=colors,
)
fig = px.bar(target_h_index_df.iloc[:10], **bar_options)

# Dark theme with the plot and paper backgrounds set to the same colour.
layout_options = dict(
    template="plotly_dark",
    plot_bgcolor="rgb(34, 37, 41)",
    paper_bgcolor="rgb(34, 37, 41)",
    title_x=0.5,
    yaxis_title="Target Classes",
)
fig.update_layout(**layout_options)
fig.update_yaxes(categoryorder="total ascending")
fig.show()

In [147]:
# Page-level CSS so the <body>/<html> background uses the same colour as the
# figure's own background.
custom_style = """
<style>
  body, html {
    background-color: rgb(34, 37, 41);
  }
</style>
"""

# Render the current figure as a standalone page (plotly.js loaded from the
# CDN) and inject the style block just before the closing </head> tag.
html_content = fig.to_html(full_html=True, include_plotlyjs="cdn")
html_content = html_content.replace("</head>", f"{custom_style}</head>")

# The generated page declares a UTF-8 charset, so it must be written as UTF-8;
# the platform default (e.g. cp1252 on Windows) can fail or corrupt non-ASCII labels.
with open("HIndex_Target.html", "w", encoding="utf-8") as f:
    f.write(html_content)

# Static copy of the same figure.
fig.write_image("HIndex_Target.svg")

In [7]:
def _sum_citations(records):
    """Add up the "Citations" value of every record, counting missing or None values as 0."""
    total = 0
    for record in records:
        total += record.get("Citations", 0) or 0
    return total


# Per-class article count (number of DOI records) and summed citation count.
per_class_records = target_citations_df["Citations"]
target_citations_df["Article Count"] = per_class_records.apply(len)
target_citations_df["Total Citations"] = per_class_records.apply(_sum_citations)
target_citations_df

Unnamed: 0,Target Classes,Citations,Article Count,Total Citations
0,(malaria parasite P. vivax) hypothetical protein,"[{'DOI': '10.1016/j.bmc.2009.06.065', 'Citatio...",1,36
1,"1,25-dihydroxyvitamin D(3) 24-hydroxylase, mit...","[{'DOI': '10.1021/jm5009314', 'Citations': 17}...",11,610
2,"1,3-beta-D-glucan synthase catalytic subunit","[{'DOI': '10.1016/j.bmcl.2012.04.127', 'Citati...",7,213
3,"1,3-beta-glucan synthase","[{'DOI': '10.1016/j.bmcl.2011.03.083', 'Citati...",12,817
4,"1,3-beta-glucan synthase component GSC2","[{'DOI': '10.1021/acsmedchemlett.8b00274', 'Ci...",5,50
...,...,...,...,...
3593,steroid 11beta-monooxygenase,"[{'DOI': '10.1016/j.bmc.2011.01.017', 'Citatio...",1,11
3594,tRNA (guanine-N(1)-)-methyltransferase,"[{'DOI': '10.1021/acs.jmedchem.9b00582', 'Cita...",1,25
3595,tRNA-dihydrouridine(20) synthase [NAD(P)+]-like,"[{'DOI': '10.1038/nrd.2016.266', 'Citations': ...",1,98
3596,threonine--tRNA ligase,"[{'DOI': '10.1021/jf302857x', 'Citations': 30}]",1,30


In [8]:
# Two descending rankings of the same table: by summed citations, then by
# number of articles.
target_sorted_citations, target_sorted_articles = (
    target_citations_df.sort_values(by=ranking_column, ascending=False)
    for ranking_column in ("Total Citations", "Article Count")
)

In [None]:
# Horizontal bar chart of the ten target classes with the most total citations.
colors = ['#d45e41']
fig = px.bar(
    target_sorted_citations[0:10],
    x="Total Citations",
    y="Target Classes",
    orientation='h',
    title="Total Citations per Target Class",
    log_x=True,
    # The palette above was defined but never handed to the chart, so the bars
    # fell back to the template default; pass it as the other charts do.
    color_discrete_sequence=colors
)
fig.update_layout(
    yaxis_title="Target Classes",
    template="plotly_dark"
)
fig.update_yaxes(categoryorder="total ascending")
fig.show()

In [107]:
# Horizontal bar chart of the ten target classes with the most articles, on a
# log-scaled x axis.
bar_options = dict(
    x="Article Count",
    y="Target Classes",
    orientation='h',
    title="Top 10 Target Classes by Article Count",
    log_x=True,
)
fig = px.bar(target_sorted_articles.iloc[:10], **bar_options)

layout_options = dict(template="plotly_dark", yaxis_title="Target Classes")
fig.update_layout(**layout_options)
fig.update_yaxes(categoryorder="total ascending")
fig.show()

In [9]:
# Ten leading classes under each ranking.
top_10_citations = target_sorted_citations.iloc[:10]
top_10_articles = target_sorted_articles.iloc[:10]

# Class names that appear in either top ten, each listed once.
top_10_union = (
    pd.concat([top_10_citations['Target Classes'], top_10_articles['Target Classes']])
    .drop_duplicates()
    .reset_index(drop=True)
)

# Rows for those classes, taken from each ranking in that ranking's own order.
in_union_by_citations = target_sorted_citations['Target Classes'].isin(top_10_union)
in_union_by_articles = target_sorted_articles['Target Classes'].isin(top_10_union)
corresp_citations = target_sorted_citations[in_union_by_citations]
corresp_articles = target_sorted_articles[in_union_by_articles]

# Stack both selections and keep the first occurrence of every class.
union = (
    pd.concat([corresp_citations, corresp_articles])
    .drop_duplicates(subset='Target Classes')
    .reset_index(drop=True)
)
union

Unnamed: 0,Target Classes,Citations,Article Count,Total Citations
0,Neurotransmitter receptor,"[{'DOI': '10.1021/jm00173a031', 'Citations': 1...",3312,150177
1,Hormone Receptor,"[{'DOI': '10.1016/j.bmcl.2010.10.106', 'Citati...",2419,117972
2,Growth Factor Receptor,"[{'DOI': '10.1016/j.bmcl.2006.03.069', 'Citati...",1807,105239
3,Neurotransmitter transporter,"[{'DOI': '10.1016/j.bmcl.2018.09.013', 'Citati...",1846,90911
4,Other Protein Kinase,"[{'DOI': '10.1016/j.bmcl.2012.01.084', 'Citati...",1446,89760
5,Ion Channel,"[{'DOI': '10.1016/j.bmc.2006.09.051', 'Citatio...",2406,86761
6,Oxidase,"[{'DOI': '10.1016/j.bmc.2018.04.054', 'Citatio...",1979,67964
7,Non Receptor Tyr Kinase,"[{'DOI': '10.1021/acs.jmedchem.7b00637', 'Cita...",1025,64215
8,CDK,"[{'DOI': '10.1007/s00044-009-9233-5', 'Citatio...",844,57943
9,Histone Modifier,"[{'DOI': '10.1016/j.ejmech.2020.112152', 'Cita...",1004,51925


In [148]:
import pandas as pd
import plotly.graph_objects as go

# Grouped horizontal bars comparing total citations (bottom x axis) with
# article count (top x axis) for the classes in `union`.
colors = ["#9BC59D", "#44633F"]

fig = go.Figure()
fig.add_trace(
    go.Bar(
        y=union['Target Classes'],
        x=union['Total Citations'],
        name='Total Citations',
        orientation='h',
        marker=dict(color=colors[0]),
        offsetgroup=1
    )
)
fig.add_trace(
    go.Bar(
        y=union['Target Classes'],
        x=union['Article Count'],
        name='Article Count',
        orientation='h',
        marker=dict(color=colors[1]),
        # A distinct offsetgroup keeps this bar beside, not on top of, the
        # citations bar even though it is drawn against the overlaid axis.
        offsetgroup=2,
        xaxis='x2'
    )
)
fig.update_layout(
    title="Distribution of Articles and Citations per Target Class",
    title_x=0.5,
    title_y=0.95,
    yaxis=dict(
        title="Target Classes",
        categoryorder="array",
        # Reversed so the first row of `union` is drawn at the top.
        categoryarray=union['Target Classes'][::-1]
    ),
    # Axis titles use the nested title=dict(text=..., font=...) form: the flat
    # `titlefont` attribute is deprecated and rejected by recent Plotly releases.
    xaxis=dict(
        title=dict(text="Total Citations", font=dict(color=colors[0])),
        tickfont=dict(color=colors[0])
    ),
    xaxis2=dict(
        title=dict(text="Article Count", font=dict(color=colors[1])),
        tickfont=dict(color=colors[1]),
        overlaying='x',
        side='top',
        position=1
    ),
    barmode='group',
    legend=dict(x=1, y=1, traceorder='normal', orientation='v'),
    plot_bgcolor="rgb(34, 37, 41)", paper_bgcolor="rgb(34, 37, 41)",
    template='plotly_dark'
)
fig.show()

In [149]:
# Page-level CSS so the <body>/<html> background uses the same colour as the
# figure's own background.
custom_style = """
<style>
  body, html {
    background-color: rgb(34, 37, 41);
  }
</style>
"""

# Render the current figure as a standalone page (plotly.js loaded from the
# CDN) and inject the style block just before the closing </head> tag.
html_content = fig.to_html(full_html=True, include_plotlyjs="cdn")
html_content = html_content.replace("</head>", f"{custom_style}</head>")

# The generated page declares a UTF-8 charset, so it must be written as UTF-8;
# the platform default (e.g. cp1252 on Windows) can fail or corrupt non-ASCII labels.
with open("Citations_per_Target.html", "w", encoding="utf-8") as f:
    f.write(html_content)

# Static copy of the same figure.
fig.write_image("Citations_per_Target.svg")

In [151]:
# from plotly.subplots import make_subplots
# import plotly.graph_objects as go

# fig1 = px.bar(
#     top_classes,
#     x="count",
#     y="UniProt (SwissProt) Recommended Name of Target Chain",
#     orientation='h',
#     title="Number of proteins in each class (Top 10 Classes)",
#     log_x=True
# )
# fig1.update_layout(yaxis_title="Target Classes", template="plotly_dark")
# fig1.update_yaxes(categoryorder="total ascending")

# fig2 = px.bar(
#     target_h_index_df[0:10],
#     x="H-Index",
#     y="Target Classes",
#     orientation='h',
#     title="Top 10 Target Classes by H-Index",
#     log_x=True
# )
# fig2.update_layout(yaxis_title="Target Classes", template="plotly_dark")
# fig2.update_yaxes(categoryorder="total ascending")

# fig3 = px.bar(
#     target_sorted_citations[0:10],
#     x="Total Citations",
#     y="Target Classes",
#     orientation='h',
#     title="Top 10 Target Classes by Total Citations",
#     log_x=True
# )
# fig3.update_layout(yaxis_title="Target Classes", template="plotly_dark")
# fig3.update_yaxes(categoryorder="total ascending")

# fig4 = px.bar(
#     target_sorted_articles[0:10],
#     x="Article Count",
#     y="Target Classes",
#     orientation='h',
#     title="Top 10 Target Classes by Article Count",
#     log_x=True
# )
# fig4.update_layout(yaxis_title="Target Classes", template="plotly_dark")
# fig4.update_yaxes(categoryorder="total ascending")

# fig = make_subplots(
#     rows=2, cols=2,
#     subplot_titles=(
#         "Number of proteins in each class (Top 10 Classes)",
#         "Top 10 Target Classes by H-Index",
#         "Top 10 Target Classes by Total Citations",
#         "Top 10 Target Classes by Article Count"
#     ),
#     vertical_spacing=0.2, 
#     horizontal_spacing=0.15
# )
# for trace in fig1.data:
#     fig.add_trace(trace, row=1, col=1)

# for trace in fig2.data:
#     fig.add_trace(trace, row=1, col=2)

# for trace in fig3.data:
#     fig.add_trace(trace, row=2, col=1)

# for trace in fig4.data:
#     fig.add_trace(trace, row=2, col=2)

# fig.update_yaxes(categoryorder="total ascending", row=1, col=1)
# fig.update_yaxes(categoryorder="total ascending", row=1, col=2)
# fig.update_yaxes(categoryorder="total ascending", row=2, col=1)
# fig.update_yaxes(categoryorder="total ascending", row=2, col=2)

# fig.update_layout(height=900, width=1400, title_text="Subplots of Protein Target Data", template="plotly_dark")
# fig.show()


In [10]:
# Add the H-Index column to the per-class citation table and rank by it.
# Any H-Index column left over from a previous run of this cell is dropped
# first; otherwise a re-run would merge into H-Index_x / H-Index_y and the sort
# below would raise KeyError.
target_citations_df = (
    target_citations_df
    .drop(columns="H-Index", errors="ignore")
    .merge(target_h_index_df, on="Target Classes")
    .sort_values(by="H-Index", ascending=False)
)
target_citations_df

Unnamed: 0,Target Classes,Citations,Article Count,Total Citations,H-Index
2105,Neurotransmitter receptor,"[{'DOI': '10.1021/jm00173a031', 'Citations': 1...",3312,150177,143
1554,Hormone Receptor,"[{'DOI': '10.1016/j.bmcl.2010.10.106', 'Citati...",2419,117972,128
1422,Growth Factor Receptor,"[{'DOI': '10.1016/j.bmcl.2006.03.069', 'Citati...",1807,105239,128
2107,Neurotransmitter transporter,"[{'DOI': '10.1016/j.bmcl.2018.09.013', 'Citati...",1846,90911,123
2189,Other Protein Kinase,"[{'DOI': '10.1016/j.bmcl.2012.01.084', 'Citati...",1446,89760,122
...,...,...,...,...,...
441,Beta-arrestin-2,"[{'DOI': '10.1007/s00259-006-0324-y', 'Citatio...",1,51,1
2060,Natural resistance-associated macrophage prote...,"[{'DOI': '10.1016/j.bmcl.2012.05.129', 'Citati...",1,26,1
0,(malaria parasite P. vivax) hypothetical protein,"[{'DOI': '10.1016/j.bmc.2009.06.065', 'Citatio...",1,36,1
2658,RNA polymerase sigma factor SigB,"[{'DOI': '10.1021/acs.biochem.6b01267', 'Citat...",1,0,0


In [45]:
# df_melted = target_citations_df[0:10].melt(
#     id_vars='Target Classes',
#     value_vars=['H-Index', 'Total Citations', 'Article Count'],
#     var_name='Metric', 
#     value_name='Value'
# )
# fig = px.bar(
#     df_melted,
#     x='Value',
#     y='Target Classes',
#     orientation='h',
#     color='Metric',
#     barmode='group',
#     title='Comparison of Metrics for Target Classes',
#     template='plotly_dark'
# )
# fig.update_layout(
#     legend_title='Metric'
# )
# fig.update_yaxes(categoryorder="total ascending")
# fig.show()

In [150]:
# import plotly.graph_objects as go

# # Melt the DataFrame as before
# df_melted = target_citations_df[0:10].melt(
#     id_vars='Target Classes',
#     value_vars=['H-Index', 'Total Citations', 'Article Count'],
#     var_name='Metric',
#     value_name='Value'
# )

# # Separate data for different axes
# data_primary = df_melted[df_melted['Metric'] == 'Total Citations']  # For primary x-axis
# data_secondary = df_melted[df_melted['Metric'].isin(['H-Index', 'Article Count'])]  # For secondary x-axis

# # Initialize figure
# fig = go.Figure()

# # Add bars for Total Citations (primary x-axis)
# fig.add_trace(
#     go.Bar(
#         x=data_primary['Value'],
#         y=data_primary['Target Classes'],
#         name='Total Citations',
#         orientation='h',
#         marker=dict(color='lightblue')
#     )
# )

# # Add bars for H-Index and Article Count (secondary x-axis)
# for metric in data_secondary['Metric'].unique():
#     filtered_data = data_secondary[data_secondary['Metric'] == metric]
#     fig.add_trace(
#         go.Bar(
#             x=filtered_data['Value'],
#             y=filtered_data['Target Classes'],
#             name=metric,
#             orientation='h',
#             xaxis="x2"  # Assign these traces to the secondary x-axis
#         )
#     )

# # Update layout to include secondary x-axis
# fig.update_layout(
#     title='Comparison of Metrics for Target Classes',
#     template='plotly_dark',
#     barmode='group',  # Group the bars
#     legend_title='Metric',
#     xaxis=dict(
#         title='Total Citations',
#         showgrid=True,
#     ),
#     xaxis2=dict(
#         title='H-Index and Article Count',
#         overlaying='x',  # Overlay on the primary x-axis
#         side='top'       # Position above the primary x-axis
#     ),
#     yaxis=dict(
#         categoryorder="total ascending",
#         title='Target Classes'
#     )
# )

# fig.show()
