In [1]:
# IMPORTS
# base
import pandas as pd
import numpy as np
import json

# plots
import seaborn as sns
from matplotlib import pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

# importing from scripts
from src.scripts import targets
from src.scripts.citations import calculate_H_index_targets

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the citation records from disk. Each record is read later through its
# "doi" and "citation" keys when the DOI -> citation-count lookup is built.
# JSON text is UTF-8 by specification, so decode it explicitly rather than
# relying on the platform's locale encoding (which differs on Windows).
with open("../src/data/citations.json", "r", encoding="utf-8") as f:
    citations = json.load(f)

In [3]:
# Only the two target-name columns and the article DOI are needed from the
# tab-separated BindingDB dump, so restrict the read to those columns.
bindingdb_columns = [
    "UniProt (SwissProt) Recommended Name of Target Chain",
    "UniProt (TrEMBL) Submitted Name of Target Chain",
    "Article DOI",
]
names_df = pd.read_csv("../data/BindingDB_All.tsv", sep="\t", usecols=bindingdb_columns)

In [4]:
# Assign a target class to every row (the mapping lives in targets.get_target_class),
# then count how many rows fall into each class.
mapped_names = targets.get_target_class(names_df=names_df)
plot_df = mapped_names.value_counts().reset_index()

# value_counts() sorts descending, so the first ten rows are the most frequent
# classes; they are re-sorted ascending for the horizontal bar layout.
top_classes = plot_df.head(10).sort_values(by="count")

colors = ['#357266']
class_column = "UniProt (SwissProt) Recommended Name of Target Chain"
dark_background = "rgb(34, 37, 41)"

fig = px.bar(
    top_classes,
    x="count",
    y=class_column,
    orientation='h',
    title="Most Important Target Classes",
    log_x=True,
    color_discrete_sequence=colors,
)
fig.update_layout(
    template="plotly_dark",
    plot_bgcolor=dark_background,
    paper_bgcolor=dark_background,
    title_x=0.5,
    xaxis_title="Protein count",
    yaxis_title="Top 10 Target Classes",
)
# Order the bars by their value so the largest class ends up at the top.
fig.update_yaxes(categoryorder="total ascending")
fig.show()

In [5]:
# Everything past the ten most frequent classes is collapsed into a single
# "Other Target Classes" slice so the pie stays readable.
class_column = "UniProt (SwissProt) Recommended Name of Target Chain"
others_count = plot_df["count"].iloc[10:].sum()
others_row = pd.DataFrame(
    [{class_column: "Other Target Classes", "count": others_count}]
)
top_classes_with_others = pd.concat([top_classes, others_row], ignore_index=True)

dark_background = "rgb(34, 37, 41)"
fig = px.pie(
    top_classes_with_others,
    names=class_column,
    values="count",
    title="Most Important Target Classes",
)
fig.update_layout(
    template="plotly_dark",
    plot_bgcolor=dark_background,
    paper_bgcolor=dark_background,
    title_x=0.5,
)
fig.show()

In [None]:
# custom_style = """
# <style>
#   body, html {
#     background-color: rgb(34, 37, 41);
#   }
# </style>
# """

# # Save the HTML file and inject the custom style
# html_content = fig.to_html(full_html=True, include_plotlyjs="cdn")
# # Insert the style in the <head>
# html_content = html_content.replace("</head>", f"{custom_style}</head>")

# # Write to a file
# with open("TargetClasses.html", "w") as f:
#     f.write(html_content)

# fig.write_image("TargetClasses.svg")

In [6]:
# Join each BindingDB row with its mapped target class on the row index and
# keep only rows that carry an article DOI. The mapped class column is read
# back with the "_y" suffix that the merge adds to the right-hand column.
class_column = "UniProt (SwissProt) Recommended Name of Target Chain_y"
merged = (
    names_df
    .merge(mapped_names, left_index=True, right_index=True)
    .dropna(subset="Article DOI")
)

# One row per target class holding the set of distinct DOIs that mention it.
target_dois = merged.groupby(class_column)["Article DOI"].apply(set).reset_index()

# DOI -> citation count lookup built from the loaded citation records.
citations_dict = {item["doi"]: item["citation"] for item in citations}

# For every class, list its DOIs with their citation count (0 if DOI not found).
target_citations_exp = [
    {
        "Target Classes": class_name,
        "Citations": [
            {"DOI": doi, "Citations": citations_dict.get(doi, 0)}
            for doi in class_dois
        ],
    }
    for class_name, class_dois in zip(target_dois[class_column], target_dois["Article DOI"])
]
target_citations_df = pd.DataFrame(target_citations_exp)

In [7]:
# Compute the H-index table from the per-class citation lists built above.
# NOTE(review): calculate_H_index_targets is imported from src.scripts.citations
# and is not visible here. The plotting cell below reads the "H-Index" and
# "Target Classes" columns of its result and takes the first ten rows, so the
# result is presumably sorted by H-index in descending order; confirm.
target_h_index_df = calculate_H_index_targets(target_citations_df)

In [8]:
# Horizontal bar chart of the first ten rows of the H-index table (log-scaled x).
top_h_index = target_h_index_df[0:10]
colors = ['#357266']
dark_background = "rgb(34, 37, 41)"

fig = px.bar(
    top_h_index,
    y="Target Classes",
    x="H-Index",
    orientation='h',
    log_x=True,
    title="H-Index per Target Class",
    color_discrete_sequence=colors,
)
fig.update_layout(
    template="plotly_dark",
    plot_bgcolor=dark_background,
    paper_bgcolor=dark_background,
    title_x=0.5,
    yaxis_title="Target Classes",
)
# Order the bars by their value so the highest H-index ends up at the top.
fig.update_yaxes(categoryorder="total ascending")
fig.show()

In [None]:
# custom_style = """
# <style>
#   body, html {
#     background-color: rgb(34, 37, 41);
#   }
# </style>
# """

# # Save the HTML file and inject the custom style
# html_content = fig.to_html(full_html=True, include_plotlyjs="cdn")
# # Insert the style in the <head>
# html_content = html_content.replace("</head>", f"{custom_style}</head>")

# # Write to a file
# with open("HIndex_Target.html", "w") as f:
#     f.write(html_content)

# fig.write_image("HIndex_Target.svg")

In [9]:
# Per-class summary columns derived from each class's list of
# {"DOI", "Citations"} entries: how many articles, and their summed citations.
citation_lists = target_citations_df["Citations"]
target_citations_df["Article Count"] = citation_lists.apply(len)
target_citations_df["Total Citations"] = citation_lists.apply(
    # `or 0` turns a missing/None count into 0 so the sum never fails.
    lambda entries: sum((entry.get("Citations", 0) or 0) for entry in entries)
)

# Two rankings of the same table, each sorted from highest to lowest.
target_sorted_citations = target_citations_df.sort_values("Total Citations", ascending=False)
target_sorted_articles = target_citations_df.sort_values("Article Count", ascending=False)

In [10]:
# Horizontal bar chart of the ten classes with the most total citations
# (log-scaled x). The bar colour is defined here and passed to px.bar, as the
# other single-colour bar charts in this notebook do; previously `colors` was
# assigned but never used, so the chart fell back to the template default.
colors = ['#d45e41']
fig = px.bar(
    target_sorted_citations[0:10],
    x="Total Citations",
    y="Target Classes",
    orientation='h',
    title="Total Citations per Target Class",
    log_x=True,
    color_discrete_sequence=colors
)
fig.update_layout(
    yaxis_title="Target Classes",
    title_x=0.5,
    plot_bgcolor="rgb(34, 37, 41)", paper_bgcolor="rgb(34, 37, 41)",
    template="plotly_dark"
)
# Order the bars by their value so the most-cited class ends up at the top.
fig.update_yaxes(categoryorder="total ascending")
fig.show()

In [11]:
# Horizontal bar chart of the ten classes with the most articles (log-scaled x).
top_by_articles = target_sorted_articles[0:10]
dark_background = "rgb(34, 37, 41)"

fig = px.bar(
    top_by_articles,
    y="Target Classes",
    x="Article Count",
    orientation='h',
    log_x=True,
    title="Top 10 Target Classes by Article Count",
)
fig.update_layout(
    template="plotly_dark",
    plot_bgcolor=dark_background,
    paper_bgcolor=dark_background,
    title_x=0.5,
    yaxis_title="Target Classes",
)
# Order the bars by their value so the class with most articles ends up at the top.
fig.update_yaxes(categoryorder="total ascending")
fig.show()

In [12]:
top_10_citations = target_sorted_citations.head(10)
top_10_articles = target_sorted_articles.head(10)

# Class names that appear in either top-10 ranking (citations first, then articles).
top_10_union = (
    pd.concat([top_10_citations['Target Classes'], top_10_articles['Target Classes']])
    .drop_duplicates()
    .reset_index(drop=True)
)

# Rows for those classes, taken from each ranking in that ranking's own order.
in_union_by_citations = target_sorted_citations['Target Classes'].isin(top_10_union)
in_union_by_articles = target_sorted_articles['Target Classes'].isin(top_10_union)
corresp_citations = target_sorted_citations[in_union_by_citations]
corresp_articles = target_sorted_articles[in_union_by_articles]

# Stack both selections and keep the first occurrence of every class, so the
# final order follows the citation ranking.
union = (
    pd.concat([corresp_citations, corresp_articles])
    .drop_duplicates(subset='Target Classes')
    .reset_index(drop=True)
)

In [13]:
# Grouped horizontal bars comparing total citations and article count per
# target class. The two measures live on different scales, so each trace gets
# its own x axis: citations on the bottom axis, article count on a second
# axis overlaid on top. Axis titles and ticks are tinted to match their bars.
colors = ["#9BC59D", "#44633F"]

fig = go.Figure()
fig.add_trace(
    go.Bar(
        y=union['Target Classes'],
        x=union['Total Citations'],
        name='Total Citations',
        orientation='h',
        marker=dict(color=colors[0]),
        offsetgroup=1
    )
)
fig.add_trace(
    go.Bar(
        y=union['Target Classes'],
        x=union['Article Count'],
        name='Article Count',
        orientation='h',
        marker=dict(color=colors[1]),
        offsetgroup=2,
        xaxis='x2'
    )
)
fig.update_layout(
    title="Distribution of Articles and Citations per Target Class",
    title_x=0.5,
    title_y=0.95,
    yaxis=dict(
        title="Target Classes",
        categoryorder="array",
        # Reverse so the first row of `union` is drawn at the top of the chart.
        categoryarray=union['Target Classes'][::-1]
    ),
    # The axis title font is set through the nested `title.font` attribute;
    # the legacy `titlefont` shortcut is deprecated and rejected by newer
    # plotly releases.
    xaxis=dict(
        title=dict(text="Total Citations", font=dict(color=colors[0])),
        tickfont=dict(color=colors[0])
    ),
    xaxis2=dict(
        title=dict(text="Article Count", font=dict(color=colors[1])),
        tickfont=dict(color=colors[1]),
        overlaying='x',
        side='top',
        position=1
    ),
    barmode='group',
    legend=dict(x=1, y=1, traceorder='normal', orientation='v'),
    plot_bgcolor="rgb(34, 37, 41)", paper_bgcolor="rgb(34, 37, 41)",
    template='plotly_dark'
)
fig.show()

In [None]:
# custom_style = """
# <style>
#   body, html {
#     background-color: rgb(34, 37, 41);
#   }
# </style>
# """

# # Save the HTML file and inject the custom style
# html_content = fig.to_html(full_html=True, include_plotlyjs="cdn")
# # Insert the style in the <head>
# html_content = html_content.replace("</head>", f"{custom_style}</head>")

# # Write to a file
# with open("Citations_per_Target.html", "w") as f:
#     f.write(html_content)

# fig.write_image("Citations_per_Target.svg")

In [15]:
# Read the article metadata and discard any row that has a missing value.
metadata_raw = pd.read_csv("../src/data/metadata.csv")
doi_metadata = metadata_raw.dropna()

# DOI -> publication year lookup (a later duplicate DOI overrides an earlier one).
doi_year_dict = {
    doi: year
    for doi, year in zip(doi_metadata["Article DOI"], doi_metadata["year"])
}

In [19]:
def get_years_from_citations(citations_list, doi_year_dict):
    """Look up the publication year of every citation entry.

    Args:
        citations_list: Iterable of dicts; each entry's 'DOI' key is used for
            the lookup (a missing key is treated as an unknown DOI).
        doi_year_dict: Mapping from DOI to publication year.

    Returns:
        A list aligned with ``citations_list`` holding the year for each
        entry, or ``None`` where the DOI is not present in ``doi_year_dict``.
    """
    # dict.get yields None for unknown DOIs, keeping the output aligned with the input.
    return [doi_year_dict.get(entry.get('DOI')) for entry in citations_list]
# Attach, for every target class, the publication years of its articles in the
# same order as the entries of its 'Citations' list (None for unknown DOIs).
target_citations_df['Publication Years'] = target_citations_df['Citations'].apply(
    get_years_from_citations, args=(doi_year_dict,)
)

In [69]:
# Keep the ten most frequent target classes under their own name and relabel
# every other class as 'Others' so the time series stays readable.
top10 = top_classes['UniProt (SwissProt) Recommended Name of Target Chain'].to_list()
is_top10 = target_citations_df['Target Classes'].isin(top10)

# .assign returns a new frame, so the label is never written onto a filtered
# slice of target_citations_df (the chained-assignment pattern that pandas
# flags with SettingWithCopyWarning).
non_top10 = target_citations_df[~is_top10].assign(**{'Target Classes': 'Others'})
target_citations_10 = pd.concat([target_citations_df[is_top10], non_top10])


In [70]:
# Flatten the per-class lists into one row per (article, class): each citation
# entry is paired with the publication year stored at the same position.
# The loop uses its own local names so that the notebook-level `citations`
# (the records loaded from citations.json) is not overwritten; clobbering it
# would break a re-run of the cell that builds the DOI -> citation lookup.
expanded_rows = []
for _, class_row in target_citations_10.iterrows():
    class_label = class_row["Target Classes"]
    for year, citation_entry in zip(class_row["Publication Years"], class_row["Citations"]):
        expanded_rows.append(
            {"Year": year, "Target Classes": class_label, "Citations": citation_entry}
        )

# Naming the columns explicitly keeps the frame usable even if no rows were produced.
expanded_df = pd.DataFrame(expanded_rows, columns=["Year", "Target Classes", "Citations"])
def extract_citations(citation_dict):
    """Return the value stored under 'Citations', or 0 when it is unavailable.

    Args:
        citation_dict: Expected to be a dict with a 'Citations' key; any other
            value (non-dict, or dict without that key) is tolerated.

    Returns:
        ``citation_dict['Citations']`` unchanged when present, otherwise 0.
    """
    has_count = isinstance(citation_dict, dict) and 'Citations' in citation_dict
    return citation_dict['Citations'] if has_count else 0
# Replace each citation entry by its count, coercing anything non-numeric to NaN.
raw_counts = expanded_df["Citations"].apply(extract_citations)
expanded_df["Citations"] = pd.to_numeric(raw_counts, errors='coerce')

# Sum the citations per (year, class) pair. Rows whose year is missing are
# left out, because groupby drops missing group keys by default.
aggregated_df = (
    expanded_df
    .groupby(["Year", "Target Classes"], as_index=False)["Citations"]
    .sum()
)

In [72]:
# Stacked area chart of citation counts per publication year, one area per class.
dark_background = "rgb(34, 37, 41)"
axis_labels = {"Year": "Publication Year", "Citations": "Citation Count"}

fig = px.area(
    aggregated_df,
    x="Year",
    y="Citations",
    color="Target Classes",
    line_group="Target Classes",
    labels=axis_labels,
    title="Citations Over Time by Target Class",
)
# Hide the catch-all 'Others' trace initially; it stays available in the legend.
fig.update_traces(visible='legendonly', selector=dict(name='Others'))
fig.update_layout(
    template="plotly_dark",
    plot_bgcolor=dark_background,
    paper_bgcolor=dark_background,
    title_x=0.5,
)
fig.show()

In [None]:
# custom_style = """
# <style>
#   body, html {
#     background-color: rgb(34, 37, 41);
#   }
# </style>
# """

# # Save the HTML file and inject the custom style
# html_content = fig.to_html(full_html=True, include_plotlyjs="cdn")
# # Insert the style in the <head>
# html_content = html_content.replace("</head>", f"{custom_style}</head>")

# # Write to a file
# with open("Timeseries_Citations_Targets.html", "w") as f:
#     f.write(html_content)

# fig.write_image("Timeseries_Citations_Targets.svg")