In [17]:
%load_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [18]:
# Standard Library Imports
import datetime
import json
import os
import sys
import time
from itertools import product

import colorcet as cc
import matplotlib.pyplot as plt
import networkx as nx

# Third-Party Library Imports
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import scipy.sparse as sparse
import seaborn as sns
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Access environment variables
python_path = os.getenv('PYTHONPATH')
data_dir = os.getenv('DATA_DIR')
src_dir = os.getenv('SRC_DIR')
output_dir = os.getenv('OUTPUT_DIR')

from src.main_path.MainPathDataAssigner import MainPathDataAssigner
from src.main_path.MainPathReader import MainPathReader


In [33]:
p = data_dir + "/08-analysis-data/2025/df_analysis.pkl"
df = pd.read_pickle(p)

p_labels = output_dir +'/cluster-qualifications_2025/cluster-label-tree/cluster_labels_filtered.json'
cluster_labels_dict = json.load(open(p_labels, 'r'))

p_color = output_dir + '/cluster-qualifications_2025/colors/cluster_color_map_2025.json'
cluster_color_dict = json.load(open(p_color, 'r'))

Cannot be read by networkx, so reading in from scratch

# read in main path data from pajek


In [22]:
path_to_main_path = (
   data_dir + "/10-main-path-graphs/2025/17. Key-Route Local Main Path [10-20] [0.010] of N2 (109)GOOD.net"
)
mp_reader = MainPathReader(path_to_main_path, node_attributes=["eid"])
Gmp = mp_reader.get_graph()


DiGraph with 109 nodes and 119 edges


In [6]:
list(Gmp.nodes(data=True))[0]


('1',
 {'label': 'DeWilde_1982',
  'x': '0.4045',
  'y': '0.0458',
  'size': '0.5000',
  'shape': 'ellipse',
  'eid': '2-s2.0-0019954145'})

# Add Info from df to mp


In [7]:
print(list(Gmp.edges(data=True))[0])


('3', '2', {'weight': 0.025409519})


In [39]:
attr_to_assign = [
    "title",
    "citedby_count",
    "doi",
    "year",
    "cluster_alpha0.3_k10_res0.002",
    "centrality_alpha0.3_k10_res0.002",
]
data_assigner = MainPathDataAssigner(Gmp, df, attr_to_assign,cluster_labels_dict, cluster_color_dict)
Gmp = data_assigner.process_mp()
print("family node:")
print(list(Gmp.nodes(data=True))[9])
print("single node:")
print(list(Gmp.nodes(data=True))[0])

family node:
('10', {'label': 'family_Sindrup_1992_Sindrup_1992_2', 'eid': '2-s2.0-0026606822;2-s2.0-0026576928', 'title': 'The relationship between paroxetine and the sparteine oxidation polymorphism;Pharmacokinetics of the selective serotonin reuptake inhibitor paroxetine: Nonlinearity and relation to the sparteine oxidation polymorphism', 'citedby_count': '245;169', 'doi': 'nan;nan', 'year': '1992;1992', 'cluster_alpha0.3_k10_res0.002': '6', 'centrality_alpha0.3_k10_res0.002': '0.3097024599773926;0.21713932497749894', 'cluster_label': 'SSRIs and the Cytochrome P450 System', 'color': [0.1270824126203872, 0.32798911987262874, 0.8204457680820253]})
single node:
('1', {'label': 'DeWilde_1982', 'eid': '2-s2.0-0019954145', 'title': 'Fluvoxamine and chlorimipramine in endogenous depression', 'citedby_count': '68', 'doi': '10.1016/0165-0327(82)90009-X', 'year': '1982', 'cluster_alpha0.3_k10_res0.002': '14', 'centrality_alpha0.3_k10_res0.002': '0.30485052177028066', 'cluster_label': 'Fluvoxa

In [42]:
# 
# # Then create the interactive plot
plotter = MainPathPlotterInteractive(
    Gmp, 
    cluster_col="cluster_alpha0.3_k10_res0.002", 
    label_col="label",
    hover_cols=["title", "cited_by", "year", "first_author"],
    color_attr="color",
    cluster_label_attr="cluster_label"
)

# Show network visualization
plotter.plot_network_on_timeline_interactive(
    return_fig=False, 
    adjust_overlap=True,
    show_legend=True,
    title="Research Topic Network"
)

# Show timeline view
plotter.plot_timeline_view()

# Save visualizations
#plotter.plot_network_on_timeline_interactive(savingpath="network_visualization.html")
#plotter.plot_timeline_view(savingpath="timeline_visualization.html")

'<div>                        <script type="text/javascript">window.PlotlyConfig = {MathJaxConfig: \'local\'};</script>\n        <script charset="utf-8" src="https://cdn.plot.ly/plotly-3.0.1.min.js"></script>                <div id="b2fe9924-6b7f-45c9-bfcf-32408175d65e" class="plotly-graph-div" style="height:600px; width:1200px;"></div>            <script type="text/javascript">                window.PLOTLYENV=window.PLOTLYENV || {};                                if (document.getElementById("b2fe9924-6b7f-45c9-bfcf-32408175d65e")) {                    Plotly.newPlot(                        "b2fe9924-6b7f-45c9-bfcf-32408175d65e",                        [{"hoverinfo":"text","hovertext":["\\u003cb\\u003etitle:\\u003c\\u002fb\\u003e Fluvoxamine and chlorimipramine in endogenous depression\\u003cbr\\u003e\\u003cb\\u003eyear:\\u003c\\u002fb\\u003e 1982\\u003cbr\\u003e\\u003cb\\u003ecluster_label:\\u003c\\u002fb\\u003e Fluvoxamine for Depression\\u003cbr\\u003e\\u003cb\\u003edoi:\\u003c\\u00

# plot the main path


In [11]:
from src.main_path.MainPathPlotterInteractive import MainPathPlotterInteractive
from src.main_path.MainPathPlotterStatic import MainPathPlotterStatic


# plot all the main paths


In [12]:
import glob

In [13]:
# read in all files in data/08-main-paths that end on .net

files = glob.glob(data_dir +"/10-main-path-graphs/2025/*.net")
main_paths = [MainPathReader(f, node_attributes=["eid"]).get_graph() for f in files]


DiGraph with 117 nodes and 128 edges
DiGraph with 160 nodes and 196 edges
DiGraph with 176 nodes and 223 edges
DiGraph with 128 nodes and 142 edges
DiGraph with 117 nodes and 141 edges
DiGraph with 128 nodes and 144 edges
DiGraph with 109 nodes and 119 edges
DiGraph with 116 nodes and 125 edges
DiGraph with 170 nodes and 211 edges


In [31]:
main_paths_data[0].nodes(data=True)

NodeDataView({'1': {'label': 'Magnussen_1982', 'x': '0.8695', 'y': '0.8266', 'eid': '2-s2.0-0019996341', 'title': 'Treatment of myoclonic syndromes with paroxetine alone or combined with 5‐HTP', 'citedby_count': '13', 'doi': '10.1111/j.1600-0404.1982.tb04525.x', 'year': '1982', 'first_author': 'Magnussen I.', 'cluster_alpha0.3_k10_res0.002': '49', 'centrality_alpha0.3_k10_res0.002': '0.05704960519302766'}, '2': {'label': 'Magnussen_1982_2', 'x': '0.8569', 'y': '0.8107', 'eid': '2-s2.0-0020446870', 'title': 'Paroxetine, a potent selective long-acting inhibitor of synaptosomal 5-HT uptake in mice', 'citedby_count': '43', 'doi': '10.1007/BF01276577', 'year': '1982', 'first_author': 'Magnussen I.', 'cluster_alpha0.3_k10_res0.002': '27', 'centrality_alpha0.3_k10_res0.002': '0.12533142808722617'}, '3': {'label': 'DeWilde_1982', 'x': '0.8771', 'y': '0.5294', 'eid': '2-s2.0-0019954145', 'title': 'Fluvoxamine and chlorimipramine in endogenous depression', 'citedby_count': '68', 'doi': '10.1016/

In [27]:

attr_to_assign = [
    "title",
    "citedby_count",
    "doi",
    "year",
    "first_author",
    "cluster_alpha0.3_k10_res0.002",
    "centrality_alpha0.3_k10_res0.002",
]


main_paths_data = [
    MainPathDataAssigner(mp, df, attr_to_assign).process_mp(
        attr_to_remove=["size", "shape"]
    )
    for mp in main_paths
]
cluster_col = "cluster_alpha0.3_k10_res0.002"
n_clusters = []
figs = []
for f, mp in zip(files, main_paths_data):
    print(f)
    print(mp)
    # Get node attributes
    x = nx.get_node_attributes(mp, "x")
    y = nx.get_node_attributes(mp, "y")

    # Convert coordinates to numeric values
    x_numeric = {k: float(v) for k, v in x.items()}
    y_numeric = {k: float(v) for k, v in y.items()}

    pos = {k: (x_numeric[k], y_numeric[k]) for k in x_numeric}
    mpp_i = MainPathPlotterInteractive(mp, cluster_col=cluster_col, label_col="label")
    fig = mpp_i.plot_network_on_timeline_interactive(
        return_fig=True, adjust_overlap=True, pos=None
    )
    figs.append(fig)
    n_clusters.append(len(set(nx.get_node_attributes(mp, cluster_col).values())))

with open(
    "../output/figures/main-paths/2025/mainpaths_alpha0.3_k10_res0.002.html", "w"
) as f:
    f.write("<html><head><title>Visualization</title></head><body>\n")
    # Write header
    f.write("<h1>cluster_alpha0.3_k10_res0.002</h1>\n")
    for idx, (filename, fig) in enumerate(zip(files, figs)):
        # Assuming 'files' contains some form of identifier or filename you want to display
        f.write(
            f"<h2>{filename.split('/')[-1]}</h2>\n<p>Number of clusters: {n_clusters[idx]}</p>"
        )  # Writing the identifier/filename before the figure
        f.write(fig)  # Writing the figure's HTML string
        f.write("<br>\n")
    f.write("</body></html>")


/Users/jlq293/Projects/Study-1-Bibliometrics/data/10-main-path-graphs/2025/10. Key-Route Local Main Path [30-40] [0.020] of N2 (117).net
DiGraph with 117 nodes and 128 edges


/Users/jlq293/Projects/Study-1-Bibliometrics/data/10-main-path-graphs/2025/9. Key-Route Local Main Path [30-40] [0.200] of N2 (160).net
DiGraph with 160 nodes and 196 edges


/Users/jlq293/Projects/Study-1-Bibliometrics/data/10-main-path-graphs/2025/12. Key-Route Local Main Path [40-60] [0.200] of N2 (176).net
DiGraph with 176 nodes and 223 edges


/Users/jlq293/Projects/Study-1-Bibliometrics/data/10-main-path-graphs/2025/local_mp_4050_0.02_N128.net
DiGraph with 128 nodes and 142 edges


/Users/jlq293/Projects/Study-1-Bibliometrics/data/10-main-path-graphs/2025/15. Key-Route Local Main Path [10-20] [0.100] of N2 (117).net
DiGraph with 117 nodes and 141 edges


/Users/jlq293/Projects/Study-1-Bibliometrics/data/10-main-path-graphs/2025/18. Key-Route Local Main Path [25-50] [0.010] of N2 (128)GOOOD.net
DiGraph with 128 nodes and 144 edges


/Users/jlq293/Projects/Study-1-Bibliometrics/data/10-main-path-graphs/2025/17. Key-Route Local Main Path [10-20] [0.010] of N2 (109)GOOD.net
DiGraph with 109 nodes and 119 edges


/Users/jlq293/Projects/Study-1-Bibliometrics/data/10-main-path-graphs/2025/Local Main Path [40-50] [0.200] of N6 (116).net
DiGraph with 116 nodes and 125 edges


/Users/jlq293/Projects/Study-1-Bibliometrics/data/10-main-path-graphs/2025/8. Key-Route Local Main Path [30-50] [0.200] of N2 (170).net
DiGraph with 170 nodes and 211 edges


FileNotFoundError: [Errno 2] No such file or directory: '../output/figures/main-paths/2025/mainpaths_alpha0.3_k10_res0.002.html'

<Figure size 640x480 with 0 Axes>

# cluster_alpha0.3_k20_res0.005.pkl


In [18]:
parameter = "alpha0.3_k20_res0.005"
df = pd.read_pickle(f"../data/06-clustered-df/{parameter}.pkl")

attr_to_assign = [
    "title",
    "citedby_count",
    "doi",
    "year",
    "unique_auth_year",
    f"cluster_{parameter}",
    f"centrality_{parameter}",
]


main_paths_data = [
    MainPathDataAssigner(mp, df, attr_to_assign).process_mp() for mp in main_paths
]

n_clusters = []
figs = []
for f, mp in zip(files, main_paths_data):
    print(f)
    print(mp)
    mpp_i = MainPathPlotterInteractive(mp, f"cluster_{parameter}", "label")
    fig = mpp_i.plot_network_on_timeline_interactive(return_fig=True)
    figs.append(fig)
    n_clusters.append(
        len(set(nx.get_node_attributes(mp, f"cluster_{parameter}").values()))
    )

with open(f"../output/figures/main-paths/mainpaths_{parameter}.html", "w") as f:
    f.write("<html><head><title>Visualization</title></head><body>\n")
    # Write header
    f.write(f"<h1>cluster: {parameter}</h1>\n")
    for idx, (filename, fig) in enumerate(zip(files, figs)):
        # Assuming 'files' contains some form of identifier or filename you want to display
        f.write(
            f"<h2>{filename.split('/')[-1]}</h2>\n<p>Number of clusters: {n_clusters[idx]}</p>"
        )  # Writing the identifier/filename before the figure
        f.write(fig)  # Writing the figure's HTML string
        f.write("<br>\n")
    f.write("</body></html>")


../data/08-main-paths/7. Key-Route Global Main Path [50-150] of N3 (192).net
DiGraph with 192 nodes and 244 edges


../data/08-main-paths/4. Key-Route Global Main Path [125-175] of N3 (166).net
DiGraph with 166 nodes and 202 edges


../data/08-main-paths/9. Key-Route Global Main Path [100-150] of N3 (169).net
DiGraph with 169 nodes and 203 edges


../data/08-main-paths/8. Key-Route Global Main Path [75-150] of N3 (185).net
DiGraph with 185 nodes and 228 edges


../data/08-main-paths/6. Key-Route Global Main Path [100-200] of N3 (229).net
DiGraph with 229 nodes and 301 edges


<Figure size 640x480 with 0 Axes>

# Subset


In [19]:
subset = [
    "12. Key-Route Global Main Path [150-200] of N2 (184).net",
    "5. Key-Route Global Main Path [125-175] of N2 (166).net",
]


In [20]:
# get all files in
p = "../data/06-clustered-df"
df_paths = glob.glob(f"{p}/*.pkl")
print(df_paths)


['../data/06-clustered-df/alpha0.3_k20_res0.005.pkl', '../data/06-clustered-df/alpha0.3_k10_res0.002.pkl', '../data/06-clustered-df/alpha0.3_k20_res0.006.pkl', '../data/06-clustered-df/alpha0.3_k20_res0.01.pkl', '../data/06-clustered-df/alpha0.5_k20_res0.006.pkl', '../data/06-clustered-df/alpha0.3_k15_res0.004.pkl']


In [21]:
files


['../data/08-main-paths/7. Key-Route Global Main Path [50-150] of N3 (192).net',
 '../data/08-main-paths/4. Key-Route Global Main Path [125-175] of N3 (166).net',
 '../data/08-main-paths/9. Key-Route Global Main Path [100-150] of N3 (169).net',
 '../data/08-main-paths/8. Key-Route Global Main Path [75-150] of N3 (185).net',
 '../data/08-main-paths/6. Key-Route Global Main Path [100-200] of N3 (229).net']

In [23]:
for df_path in df_paths:
    df = pd.read_pickle(df_path)
    partition = df_path.split("/")[-1].split(".pkl")[0]
    print(partition)

    attr_to_assign = [
        "title",
        "citedby_count",
        "doi",
        "year",
        f"cluster_{partition}",
        f"centrality_{partition}",
    ]

    main_paths_data = [
        MainPathDataAssigner(mp, df, attr_to_assign).process_mp() for mp in main_paths
    ]

    n_clusters = []
    figs = []
    for f, mp in zip(files, main_paths_data):
        if f.split("/")[-1] not in subset:
            print(f"skipping {f} ")
            continue
        print(f"Computing {f} ")
        mpp_i = MainPathPlotterInteractive(mp, f"cluster_{partition}", "label")
        fig = mpp_i.plot_network_on_timeline_interactive(return_fig=True)
        figs.append(fig)
        n_clusters.append(
            len(set(nx.get_node_attributes(mp, f"cluster_{partition}").values()))
        )

    with open(
        f"../output/figures/main-paths/SUBSET_mainpaths_{partition}.html", "w"
    ) as f:
        f.write("<html><head><title>Visualization</title></head><body>\n")
        # Write header
        f.write(f"<h1>cluster: {partition}</h1>\n")
        for idx, (filename, fig) in enumerate(zip(subset, figs)):
            # Assuming 'files' contains some form of identifier or filename you want to display
            f.write(
                f"<h2>{filename.split('/')[-1]}</h2>\n<p>Number of clusters: {n_clusters[idx]}</p>"
            )  # Writing the identifier/filename before the figure
            f.write(fig)  # Writing the figure's HTML string
            f.write("<br>\n")
        f.write("</body></html>")


alpha0.3_k20_res0.005
skipping ../data/08-main-paths/7. Key-Route Global Main Path [50-150] of N3 (192).net 
skipping ../data/08-main-paths/4. Key-Route Global Main Path [125-175] of N3 (166).net 
skipping ../data/08-main-paths/9. Key-Route Global Main Path [100-150] of N3 (169).net 
skipping ../data/08-main-paths/8. Key-Route Global Main Path [75-150] of N3 (185).net 
skipping ../data/08-main-paths/6. Key-Route Global Main Path [100-200] of N3 (229).net 
alpha0.3_k10_res0.002
skipping ../data/08-main-paths/7. Key-Route Global Main Path [50-150] of N3 (192).net 
skipping ../data/08-main-paths/4. Key-Route Global Main Path [125-175] of N3 (166).net 
skipping ../data/08-main-paths/9. Key-Route Global Main Path [100-150] of N3 (169).net 
skipping ../data/08-main-paths/8. Key-Route Global Main Path [75-150] of N3 (185).net 
skipping ../data/08-main-paths/6. Key-Route Global Main Path [100-200] of N3 (229).net 
alpha0.3_k20_res0.006
skipping ../data/08-main-paths/7. Key-Route Global Main Pa

# FINAL


In [106]:
cluster_label_dict = "/Users/jlq293/Projects/Study-1-Bibliometrics/output/orlando/ORLANDOSingleSolExplorer_alpha0.3_k20_res0.005.xlsx"
cluster_label_dict = pd.read_excel(cluster_label_dict, sheet_name="summary")
cluster_label_dict = dict(
    zip(cluster_label_dict["Cluster"], cluster_label_dict["Given Label"])
)
cluster_label_dict


{0: 'ocd, treatment, extension',
 1: 'pregnancy, safety, special population',
 2: 'determine drugs, detection, Pharmacology; Analytical Chemistry, clinical, plasma, analytical methods for the simultaneous determination and quantification of antidepressant drugs in various biological samples',
 3: 'chronic unpredictable mild stress in mice, animal models, natural medicine, pharmacology, animal studies',
 4: 'Serotonin modulation and antidepressant mechanisms, receptor, in vivo, animal models',
 5: 'Pharmacokinetic interactions between antidepressants and the cytochrome P450 system; Pharmacology; Drug metabolism',
 6: 'Environmental impact of fluoxetine on aquatic organisms, particularly fish, ecotoxicology, fluoxetine',
 7: 'Environmental Science; wastewater',
 8: 'Developmental and prenatal exposure to fluoxetine and maternal stress; pharmacology, animal models,',
 9: 'prescription patterns, public health, ',
 10: 'ptsd, indication extension',
 11: 'sexual dysfunction, safety, side eff

In [92]:
import glob

# read in all files in data/08-main-paths that end on .net
files = glob.glob("../data/08-main-paths/*.net")
main_paths = [MainPathReader(f, node_attributes=["eid"]).get_graph() for f in files]

main_path_dict = {k: v for k, v in zip(files, main_paths)}
main_path_dict.keys()


DiGraph with 192 nodes and 244 edges
DiGraph with 166 nodes and 202 edges
DiGraph with 169 nodes and 203 edges
DiGraph with 185 nodes and 228 edges
DiGraph with 229 nodes and 301 edges


dict_keys(['../data/08-main-paths/7. Key-Route Global Main Path [50-150] of N3 (192).net', '../data/08-main-paths/4. Key-Route Global Main Path [125-175] of N3 (166).net', '../data/08-main-paths/9. Key-Route Global Main Path [100-150] of N3 (169).net', '../data/08-main-paths/8. Key-Route Global Main Path [75-150] of N3 (185).net', '../data/08-main-paths/6. Key-Route Global Main Path [100-200] of N3 (229).net'])

In [93]:
Gmp = main_path_dict[
    "../data/08-main-paths/9. Key-Route Global Main Path [100-150] of N3 (169).net"
]

p = "../data/06-clustered-df/alpha0.3_k20_res0.005.pkl"
df = pd.read_pickle(p)

p_summary = "../output/tables/cluster-explorer/FinalSelect/SingleSolExplorer_alpha0.3_k20_res0.005.xlsx"


In [94]:
partition = "alpha0.3_k20_res0.005"
cluster_col = f"cluster_{partition}"
attr_to_assign = [
    "title",
    "citedby_count",
    "doi",
    "year",
    f"cluster_{partition}",
    f"centrality_{partition}",
]

mp = MainPathDataAssigner(Gmp, df, attr_to_assign).process_mp()


In [105]:
mp_clusters = set(nx.get_node_attributes(mp, cluster_col).values())

mp_cluster_label_dict = {
    k: v for k, v in cluster_label_dict.items() if str(k) in mp_clusters
}

mp_cluster_label_dict


{'60', '36', '18', '27', '7;6', '2', '30', '8', '110', '61', '116', '54', '29', '64', '3', '54;19;54', '73', '40', '19', '24', '5', '5;64', '79', '13', '6', '4', '21', '1', '66'}


{1: 'pregnancy, safety, special population',
 2: 'determine drugs, detection, Pharmacology; Analytical Chemistry, clinical, plasma, analytical methods for the simultaneous determination and quantification of antidepressant drugs in various biological samples',
 3: 'chronic unpredictable mild stress in mice, animal models, natural medicine, pharmacology, animal studies',
 4: 'Serotonin modulation and antidepressant mechanisms, receptor, in vivo, animal models',
 5: 'Pharmacokinetic interactions between antidepressants and the cytochrome P450 system; Pharmacology; Drug metabolism',
 6: 'Environmental impact of fluoxetine on aquatic organisms, particularly fish, ecotoxicology, fluoxetine',
 8: 'Developmental and prenatal exposure to fluoxetine and maternal stress; pharmacology, animal models,',
 13: 'children and adolescents, Pediatrics, special population',
 18: 'cardiovascular, safety, treatment',
 19: 'paroxetine, treatment, ',
 21: 'pain, indication extension, treatment',
 24: 'pharma

In [149]:
mp_cluster_label_dict = {
    1: "Pregnancy",
    2: "Pharmaceutical Chemistry; Antidepressant Detection",
    3: "Animial models; chronic unpredictable mild stress in mice",
    4: "Serotonin Regulation mechanisms; animal models",
    5: "Pharmacokinetics; cytochrome P450 system; Drug metabolism",
    6: "Aquatic toxicology",
    7: "Environmental Science; wastewater",
    8: "neonatology; animal models,",
    13: "Pediatric Use, special population",
    18: "cardiovascular risk",
    19: "paroxetine depression treatment",
    21: "pain management",
    24: "Pychopharmacology; reviews",
    27: "Pharmacogenomics;Gene Expression; Neurology",
    29: "Pharmacogenomics; Neurogenic Mechanisms; animal models",
    30: "Fear; animal studies",
    36: "serotonin binding; animal studies,",
    40: "covid-19",
    54: "fluvoxamine depression treatment",
    61: "schizophrenia; adjunctive treatment",
    60: "venlafaxine; depression treatment",
    64: "Clinical Pharmacogenetics; depression treatment",
    66: "escitalopram depression treatment",
    73: "health economics",
    79: "citalopram depression treatment",
    110: "headache treatment",
    116: "agomelatine depression treatment",
}

# makle all in title format
mp_cluster_label_dict = {k: v.title() for k, v in mp_cluster_label_dict.items()}
mp_cluster_label_dict


{1: 'Pregnancy',
 2: 'Pharmaceutical Chemistry; Antidepressant Detection',
 3: 'Animial Models; Chronic Unpredictable Mild Stress In Mice',
 4: 'Serotonin Regulation Mechanisms; Animal Models',
 5: 'Pharmacokinetics; Cytochrome P450 System; Drug Metabolism',
 6: 'Aquatic Toxicology',
 7: 'Environmental Science; Wastewater',
 8: 'Neonatology; Animal Models,',
 13: 'Pediatric Use, Special Population',
 18: 'Cardiovascular Risk',
 19: 'Paroxetine Depression Treatment',
 21: 'Pain Management',
 24: 'Pychopharmacology; Reviews',
 27: 'Pharmacogenomics;Gene Expression; Neurology',
 29: 'Pharmacogenomics; Neurogenic Mechanisms; Animal Models',
 30: 'Fear; Animal Studies',
 36: 'Serotonin Binding; Animal Studies,',
 40: 'Covid-19',
 54: 'Fluvoxamine Depression Treatment',
 61: 'Schizophrenia; Adjunctive Treatment',
 60: 'Venlafaxine; Depression Treatment',
 64: 'Clinical Pharmacogenetics; Depression Treatment',
 66: 'Escitalopram Depression Treatment',
 73: 'Health Economics',
 79: 'Citalopram

In [144]:
mp_clusters = set(nx.get_node_attributes(mp, cluster_col).values())

for node, data in mp.nodes(data=True):
    if data["label"] == "Magnussen_1982":
        mp.nodes[node][cluster_col] = "36"
    cluster = data[cluster_col]
    try:
        label = mp_cluster_label_dict.get(int(cluster), "Unknown")
    except:
        print(cluster)
        fam_cluster = cluster.split(";")
        label = " ::: ".join(
            [mp_cluster_label_dict.get(int(c), "Unknown") for c in fam_cluster]
        )
        mp.nodes[node][cluster_col] = "Family"
        print(label)
    mp.nodes[node]["full_label"] = str(cluster) + "; " + label


5;64
Pharmacokinetics; Cytochrome P450 System; Drug Metabolism ::: Clinical Pharmacogenetics; Depression Treatment
54;19;54
Fluvoxamine Depression Treatment ::: Paroxetine Depression Treatment ::: Fluvoxamine Depression Treatment
7;6
Environmental Science; Wastewater ::: Aquatic Toxicology


In [148]:
# safe to file grpahml
nx.write_graphml(mp, "../data/08-main-paths/final_mp_clustered.graphml")
nx.write_graphml(mp, "../output/orlando/final_mp_clustered.graphml")


In [145]:
list(mp.nodes(data=True))[0]


('1',
 {'label': 'Magnussen_1982',
  'eid': '2-s2.0-0019996341',
  'title': 'Treatment of myoclonic syndromes with paroxetine alone or combined with 5‐HTP',
  'citedby_count': '13',
  'doi': '10.1111/j.1600-0404.1982.tb04525.x',
  'year': '1982',
  'cluster_alpha0.3_k20_res0.005': '36',
  'centrality_alpha0.3_k20_res0.005': '0.05427762074604986',
  'full_label': '36; Serotonin Binding; Animal Studies,'})

In [146]:
# Get node attributes
x = nx.get_node_attributes(mp, "x")
y = nx.get_node_attributes(mp, "y")

# Convert coordinates to numeric values
x_numeric = {k: float(v) for k, v in x.items()}
y_numeric = {k: float(v) for k, v in y.items()}

pos = {k: (x_numeric[k], y_numeric[k]) for k in x_numeric}
mpp_i = MainPathPlotterInteractive(
    mp,
    cluster_col=cluster_col,
    label_col=cluster_col,
    hover_cols=["title", "citedby_count", "full_label"],
)
fig = mpp_i.plot_network_on_timeline_interactive(
    return_fig=True, adjust_overlap=True, pos=None
)
n_clusters = len(set(nx.get_node_attributes(mp, cluster_col).values()))

with open(f"../output/figures/main-paths/orlando_mainpaths_{partition}.html", "w") as f:
    f.write("<html><head><title>Visualization</title></head><body>\n")
    # Write header
    f.write(f"<h1>cluster_{partition}</h1>\n")
    # Assuming 'files' contains some form of identifier or filename you want to display
    f.write(
        f"<h2>{partition}</h2>\n<p>Number of clusters: {n_clusters}</p>"
    )  # Writing the identifier/filename before the figure
    f.write(fig)  # Writing the figure's HTML string
    f.write("<br>\n")
    f.write("</body></html>")


<Figure size 640x480 with 0 Axes>