In [15]:
# Enable IPython's autoreload extension so edits to imported project modules
# are picked up without restarting the kernel.
%load_ext autoreload
# Mode 2: reload modules automatically before executing each cell.
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [16]:
# Standard Library Imports
import datetime
import json
import os
import sys
import time
from itertools import product

import colorcet as cc
import matplotlib.pyplot as plt
import networkx as nx

# Third-Party Library Imports
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import scipy.sparse as sparse
import seaborn as sns
from dotenv import load_dotenv

# Populate the process environment from a local .env file
load_dotenv()

# Read the project locations from the environment in one pass.
# os.getenv returns None for any variable that is not set.
python_path, data_dir, src_dir, output_dir = (
    os.getenv(env_key)
    for env_key in ("PYTHONPATH", "DATA_DIR", "SRC_DIR", "OUTPUT_DIR")
)

from src.main_path.MainPathDataAssigner import MainPathDataAssigner
from src.main_path.MainPathReader import MainPathReader
from src.main_path.MainPathPlotterInteractive import MainPathPlotterInteractive

In [17]:
# Analysis dataframe.
# NOTE(review): pickle files can execute arbitrary code when loaded; only read
# files produced by this project's own pipeline.
p = data_dir + "/08-analysis-data/df_analysis.pkl"
df = pd.read_pickle(p)

# Cluster labels (JSON). The `with` block closes the file handle after reading
# instead of leaving it open for the lifetime of the kernel.
p_labels = (
    output_dir
    + "/cluster-qualifications/cluster-label-tree/cluster_labels_filtered.json"
)
with open(p_labels, "r") as labels_file:
    cluster_labels_dict = json.load(labels_file)

# Cluster colors (JSON), read the same way.
p_color = output_dir + "/cluster-qualifications/colors/cluster_color_map_2025.json"
with open(p_color, "r") as colors_file:
    cluster_color_dict = json.load(colors_file)

The main path file cannot be read by networkx, so it is read in from scratch.

# Read in main path data from Pajek


In [18]:
# Location of the main path .net file inside the data directory.
main_path_filename = "25. Key-Route Global Main Path [75-125] of N2 (155).net"
path_to_main_path = data_dir + "/10-main-path-graphs/" + main_path_filename

# Read the file, requesting "eid" as a node attribute, and fetch the graph.
mp_reader = MainPathReader(path_to_main_path, node_attributes=["eid"])
Gmp = mp_reader.get_graph()

DiGraph with 155 nodes and 188 edges


# Add info from `df` to the main path graph


In [19]:
# Attributes handed to MainPathDataAssigner for assignment onto the graph nodes.
attr_to_assign = [
    "title",
    "citedby_count",
    "doi",
    "year",
    "cluster_alpha0.3_k10_res0.002",
    # "centrality_alpha0.3_k10_res0.002",
    "authors_apa_string",
]
data_assigner = MainPathDataAssigner(
    Gmp, df, attr_to_assign, cluster_labels_dict, cluster_color_dict
)
Gmp = data_assigner.process_mp()

# Print one example of each node kind as a sanity check.
# In the recorded run, index 9 is a node for a single publication and index 0
# is a "family" node whose attribute values are joined with ";". The labels
# were previously printed the wrong way round.
nodes_with_data = list(Gmp.nodes(data=True))
print("single node:")
print(nodes_with_data[9])
print("family node:")
print(nodes_with_data[0])

family node:
('10', {'label': 'Overmars_1983', 'eid': '2-s2.0-0021077683', 'title': 'Fluvoxamine maleate: metabolism in man', 'citedby_count': '65', 'doi': '10.1007/BF03188757', 'year': '1983', 'cluster_alpha0.3_k10_res0.002': '6', 'authors_apa_string': 'Overmars et al.', 'cluster_label': 'SSRIs and the Cytochrome P450 System', 'color': [0.16824062480658128, 0.39474741839955463, 0.8798207680820249]})
single node:
('1', {'label': 'family_Boeck_1982_Pedersen_1982', 'eid': '2-s2.0-0019989539;2-s2.0-0020047901', 'title': 'Citalopram, a selective serotonin reuptake inhibitor: Clinical antidepressive and long-term effect - a phase II study;Studies on Acute Toxicity and Drug Levels of Citalopram in the Dog', 'citedby_count': '62;34', 'doi': '10.1007/BF00464566;10.1111/j.1600-0773.1982.tb00959.x', 'year': '1982;1982', 'cluster_alpha0.3_k10_res0.002': '19;48', 'authors_apa_string': 'Pedersen et al.;Boeck et al.', 'cluster_label': 'Escitalopram for Depression;QTC Prolongation', 'color': [0.41903

# Save some metadata


In [20]:
# Count how many main-path nodes fall into each cluster. When a node carries
# several ";"-separated cluster ids, only the first one is counted.
cluster_occurence_dict = {}
for node_id, node_data in Gmp.nodes(data=True):
    # split() returns the whole string as its only element when no ";" is present
    cluster = node_data["cluster_alpha0.3_k10_res0.002"].split(";")[0]
    if cluster not in cluster_occurence_dict:
        cluster_occurence_dict[cluster] = 0
    cluster_occurence_dict[cluster] += 1

print(f"Nr of unique clusters: {len(cluster_occurence_dict)} \n")

# Report clusters from most to least frequent, together with their labels
ranked_clusters = sorted(
    cluster_occurence_dict.items(), key=lambda item: item[1], reverse=True
)
for cluster, count in ranked_clusters:
    label = cluster_labels_dict[str(cluster)]
    print(f"{cluster}: {count} ++++++ label: {label}")


# Persist the counts as JSON in the output directory
file = output_dir + "/descriptive-stats-logs/main_path_cluster_occurence.json"

with open(file, "w") as f:
    json.dump(cluster_occurence_dict, f, indent=2)

Nr of unique clusters: 23 

19: 36 ++++++ label: Escitalopram for Depression
0: 31 ++++++ label: Aquatic Ecotoxicology
6: 23 ++++++ label: SSRIs and the Cytochrome P450 System
38: 11 ++++++ label: Fluvoxamine for Covid 19
14: 10 ++++++ label: Fluvoxamine for Depression
10: 9 ++++++ label: Risk of Prenatal Exposure (Rodents)
27: 8 ++++++ label: Paroxetine Binding to Serotonin Transporter
7: 6 ++++++ label: Impact of SSRIs on Neurogenesis
39: 3 ++++++ label: Pharmacoeconomics of SSRIs for Depression
48: 2 ++++++ label: QTC Prolongation
17: 2 ++++++ label: SSRIs Effect on Fear
24: 2 ++++++ label: SSRIs in Dementias
45: 2 ++++++ label: Pharmacovigilance of SSRIs
73: 1 ++++++ label: Non-SERT Transporters in Antidepressant Action
49: 1 ++++++ label: Extrapyramidal Reactions Risk
22: 1 ++++++ label: SSRIs for Pain
81: 1 ++++++ label: Sertraline for Depression
1: 1 ++++++ label: Serotonin Receptor Modulation in SSRI Treatment
41: 1 ++++++ label: Venlafaxine vs SSRIs for Depression
61: 1 ++++++

# Coloring


In [21]:
# Palette of 24 hex colors, one per cluster shown on the main path.
distinct_colors = [
    "#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd", "#8c564b",
    "#e377c2", "#7f7f7f", "#bcbd22", "#17becf", "#ff9896", "#98df8a",
    "#ffbb78", "#aec7e8", "#c5b0d5", "#c49c94", "#f7b6d2", "#dbdb8d",
    "#9edae5", "#393b79", "#e6550d", "#31a354", "#756bb1", "#de2d26",
]


def hex_to_rgb(hex_color):
    """Turn a hex color string into a list of three floats in the 0-1 range.

    Leading "#" characters are stripped; the first six remaining characters
    are read as the two-digit hexadecimal red, green and blue channels.
    """
    digits = hex_color.lstrip("#")
    channels = []
    for start in range(0, 6, 2):
        channels.append(int(digits[start : start + 2], 16))
    return [channel / 255 for channel in channels]


# Create color mapping for base clusters.
# NOTE: this rebinds `cluster_color_dict`, replacing the mapping loaded from
# JSON earlier in the notebook; re-running the data-assigner cell after this
# one would hand it this hex palette instead.
# The palette is indexed cyclically so every cluster gets a color even when
# there are more clusters than palette entries (zip() would silently drop the
# surplus clusters and the lookup below would fail with a KeyError).
if len(cluster_occurence_dict) > len(distinct_colors):
    print(
        f"Warning: {len(cluster_occurence_dict)} clusters but only "
        f"{len(distinct_colors)} colors; colors will repeat."
    )
cluster_color_dict = {
    cluster_id: distinct_colors[position % len(distinct_colors)]
    for position, cluster_id in enumerate(cluster_occurence_dict)
}

# Assign colors to all nodes (overwrites any existing "color" attribute)
for node_id, node_data in Gmp.nodes(data=True):
    # Base cluster = first entry of a possibly ";"-separated cluster string
    base_cluster = node_data["cluster_alpha0.3_k10_res0.002"].split(";")[0]

    # Look up the hex color and store it as RGB values on the 0-1 scale
    hex_color = cluster_color_dict[base_cluster]
    Gmp.nodes[node_id]["color"] = hex_to_rgb(hex_color)

# Plot


In [22]:
# Build the interactive plotter on the annotated main-path graph.
# hover_cols uses "citedby_count": that is the attribute name assigned to the
# nodes earlier in the notebook, whereas "cited_by" is not among the node
# attributes shown in the recorded output.
# NOTE(review): confirm against MainPathPlotterInteractive that hover columns
# are looked up directly as node attribute names.
plotter = MainPathPlotterInteractive(
    Gmp,
    cluster_col="cluster_alpha0.3_k10_res0.002",
    node_label_col="label",
    hover_cols=["title", "citedby_count", "year"],
    color_attr="color",
    cluster_label_attr="cluster_label",
)
# Show network visualization and save it as HTML under the output directory
fig_interactive = plotter.plot_network_on_timeline_interactive(
    savingpath=output_dir + "/paper-output/main_path_fig.html",
    width=1000,
    height=1300,
    return_fig=True,
    adjust_overlap=True,
    show_legend=True,
    title=None,
    use_custom_colors=True,
    show_labels=False,
    align_nodes_with_timeline=False,
)

Generating colors using a color palette.
