# Imports


In [1]:
# Standard library imports
import json
import random
import time
from collections import defaultdict
from multiprocessing import Pool, cpu_count
from typing import Dict, List, Optional, Tuple, Union
import logging

# Data manipulation and analysis
import numpy as np
import pandas as pd

# Disable SettingWithCopyWarning
pd.options.mode.chained_assignment = None

# Graphs and networks
import networkx as nx
import igraph as ig

# Visualization
import matplotlib.pyplot as plt
import colorcet as cc
from matplotlib.colors import to_hex, to_rgb

# Data visualization and processing
import datashader as ds
import datashader.transfer_functions as tf
from datashader.bundling import hammer_bundle

# Scientific computing
from scipy.spatial import cKDTree
from scipy.interpolate import CubicSpline, interp1d


# Progress bars
from tqdm import tqdm

# Performance optimization
from numba import jit, prange

# Custom modules
from fa2_modified import ForceAtlas2

# Warnings
import warnings

In [2]:
# 2. Constants and configuration
INPUT_GRAPH_PATH = "../data/07-clustered-graphs/alpha0.3_k10_res0.002.graphml"
CLUSTER_INFO_LABEL_TREE = "../output/cluster-qualifications/ClusterInfoLabelTree.xlsx"
CLUSTER_LABEL_DICT_PATH = "../data/99-testdata/cluster_label_dict.json"
CLUSTER_TREE_PATH = "../output/cluster-qualifications/ClusterHierachy_noComments.json"
OUTPUT_DIR = "../data/99-testdata/"
THREEJS_OUTPUT_DIR = (
    "/Users/jlq293/Projects/Random Projects/LW-ThreeJS/2d_ssrinetworkviz/src/data/"
)
CLUSTER_HIERACHY_FOR_LEGEND_PATH = (
    "../output/cluster-qualifications/ClusterHierachy_noComments.json"
)

In [8]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.colors import to_hex
from matplotlib.colors import to_rgb

In [9]:
class ClusterColorAssigner:
    """
    TO DO:
    1. clusters not mutually exclusive - need to assign to multiple categories
    2. colors too similar - need to assign more distinct colors

    A class for assigning colors to clusters based on their characteristics.

    This class provides methods to categorize clusters into color palettes,
    assign specific colors within those palettes, and create a mapping
    between clusters and their assigned colors.

    Attributes:
        colormaps (dict): A dictionary mapping color names to matplotlib colormaps.
        condition_names (dict): A dictionary mapping color names to condition names.

    Methods:
        assign_color_categories(clust_hierarchy): Assigns color categories to clusters.
        print_color_mapping(): Prints the mapping of conditions to color palettes.
        assign_colors(df, colormap): Assigns specific colors to clusters within a palette.
        create_color_dataframes(clust_hierarchy): Creates separate DataFrames for each color category.
        process_cluster_hierarchy(clust_hierarchy): Processes the entire cluster hierarchy.

    Usage:
        color_assigner = ClusterColorAssigner()
        processed_hierarchy, color_dict = color_assigner.process_cluster_hierarchy(cluster_hierarchy_df)
    """

    def __init__(self):
        self.colormaps = {
            "blue": plt.get_cmap("Blues"),
            "red": plt.get_cmap("Reds"),
            "green": plt.get_cmap("Greens"),
            "purple": plt.get_cmap("Purples"),
        }
        self.condition_names = {
            "blue": "pharmacology",
            "red": "indications",
            "green": "safety",
            "purple": "other",
        }

    def assign_color_categories(self, clust_hierarchy):
        conditions = [
            clust_hierarchy["pharmacology"] == 1,
            clust_hierarchy["indications"] == 1,
            clust_hierarchy["safety"] == 1,
            clust_hierarchy["other"] == 1,
        ]
        choices = ["blue", "red", "green", "purple"]
        clust_hierarchy["color_pal"] = np.select(conditions, choices, default="")
        return clust_hierarchy

    def print_color_mapping(self):
        print("Mapping of conditions to color palettes:")
        for color, condition in self.condition_names.items():
            print(f"{condition.capitalize()}: {color}")

    @staticmethod
    def assign_colors(df, colormap):
        num_colors = df.shape[0]
        colors = [to_hex(colormap(x)) for x in np.linspace(0.1, 0.9, num_colors)]
        df["color"] = colors
        return df

    def create_color_dataframes(self, clust_hierarchy):
        color_dfs = {}
        for color_name, colormap in self.colormaps.items():
            df_color = clust_hierarchy[
                clust_hierarchy["color_pal"] == color_name
            ].copy()
            if not df_color.empty:
                color_dfs[color_name] = self.assign_colors(df_color, colormap)
        return color_dfs

    def process_cluster_hierarchy(self, clust_hierarchy):
        clust_hierarchy = self.assign_color_categories(clust_hierarchy)
        self.print_color_mapping()
        color_dfs = self.create_color_dataframes(clust_hierarchy)
        clust_hierarchy = pd.concat(color_dfs.values())
        cluster_color_dict = dict(
            zip(clust_hierarchy["cluster"], clust_hierarchy["color"])
        )
        return clust_hierarchy, cluster_color_dict

    def save_dict_to_json(self, dict, path):
        with open(path, "w") as f:
            json.dump(dict, f)
        print(f"Cluster color dictionary saved to {path}")

In [10]:
# Assuming clust_hierarchy is your input DataFrame

clust_hierarchy = pd.read_excel(CLUSTER_INFO_LABEL_TREE)

color_assigner = ClusterColorAssigner()
clust_hierarchy, cluster_color_dict = color_assigner.process_cluster_hierarchy(
    clust_hierarchy
)

color_assigner.save_dict_to_json(
    cluster_color_dict, OUTPUT_DIR + "cluster_color_dict.json"
)

cluster_label_dict = dict(
    zip(clust_hierarchy["cluster"], clust_hierarchy["clusterlabel"])
)

color_assigner.save_dict_to_json(cluster_label_dict, CLUSTER_LABEL_DICT_PATH)

print("\nCluster color dictionary (first 5 items):")
print(dict(list(cluster_color_dict.items())[:5]))
print("\nCluster label dictionary (first 5 items):")
print(dict(list(cluster_label_dict.items())[:5]))

Mapping of conditions to color palettes:
Pharmacology: blue
Indications: red
Safety: green
Other: purple
Cluster color dictionary saved to ../data/99-testdata/cluster_color_dict.json
Cluster color dictionary saved to ../data/99-testdata/cluster_label_dict.json

Cluster color dictionary (first 5 items):
{0: '#e3eef9', 2: '#dfebf7', 3: '#dbe9f6', 5: '#d6e6f4', 6: '#d3e3f3'}

Cluster label dictionary (first 5 items):
{0: 'Serotonin Receptor Studies', 2: 'Risks of Prenatal Exposure', 3: 'Quantification of SSRIs in Biological Samples', 5: 'SSRIs and the Cytochrome P450 System', 6: 'SSRI Neuroscience'}


In [12]:
clust_hierarchy

Unnamed: 0,cluster,clusterlabel,et_clinical,et_non_clinical,et_mixed,et_review,et_garbage,et_other,nr_of_pubs,25th_percentile_year,...,psilocybin,generic_ssris,ssris,ecotoxicology,alternative_treatments,drug_utilization_patterns,cost-effectineness,otherother,color_pal,color
0,0,Serotonin Receptor Studies,0,1,0,0,0,0,1218,1995.0,...,0,0,0,0,0,0,0,0,blue,#e3eef9
2,2,Risks of Prenatal Exposure,1,0,0,0,0,0,1073,2007.0,...,0,0,0,0,0,0,0,0,blue,#dfebf7
3,3,Quantification of SSRIs in Biological Samples,0,1,0,0,0,0,1070,2005.0,...,0,0,0,0,0,0,0,0,blue,#dbe9f6
5,5,SSRIs and the Cytochrome P450 System,0,1,0,0,0,0,1005,1997.0,...,0,0,0,0,0,0,0,0,blue,#d6e6f4
6,6,SSRI Neuroscience,0,1,0,0,0,0,847,2008.0,...,0,0,0,0,0,0,0,0,blue,#d3e3f3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100,100,Canine Behavioral Disorders,0,0,0,0,0,1,102,2009.0,...,0,0,0,0,0,0,0,1,purple,#7262ac
106,106,St. John's Wort for Depression,1,0,0,0,0,0,96,2002.0,...,0,0,0,0,1,0,0,0,purple,#6950a3
111,111,Generic SSRIs,1,0,0,0,0,0,89,2006.0,...,0,1,0,0,0,0,0,0,purple,#61409b
115,115,Selective serotonin reuptake inhibitors,0,0,0,0,1,1,82,1994.0,...,0,0,1,0,0,0,0,0,purple,#582f93


In [14]:
cluster_label_dict[54]

'(Sertraline) Drug Delivery'

# legend json creation


In [21]:
def transform_dict_to_legend(cluster_hierachy_dict, cluster_label_dict):
    """
    Adds the cluster labels to the cluster hierarchy dictionary to create a legend.
    """
    # make sure keys in cluster_label_dict are integers
    cluster_label_dict = {int(k): v for k, v in cluster_label_dict.items()}
    legend = cluster_hierachy_dict.copy()
    for key, value in legend.items():
        if isinstance(value, dict):
            # Recursively transform dictionaries
            transform_dict(value, cluster_label_dict)
        elif isinstance(value, list):
            new_list = []
            for item in value:
                if isinstance(item, int) and item in cluster_label_dict:
                    new_list.append({item: cluster_label_dict[item]})
                else:
                    new_list.append(item)
            legend[key] = new_list
    return legend


with open(CLUSTER_HIERACHY_FOR_LEGEND_PATH, "r") as f:
    cluster_hierachy_dict = json.load(f)

with open(CLUSTER_LABEL_DICT_PATH, "r") as f:
    cluster_label_dict = json.load(f)

legend = transform_dict_to_legend(cluster_hierachy_dict, cluster_label_dict)

NameError: name 'transform_dict' is not defined

In [None]:
legend

In [None]:
# Save as JSON
with open(OUTPUT_DIR + "legend_full_label_tree_clusternr.json", "w") as json_file:
    json.dump(cat_tree, json_file, indent=4)

    # Save as JSON
with open(
    THREEJS_OUTPUT_DIR + "legend_tree.json",
    "w",
) as json_file:
    json.dump(cat_tree, json_file, indent=4)