### Generating Cytoscape-Compatible Graphs for Co-Mentioning and Co-Citation Networks from Adjacency Matrix

In [None]:
import numpy as np
import json
import pandas as pd
import requests
import igraph as ig
import py4cytoscape as p4c
import pickle
import gzip

from pathlib import Path

from bh24_literature_mining import graph_tools
from bh24_literature_mining.europepmc_api import EuropePMCClient
from bh24_literature_mining.utils import (
    load_biotools_pub,
    load_biotools_from_zip,
    load_biotools_from_json,
)

### Load adjacency matrix.
This matrix was generated using the notebook *"create_adjacency.ipynb"*.

In [None]:
# Read the gzip-compressed pickle that stores the precomputed adjacency matrix.
# NOTE: unpickling can execute arbitrary code, so only load files produced by
# this project's own pipeline.
adjacency_path = "../data/biotools/adjancency_cites_filt.pkl"
with gzip.open(adjacency_path, mode="rb") as pickled_matrix:
    adjacency = pickle.load(pickled_matrix)

Check the total number of edges. The matrix is symmetric, so each undirected edge is counted twice.

In [66]:
# Count every strictly positive cell of the matrix. Both (i, j) and (j, i)
# are included, which is why the message mentions undirected duplicates.
positive_cells = adjacency.to_numpy() > 0
num_edges = positive_cells.sum()
print("Total edges (including undirected duplicates):", num_edges)

Total edges (including undirected duplicates): 506784


### Apply optional filtering to prune edges

#### Filter edges based on tool names and publication IDs

Filters the adjacency matrix by removing edges between nodes that share the same prefix (the tool name before "\_") or the same suffix (the PubMed ID after "\_"). This is useful for co-citation matrices: sometimes multiple tools come from the same publication, so if we only check the citations of that publication (their shared primary publication), we will add edges between these tools.

Tools with the same name but different publications may refer to updated versions of the same tool, in which case both publications are referenced together. These edges might not be of interest.

In [67]:
# Prune edges with the project helper (same tool name or same publication ID),
# then recount the strictly positive cells that remain.
adjacency_filtered = graph_tools.filter_edges_names_ids(adjacency)
remaining_cells = adjacency_filtered.to_numpy() > 0
num_edges = remaining_cells.sum()
print("Total edges (including undirected duplicates):", num_edges)

Total edges (including undirected duplicates): 502884


#### Filter edges based on edge weight (number of publications in common)

In [None]:
# Edge-weight cut-off handed to the project helper; raise it for a sparser
# network, lower it to keep weaker links.
edge_weight_threshold = 50

adjacency_thresholded = graph_tools.filter_adjacency_by_threshold(
    adjacency_filtered,
    threshold=edge_weight_threshold,
)

# Recount the strictly positive cells that survived the cut-off.
surviving_cells = adjacency_thresholded.to_numpy() > 0
num_edges = surviving_cells.sum()
print("Total edges (including undirected duplicates):", num_edges)

Total edges (including undirected duplicates): 4804


#### Identify and remove isolated nodes after this filtering

In [None]:
# A row whose entries sum to zero has no edges left, so that node is isolated.
row_weight_totals = adjacency_thresholded.sum(axis=1)
isolated_nodes = adjacency_thresholded.index[row_weight_totals == 0]

# Drop each isolated node along both axes so the matrix stays square.
adjacency_matrix_cleaned = adjacency_thresholded.drop(
    index=isolated_nodes,
    columns=isolated_nodes,
)

print(
    f"Removed {len(isolated_nodes)} isolated nodes. Now {adjacency_matrix_cleaned.shape[0]} nodes left."
)

Removed 9064 isolated nodes. Now 371 nodes left.


In [None]:
# Display the cleaned adjacency matrix (isolated nodes removed) for inspection.
adjacency_matrix_cleaned

Unnamed: 0,Seurat_34062119,Bakta_34739369,BCFtools_19505943,BCFtools_33590861,SAMtools_19505943,SAMtools_33590861,REPET_24786468,REPET_21304975,PASTEClassifier_24786468,Integrated Microbial Genomes (IMG)_22194640,...,DIALS_29533234,FEELnc_28053114,GUIDANCE2_25883146,iPro54-PseKNC_25361964,iRSpot-PseDNC_23303794,Pse-in-One_25958395,chromVAR_28825706,PLEK_25239089,W-IQ-TREE_27084950,CIPRes_25861210
Seurat_34062119,0,0,115,60,115,60,0,0,0,0,...,0,0,0,0,0,0,96,0,0,0
Bakta_34739369,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
BCFtools_19505943,115,0,0,0,0,411,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
BCFtools_33590861,60,0,0,0,411,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
SAMtools_19505943,115,0,0,411,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Pse-in-One_25958395,0,0,0,0,0,0,0,0,0,0,...,0,0,0,51,50,0,0,0,0,0
chromVAR_28825706,96,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
PLEK_25239089,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
W-IQ-TREE_27084950,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Generate Cytoscape-compatible Graph from Adjacency

First, we generate an igraph object from which a network in Cytoscape can be created. We also add functionality to save the igraph object as a CytoscapeJS object for further use with the Cytoscape JavaScript plug-in.

#### Generate iGraph


In [None]:
# Build an undirected, weighted igraph graph from the cleaned adjacency matrix.
#
# Edges and their weights are both derived from the SAME upper-triangle index
# arrays. Assigning `A[A.nonzero()]` to an undirected graph would supply one
# value per non-zero cell (two per edge, in row-major order), so the weights
# would not line up with the edges igraph actually created.

# Work on a plain NumPy array; igraph addresses vertices by position.
A = adjacency_matrix_cleaned.values
node_labels = adjacency_matrix_cleaned.index.tolist()

# Keep each undirected pair exactly once (row <= column). Taking the
# element-wise maximum with the transpose is a no-op for a symmetric matrix
# and keeps an entry that is present in only one triangle.
upper_weights = np.triu(np.maximum(A, A.T))
sources, targets = np.nonzero(upper_weights > 0)

g = ig.Graph(
    n=A.shape[0],
    edges=list(zip(sources.tolist(), targets.tolist())),
    directed=False,
)

# Edge weights, in exactly the order the edges were added above.
g.es["weight"] = upper_weights[sources, targets].tolist()

# Node labels have the form "<tool name>_<PubMed ID>". Split on the LAST
# underscore so tool names that themselves contain "_" keep their full name
# and still yield the correct ID.
name_parts = [label.rsplit("_", 1) for label in node_labels]
g.vs["name"] = node_labels
g.vs["tool_name"] = [parts[0] for parts in name_parts]  # Before the last '_'
g.vs["pubmedid"] = [parts[1] for parts in name_parts]  # After the last '_'
g.vs["degree"] = g.degree()

### Run cytoscape

#### Connecting Jupyter to Cytoscape

Jupyter Bridge allows a remote Jupyter Notebook to execute functions on a locally running Cytoscape instance. If you are using a local Jupyter Notebook, this step is not required—Py4Cytoscape can communicate with Cytoscape directly.

In [None]:
# Announce which Jupyter Bridge channel/URL is used, then load the browser-side
# JavaScript client.
print(
    f"Loading Javascript client ... {p4c.get_browser_client_channel()} on {p4c.get_jupyter_bridge_url()}"
)
browser_client_js = p4c.get_browser_client_js()

# The notebook's import cell does not import IPython, so bring it into scope
# here; otherwise the line below fails with a NameError.
import IPython.display

# Must stay the last expression of the cell so the notebook renders (and thus
# runs) the returned Javascript object.
IPython.display.Javascript(browser_client_js)  # Start browser client

Loading Javascript client ... 5aaa35ce-1276-4c39-b922-4fdd3aa6f1b7 on https://jupyter-bridge.cytoscape.org


Check the connection

In [79]:
# Ping the running Cytoscape instance to check the connection.
p4c.cytoscape_ping()
# Last expression of the cell, so the notebook shows the returned version info.
p4c.cytoscape_version_info()

You are connected to Cytoscape!


{'apiVersion': 'v1',
 'cytoscapeVersion': '3.10.3',
 'automationAPIVersion': '1.11.0',
 'py4cytoscapeVersion': '1.11.0'}

#### Load igraph network to Cytoscape

In [None]:
# Send the igraph object to Cytoscape as a new network in its own collection.
p4c.create_network_from_igraph(
    g,
    title="Cocitation Network",
    collection="Cocitation Network Collection",
)

# Visual properties: colour edges by "weight" and nodes by "degree", using
# auto-generated mappings of type "d".
edge_color_args = p4c.style_auto_mappings.gen_edge_color_map(
    "weight", mapping_type="d"
)
p4c.set_edge_color_mapping(**edge_color_args)

node_color_args = p4c.style_auto_mappings.gen_node_color_map(
    "degree", mapping_type="d"
)
p4c.set_node_color_mapping(**node_color_args)

# Default node appearance: ellipses with equal width and height.
p4c.set_node_shape_default("ELLIPSE")
node_size = 30
p4c.set_node_width_default(node_size)
p4c.set_node_height_default(node_size)

Applying default style...
Applying preferred layout
style_name not specified, so updating "default" style.
style_name not specified, so updating "default" style.
style_name not specified, so updating "default" style.
style_name not specified, so updating "default" style.
style_name not specified, so updating "default" style.
style_name not specified, so updating "default" style.
style_name not specified, so updating "default" style.
style_name not specified, so updating "default" style.
style_name not specified, so updating "default" style.
style_name not specified, so updating "default" style.


''

#### Create CytoscapeJS JSON from network loaded to Cytoscape

Take the “current” network active in Cytoscape and generate a network file in CytoscapeJS JSON format and a style file in CytoscapeJS JSON format.

Rename the current style (by duplicating it under a new name and activating the copy)

In [89]:
# Give the style currently applied in Cytoscape a dedicated name so that it
# can be exported under that name.

# Step 1: Get the current style
current_style = p4c.get_current_style()
print(f"Current Style: {current_style}")
# Define new style name
new_style_name = "cocitation_style"
# Step 2: Duplicate the style with a new name.
# Skip this when the cell is re-run and the active style already carries the
# target name: copying a style onto its own name serves no purpose and risks
# piling up duplicate styles in the Cytoscape session.
if current_style != new_style_name:
    p4c.copy_visual_style(current_style, new_style_name)
# Step 3: Set this style as active
p4c.set_visual_style(new_style_name)
print(f"Style renamed to: {new_style_name}")

Current Style: cocitation_style
Style renamed to: cocitation_style


Save current style

In [None]:
# Output directory: <two levels above the working directory>/data/graph_generation.
path_out = Path.cwd().parent.parent / "data" / "graph_generation"
# Create the directory if needed so this export and later writes into
# `path_out` do not fail on a fresh checkout.
path_out.mkdir(parents=True, exist_ok=True)

style_filename = path_out / "cocitation_style.json"
# Pass a plain string: the py4cytoscape exporter is understood to treat the
# file name as a str rather than a Path object.
p4c.export_visual_styles(str(style_filename), type="json")

Generate and save network

In [93]:
# Ask Cytoscape for a CytoscapeJS representation of a network; no network is
# named in the call, so this relies on the one currently active in Cytoscape.
network = p4c.create_cytoscapejs_from_network()

In [None]:
# Serialise the exported network with 4-space indentation and write it next to
# the style file as a .cyjs document.
json_filename = path_out / "cocitation_graph.cyjs"
serialized_network = json.dumps(network, indent=4)
json_filename.write_text(serialized_network)

print(f"Data saved to {json_filename}")