### Generating Cytoscape-Compatible Graphs for Co-Mentioning and Co-Citation Networks from Adjacency Matrix

In [None]:
import sys 
import os
from bh24_literature_mining.europepmc_api import EuropePMCClient
from bh24_literature_mining.utils import load_biotools_pub
from bh24_literature_mining.utils import load_biotools_from_zip
from bh24_literature_mining.utils import load_biotools_from_json
from bh24_literature_mining import graph_tools
import numpy as np
import json
import pandas as pd
import requests
import igraph as ig
import py4cytoscape as p4c
import pickle
import gzip


### Load adjacency matrix.
This matrix was generated using the notebook *"create_adjacency.ipynb"*.

In [None]:
with gzip.open('biohackathon2024/biotoolspub/adjancency_cites_filt.pkl', 'rb') as f:
    adjacency = pickle.load(f)

Check total number of edges (matrix is symmetric)

In [4]:
num_edges = (adjacency.values > 0).sum()
print("Total edges (including undirected duplicates):", num_edges)

Total edges (including undirected duplicates): 506784


### Apply optional filtering to prune edges

#### Filter edges based on tool names and publication IDs

Filters the adjacency matrix by removing edges between nodes that share the same prefix (tool name before "\_")  or a suffix (pubmed ID after "\_"). Useful for cocitation matrices - sometimes multiple tools come from the same publication, so if we only check citations of this publication we will add edges between these tools (their primary publication )

Tools with the same name and different publication - may refer to the updated version of a tool, and then both are referenced. These edges might not be of interest. 

In [8]:
adjacency_filtered = graph_tools.filter_edges_names_ids(adjacency)
num_edges = (adjacency_filtered.values > 0).sum()
print("Total edges (including undirected duplicates):", num_edges)

Total edges (including undirected duplicates): 502884


#### Filter edges based on edge weight (number of publications in common)

In [9]:
adjacency_thresholded = graph_tools.filter_adjacency_by_threshold(adjacency_filtered, threshold = 5)
num_edges = (adjacency_thresholded.values > 0).sum()
print("Total edges (including undirected duplicates):", num_edges)

Total edges (including undirected duplicates): 56484


#### Check number of isolated nodes after this filtering

In [10]:
# Identify nodes (rows) where all connections are zero
isolated_nodes = adjacency_thresholded.index[adjacency_thresholded.sum(axis=1) == 0]

# Remove these nodes from both rows and columns
adjacency_matrix_cleaned = adjacency_thresholded.drop(index=isolated_nodes, columns=isolated_nodes)

print(f"Removed {len(isolated_nodes)} isolated nodes. Now {adjacency_matrix_cleaned.shape[0]} nodes left.")

Removed 6776 isolated nodes. Now 2659 nodes left.


In [11]:
adjacency_matrix_cleaned 

Unnamed: 0,Seurat_34062119,Bakta_34739369,BCFtools_19505943,BCFtools_33590861,SAMtools_19505943,SAMtools_33590861,HTSlib_33594436,REPET_24786468,REPET_21304975,REPET_16110336,...,Evidence Finder_25378340,CIPRes_25861210,NetworkAnalys_24861621,STarMir_24803672,PTHGRN_24875471,LINCS Canvas Browser (LCB)_24906883,MeDeCom_28340624,MToolBox_25028726,EXTRACT_26896844,GPS-SUMO_24880689
Seurat_34062119,0,0,115,60,115,60,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Bakta_34739369,0,0,18,15,18,15,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
BCFtools_19505943,115,18,0,0,0,411,38,7,11,10,...,0,0,0,0,0,0,0,6,0,0
BCFtools_33590861,60,15,0,0,411,0,48,0,0,0,...,0,0,0,0,0,0,0,0,0,0
SAMtools_19505943,115,18,0,411,0,0,38,7,11,10,...,0,0,0,0,0,0,0,6,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
LINCS Canvas Browser (LCB)_24906883,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
MeDeCom_28340624,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
MToolBox_25028726,0,0,6,0,6,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
EXTRACT_26896844,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Generate Cytoscape-compatible Graph from Adjacency

First, we generate iGraph object from which network in Cytoscape can be created. We also add functionality to save iGraph object as CytoscapeJS object for the further use for Cytoscapa Java script plug in.

#### Generate iGraph


In [12]:
# Get the values as np.array, it's more convenient.
A = adjacency_matrix_cleaned.values

# Create graph, A.astype(bool).tolist() or (A / A).tolist() can also be used.
g = ig.Graph.Adjacency((A > 0).tolist(), mode="undirected")

# Add edge weights and node labels.
g.es['weight'] = A[A.nonzero()]
g.vs['name'] = adjacency_matrix_cleaned.columns 


### Run cytoscape

#### Connecting Jupyter to Cytoscape

Jupyter Bridge allows a remote Jupyter Notebook to execute functions on a locally running Cytoscape instance. If you are using a local Jupyter Notebook, this step is not required—Py4Cytoscape can communicate with Cytoscape directly.

In [None]:
print(f'Loading Javascript client ... {p4c.get_browser_client_channel()} on {p4c.get_jupyter_bridge_url()}')
browser_client_js = p4c.get_browser_client_js()
IPython.display.Javascript(browser_client_js) # Start browser client

Check the connection

In [18]:
p4c.cytoscape_ping()
p4c.cytoscape_version_info()

You are connected to Cytoscape!


{'apiVersion': 'v1',
 'cytoscapeVersion': '3.10.3',
 'automationAPIVersion': '1.11.0',
 'py4cytoscapeVersion': '1.11.0'}

#### Load igraph network to Cytoscape

In [23]:
p4c.create_network_from_igraph(g, title="Cocitation Network", collection="Cocitation Network Collection")

# Set visual properties

p4c.set_edge_color_mapping(**p4c.style_auto_mappings.gen_edge_color_map('weight',mapping_type='d'))
p4c.set_node_shape_default("ELLIPSE")
p4c.set_node_width_default(30)
p4c.set_node_height_default(30)



Applying default style...
Applying preferred layout
style_name not specified, so updating "default" style.
style_name not specified, so updating "default" style.
style_name not specified, so updating "default" style.
style_name not specified, so updating "default" style.
style_name not specified, so updating "default" style.
style_name not specified, so updating "default" style.
style_name not specified, so updating "default" style.


''

#### Create CytoscapeJS JSON from network loded to Cytoscape

Take the “current” network active in Cytoscape and generate dictionary that corresponds to CytoscapeJS JSON

In [25]:
network = p4c.create_cytoscapejs_from_network()

In [27]:
network['elements']

{'nodes': [{'data': {'id': '2838348',
    'shared_name': 'GPS-SUMO_24880689',
    'name': 'GPS-SUMO_24880689',
    'SUID': 2838348,
    'id_original': 'GPS-SUMO_24880689',
    'selected': False},
   'position': {'x': 114.13510711150934, 'y': 68.28128142480591},
   'selected': False},
  {'data': {'id': '2838345',
    'shared_name': 'EXTRACT_26896844',
    'name': 'EXTRACT_26896844',
    'SUID': 2838345,
    'id_original': 'EXTRACT_26896844',
    'selected': False},
   'position': {'x': 617.2433453317242, 'y': 236.15859313134888},
   'selected': False},
  {'data': {'id': '2838342',
    'shared_name': 'MToolBox_25028726',
    'name': 'MToolBox_25028726',
    'SUID': 2838342,
    'id_original': 'MToolBox_25028726',
    'selected': False},
   'position': {'x': -229.53790466827581, 'y': -114.95654969091675},
   'selected': False},
  {'data': {'id': '2838339',
    'shared_name': 'MeDeCom_28340624',
    'name': 'MeDeCom_28340624',
    'SUID': 2838339,
    'id_original': 'MeDeCom_28340624',
   

In [None]:
json_filename = "biohackathon2024/graph_generation/graph_data/cocitation_graph.json"
with open(json_filename, "w") as json_file:
    json.dump(network['elements'], json_file, indent=4)

print(f"Data saved to {json_filename}")