## A notebook to visualize KGs and the ontologies they use

In [60]:
# install and import libraries
%pip install networkx pyvis requests pandas gspread

import networkx as nx
from pyvis.network import Network
import random
import requests
import pandas as pd
import gspread

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.3.1 -> 25.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [61]:
# load data
url = "https://docs.google.com/spreadsheets/d/1oiiRew9waUj5ze-yDdNByUNiAzlkQ8w-5QMRdQWleDo/export?format=csv"
df = pd.read_csv(url)
df

Unnamed: 0,kg_name,ontology
0,HRA KG,CCF
1,HRA KG,CL
2,HRA KG,FMA
3,HRA KG,HGNC
4,HRA KG,HRAVS
...,...,...
415,Ubergraph,Zebrafish anatomy and development ontology (ZFA)
416,Ubergraph,Ascomycete phenotype ontology (APO)
417,Ubergraph,Mouse Developmental Stages (MmusDV)
418,Ubergraph,Vertebrate Breed Ontology (VBO)


## Create node list

In [62]:
# process Google Sheet into CSVs for nodes and edges 
nodes = set()

# Get unique values for each column
unique_values = {col: df[col].unique() for col in df.columns}

# Print results
for col, values in unique_values.items():
  for v in [v for v in values]:
   nodes.add(v)

# convert to df
df_nodes = pd.DataFrame(list(nodes), columns=["Label"])

# create id and label cols
df_nodes['ID'] = df_nodes['Label'].copy()

# export to CSV
df_nodes.to_csv('output/nodes.csv', index=False)

## Create edge list

In [63]:
df.columns = ['Source', 'Target']
df.to_csv('output/edges.csv', index=False)

## Create network with `pyvis`

In [64]:
# create graph
G = nx.Graph()

# get columns
entity1_nodes = df['kg_name']
entity2_nodes = df['ontology']

# add Nodes
G.add_nodes_from(entity1_nodes, bipartite=0)  # Group 1
G.add_nodes_from(entity2_nodes, bipartite=1)  # Group 2

# add edges
for _, row in df.iterrows():
    G.add_edge(row['kg_name'], row['ontology'])



# visualize with pyvis
net = Network(height="1000px", width="100%", notebook=True)

# Disable physics to keep nodes fixed
# net.toggle_physics(False)

net.from_nx(G)

net.set_options("var options = {\"nodes\": {\"font\": {\"size\": 16}}}"
)

# Start with physics enabled for better layout
net.force_atlas_2based(gravity=-50, central_gravity=0.005,
                       spring_length=200, spring_strength=0.01)

# Show the initial layout to avoid overlap
net.show("network_initial_layout.html")

# After initial layout, disable physics
net.toggle_physics(False)

# Save and view the final network
net.show("network_final_no_physics.html")

# net.show("network.html") 

KeyError: 'kg_name'

In [None]:
# Create Graph
G = nx.Graph()

# Example Data
entity1_nodes = ["A", "B", "C", "D", "E", "F"]
entity2_nodes = [f"E{i}" for i in range(1, 256)]

# Add Nodes
G.add_nodes_from(entity1_nodes, bipartite=0)  # Group 1
G.add_nodes_from(entity2_nodes, bipartite=1)  # Group 2

# Add Edges (Links)
for _ in range(100):  # Simulated links
    G.add_edge(random.choice(entity1_nodes), random.choice(entity2_nodes))

# Visualize with Pyvis
net = Network(height="700px", width="100%", notebook=True)
net.from_nx(G)
net.show("network.html")  # Opens in browser

network.html
