## A notebook to visualize KGs and the ontologies they use using Gephi

In [1]:
# install and import libraries
%pip install pandas numpy

import pandas as pd
import numpy as np


[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Note: you may need to restart the kernel to use updated packages.


In [2]:
# load data
url = "https://docs.google.com/spreadsheets/d/1oiiRew9waUj5ze-yDdNByUNiAzlkQ8w-5QMRdQWleDo/export?format=csv"
df_original = pd.read_csv(url)
df_original.columns = ['Source', 'Target']
df_original

Unnamed: 0,Source,Target
0,HRA KG,CCF
1,HRA KG,CL
2,HRA KG,FMA
3,HRA KG,HGNC
4,HRA KG,HRAVS
...,...,...
456,ORKG,ZFS
457,ORKG,DCT
458,ORKG,BAO
459,ORKG,FOBI


## Create node list

In [3]:
# process Google Sheet into CSVs for nodes and edges 
nodes = set()

# Get unique values for each column
unique_values = {col: df_original[col].unique() for col in df_original.columns}

# Print results
for col, values in unique_values.items():
  for v in [v for v in values]:
   nodes.add(v)

# convert to df
df_nodes = pd.DataFrame(list(nodes), columns=["Label"])
df_nodes['Type'] = np.where(df_nodes['Label'].isin(
    df_original['Source']), "KG", "Ontology")

# create id and label cols
df_nodes['ID'] = df_nodes['Label'].copy()

# export to CSV
df_nodes.to_csv('input/nodes.csv', index=False)
df_nodes

Unnamed: 0,Label,Type,ID
0,PathoPhenoDB,Ontology,PathoPhenoDB
1,ICPC2EENG,Ontology,ICPC2EENG
2,SRC,Ontology,SRC
3,REACTOME,Ontology,REACTOME
4,PR,Ontology,PR
...,...,...,...
289,CPM,Ontology,CPM
290,MONDO,Ontology,MONDO
291,MTHSPL,Ontology,MTHSPL
292,LNC,Ontology,LNC


## Create edge list

In [4]:
df_original.to_csv('input/edges.csv', index=False)