## A notebook to visualize KGs and the ontologies they use using Gephi

In [36]:
# install and import libraries
%pip install pandas numpy

import pandas as pd
import numpy as np

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [37]:
# load data
url = "https://docs.google.com/spreadsheets/d/1oiiRew9waUj5ze-yDdNByUNiAzlkQ8w-5QMRdQWleDo/export?format=csv"
df_original = pd.read_csv(url)


In [38]:
# remove SPOKE (for now)
df_filtered = df_original[~df_original.isin(['SPOKE']).any(axis=1)]

In [39]:
df_filtered.columns = ['Source', 'Target']
df_filtered

Unnamed: 0,Source,Target
0,HRA KG,CCF
1,HRA KG,CL
2,HRA KG,FMA
3,HRA KG,HGNC
4,HRA KG,HRAVS
...,...,...
456,ORKG,ZFS
457,ORKG,DCT
458,ORKG,BAO
459,ORKG,FOBI


## Create node list

In [40]:
# process Google Sheet into CSVs for nodes and edges 
nodes = set()

# Get unique values for each column
unique_values = {col: df_filtered[col].unique() for col in df_filtered.columns}

# Print results
for col, values in unique_values.items():
  for v in [v for v in values]:
   nodes.add(v)

# convert to df
df_nodes = pd.DataFrame(list(nodes), columns=["Label"])
df_nodes['Type'] = np.where(df_nodes['Label'].isin(
    df_filtered['Source']), "KG", "Ontology")

# create id and label cols
df_nodes['ID'] = df_nodes['Label'].copy()

# export to CSV
df_nodes.to_csv('input/nodes.csv', index=False)
df_nodes

Unnamed: 0,Label,Type,ID
0,GTEXEQTL,Ontology,GTEXEQTL
1,PSY,Ontology,PSY
2,CHV,Ontology,CHV
3,RCDSA,Ontology,RCDSA
4,HL7V2.5,Ontology,HL7V2.5
...,...,...,...
230,HPO,Ontology,HPO
231,CST,Ontology,CST
232,ERCCRBP,Ontology,ERCCRBP
233,MONDO,Ontology,MONDO


## Create edge list

In [41]:
df_filtered.to_csv('input/edges.csv', index=False)