## A notebook to visualize KGs and the ontologies they use using Gephi

In [95]:
# install and import libraries
%pip install pandas numpy

import pandas as pd
import numpy as np

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.3.1 -> 25.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [96]:
# load data
url = "https://docs.google.com/spreadsheets/d/1oiiRew9waUj5ze-yDdNByUNiAzlkQ8w-5QMRdQWleDo/export?format=csv"
df_original = pd.read_csv(url)
df_original.columns = ['Source', 'Target']
df_original

Unnamed: 0,Source,Target
0,HRA KG,CCF
1,HRA KG,CL
2,HRA KG,FMA
3,HRA KG,HGNC
4,HRA KG,HRAVS
...,...,...
415,Ubergraph,Zebrafish anatomy and development ontology (ZFA)
416,Ubergraph,Ascomycete phenotype ontology (APO)
417,Ubergraph,Mouse Developmental Stages (MmusDV)
418,Ubergraph,Vertebrate Breed Ontology (VBO)


## Create node list

In [97]:
# process Google Sheet into CSVs for nodes and edges 
nodes = set()

# Get unique values for each column
unique_values = {col: df_original[col].unique() for col in df_original.columns}

# Print results
for col, values in unique_values.items():
  for v in [v for v in values]:
   nodes.add(v)

# convert to df
df_nodes = pd.DataFrame(list(nodes), columns=["Label"])
df_nodes['Type'] = np.where(df_nodes['Label'].isin(
    df_original['Source']), "KG", "Ontology")

# create id and label cols
df_nodes['ID'] = df_nodes['Label'].copy()

# export to CSV
df_nodes.to_csv('output/nodes.csv', index=False)
df_nodes

Unnamed: 0,Label,Type,ID
0,GLYCORDF,Ontology,GLYCORDF
1,PREDICT,Ontology,PREDICT
2,CDCREC,Ontology,CDCREC
3,TISSUES,Ontology,TISSUES
4,LabeledIn,Ontology,LabeledIn
...,...,...,...
256,ALT,Ontology,ALT
257,NANDA-I,Ontology,NANDA-I
258,MOTRPAC,Ontology,MOTRPAC
259,ICPC2EENG,Ontology,ICPC2EENG


## Create edge list

In [98]:
df_original.to_csv('output/edges.csv', index=False)