# Gephi (GEXF) datasets

The GEXF website provides a small set of example datasets at https://gexf.net/datasets.html.
This notebook downloads a few of them and renders them with Graphistry.


In [1]:
import os
from pathlib import Path
from urllib.request import Request, urlopen


import graphistry

# To specify Graphistry account & server, use:
# graphistry.register(api=3, username='...', password='...', protocol='https', server='hub.graphistry.com')
# For more options: https://pygraphistry.readthedocs.io/en/latest/server/register.html


In [2]:
# Connection settings are read from the environment. Server and protocol have
# defaults; the credentials have none and must be supplied.
GRAPHISTRY_SERVER = os.getenv("GRAPHISTRY_SERVER", "hub.graphistry.com")
GRAPHISTRY_PROTOCOL = os.getenv("GRAPHISTRY_PROTOCOL", "https")
GRAPHISTRY_USERNAME = os.getenv("GRAPHISTRY_USERNAME")
GRAPHISTRY_PASSWORD = os.getenv("GRAPHISTRY_PASSWORD")

# Stop early with a clear message when either credential is unset or empty.
if not (GRAPHISTRY_USERNAME and GRAPHISTRY_PASSWORD):
    raise RuntimeError("Set GRAPHISTRY_USERNAME and GRAPHISTRY_PASSWORD to upload.")

# Kept as the cell's last expression so the value returned by register() is
# still shown as the cell output.
graphistry.register(
    api=3,
    username=GRAPHISTRY_USERNAME,
    password=GRAPHISTRY_PASSWORD,
    server=GRAPHISTRY_SERVER,
    protocol=GRAPHISTRY_PROTOCOL,
)


<graphistry.pygraphistry.GraphistryClient at 0x7bc24bf15d90>

We will download these datasets into a local `data/` folder:

- C. elegans
- Yeast
- EuroSiS web graph


In [3]:
# Use the demos/... data folder when it already exists relative to the working
# directory; otherwise fall back to a plain ``data`` folder.
_preferred_data_dir = Path("demos/demos_databases_apis/gexf/data")
DATA_DIR = _preferred_data_dir if _preferred_data_dir.exists() else Path("data")

# Create the chosen folder (and any missing parents); a no-op if it is present.
DATA_DIR.mkdir(parents=True, exist_ok=True)

def download_gexf(url, path, timeout=60):
    """Download ``url`` and store the response body at ``path``.

    The body is streamed to a sibling ``<name>.part`` file and moved into
    place only after the transfer has completed. An interrupted download
    therefore never leaves a truncated file at ``path``, which a later
    ``path.exists()`` check would mistake for a finished download.

    Args:
        url: Address of the file to fetch.
        path: Destination file (``str`` or ``pathlib.Path``).
        timeout: Seconds to wait on blocking network operations.

    Raises:
        urllib.error.URLError: If the request fails (HTTP errors included).
        OSError: If the destination cannot be written.
    """
    path = Path(path)
    partial = path.with_name(path.name + ".part")
    # Browser-like User-Agent; presumably the host rejects urllib's default
    # one -- TODO confirm.
    req = Request(url, headers={"User-Agent": "Mozilla/5.0"})
    try:
        with urlopen(req, timeout=timeout) as response, open(partial, "wb") as f:
            # Copy in 64 KiB chunks instead of buffering the whole body.
            for chunk in iter(lambda: response.read(1 << 16), b""):
                f.write(chunk)
        # Publish the finished file in one step.
        partial.replace(path)
    except BaseException:
        # Remove the incomplete temp file, then let the error propagate.
        try:
            partial.unlink()
        except FileNotFoundError:
            pass
        raise

# One (display name, download URL, local file) triple per example dataset.
DATASETS = [
    (
        "C. elegans",
        "https://gexf.net/data/celegans.gexf",
        DATA_DIR / "celegans.gexf",
    ),
    (
        "Yeast",
        "https://gexf.net/data/yeast.gexf",
        DATA_DIR / "yeast.gexf",
    ),
    (
        "EuroSiS",
        "https://gexf.net/data/WebAtlas_EuroSiS.gexf",
        DATA_DIR / "WebAtlas_EuroSiS.gexf",
    ),
]

# Fetch only the files that are not on disk yet, so re-runs skip the download.
for name, url, path in DATASETS:
    if path.exists():
        continue
    download_gexf(url, path)

# Cell output: one flag per dataset, True when its local file is present.
[local_file.exists() for *_, local_file in DATASETS]


[True, True, True]

## C. elegans

In [4]:
# Load the downloaded C. elegans file with graphistry.gexf.
g_celegans = graphistry.gexf(str(DATA_DIR / "celegans.gexf"))

# Row counts of the loaded node and edge tables.
counts = dict(nodes=len(g_celegans._nodes), edges=len(g_celegans._edges))

# Binding attributes read off the loaded graph (same key order as before),
# followed by the "play" entry of its URL params.
bindings = {
    key: getattr(g_celegans, "_" + key)
    for key in ("point_color", "point_size", "point_x", "point_y", "edge_color")
}
bindings["play"] = g_celegans._url_params.get("play")

counts, bindings


({'nodes': 306, 'edges': 2345},
 {'point_color': None,
  'point_size': None,
  'point_x': None,
  'point_y': None,
  'edge_color': None,
  'play': None})

In [5]:
# Preview the first rows of the node table loaded from the C. elegans file.
g_celegans._nodes.head()


Unnamed: 0,node_id,label
0,0,1
1,1,2
2,10,11
3,100,101
4,101,102


In [6]:
# Give the graph a name via .name(...), then plot it with Graphistry.
g_celegans.name("C. elegans (GEXF)").plot()


## Yeast

In [7]:
# Load the downloaded Yeast file with graphistry.gexf.
g_yeast = graphistry.gexf(str(DATA_DIR / "yeast.gexf"))

# Row counts of the loaded node and edge tables.
counts = dict(nodes=len(g_yeast._nodes), edges=len(g_yeast._edges))

# Binding attributes read off the loaded graph (same key order as before),
# followed by the "play" entry of its URL params.
bindings = {
    key: getattr(g_yeast, "_" + key)
    for key in ("point_color", "point_size", "point_x", "point_y", "edge_color")
}
bindings["play"] = g_yeast._url_params.get("play")

counts, bindings


({'nodes': 2361, 'edges': 7182},
 {'point_color': None,
  'point_size': None,
  'point_x': None,
  'point_y': None,
  'edge_color': None,
  'play': None})

In [8]:
# Preview the first rows of the node table loaded from the Yeast file.
g_yeast._nodes.head()


Unnamed: 0,node_id,label
0,4941,YBR236C
1,4942,YOR151C
2,4943,YML010W
3,4944,YNR016C
4,4945,YLR386W


In [9]:
# Give the graph a name via .name(...), then plot it with Graphistry.
g_yeast.name("Yeast (GEXF)").plot()


## EuroSiS web graph

In [10]:
# Load the downloaded EuroSiS file with graphistry.gexf.
g_eurosis = graphistry.gexf(str(DATA_DIR / "WebAtlas_EuroSiS.gexf"))

# Row counts of the loaded node and edge tables.
counts = dict(nodes=len(g_eurosis._nodes), edges=len(g_eurosis._edges))

# Binding attributes read off the loaded graph (same key order as before),
# followed by the "play" entry of its URL params.
bindings = {
    key: getattr(g_eurosis, "_" + key)
    for key in ("point_color", "point_size", "point_x", "point_y", "edge_color")
}
bindings["play"] = g_eurosis._url_params.get("play")

counts, bindings


({'nodes': 1285, 'edges': 7524},
 {'point_color': None,
  'point_size': None,
  'point_x': None,
  'point_y': None,
  'edge_color': None,
  'play': None})

In [11]:
# Preview the first rows of the node table loaded from the EuroSiS file.
g_eurosis._nodes.head()


Unnamed: 0,node_id,label,country,tag_gender,tag_governance,tag_health,tag_info,tag_internat,tag_nano,tag_people,...,tag_socioeco,tag_space,tag_transport,tag_agri,tag_biotech,tag_business,tag_comm,tag_energy,tag_environment,tag_food
0,10,Astronomical Institute,Czech Republic,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
1,1002,CCSTI La Turbine Rhône-Alpes,France,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
2,1003,Laurea University of Applied Sciences,Finland,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1004,European Association for Education Law and Policy,International,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1006,Les petits débrouillards,Belgium,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0


In [12]:
# Give the graph a name via .name(...), then plot it with Graphistry.
g_eurosis.name("EuroSiS (GEXF)").plot()
