## Railways Knowledge Graph Demo

In [44]:
# Install necessary libraries, uncomment if required
# !pip install graphdatascience

import os

from graphdatascience import GraphDataScience

In [45]:
# Connection settings for the database.
# Each value may be overridden through an environment variable (NEO4J_URI,
# NEO4J_USER, NEO4J_PASSWORD) so that real credentials never have to be typed
# into, or saved with, the notebook. The fallbacks are the original defaults.
host = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
user = os.environ.get("NEO4J_USER", "neo4j")
password = os.environ.get("NEO4J_PASSWORD", "")

In [46]:
# Build the GraphDataScience client used by every later cell; it connects to
# `host` with the (user, password) pair and targets the "neo4j" database.
gds = GraphDataScience(
    host,
    auth=(user, password),
    database="neo4j",
)

Download the dataset from:
https://github.com/jbarrasa/gc-2022/tree/main/interop/data

Then place the CSV files (`nr-stations-all.csv` and `nr-station-links.csv`) in the Neo4j database's import folder so that `LOAD CSV` can read them.

In [47]:
# Query 1: one :Station node (name, crs) per row of the stations CSV.
# NOTE: CREATE always adds new nodes, so running this cell a second time
# duplicates every station.
load_stations_query = """
    LOAD CSV WITH HEADERS FROM "file:///nr-stations-all.csv" AS station
    CREATE (:Station {name: station.name, crs: station.crs})
    """

# Query 2: tracks between stations as relationships. Both endpoints are looked
# up by their crs code; MERGE adds the :TRACK (distance rounded to 2 decimals)
# only when an identical relationship is not already present.
load_tracks_query = """
    LOAD CSV WITH HEADERS FROM "file:///nr-station-links.csv" AS track
    MATCH (from:Station {crs: track.from})
    MATCH (to:Station {crs: track.to})
    MERGE (from)-[:TRACK {distance: round( toFloat(track.distance), 2 )}]->(to)
    """

# Stations must exist before the tracks query can match its endpoints.
gds.run_cypher(load_stations_query)
gds.run_cypher(load_tracks_query)

In [48]:
# Create the in-memory graph projection named "trains".
#
# Projecting under a name that already exists in the graph catalog raises an
# error, so any projection left over from an earlier run of this cell is
# dropped first. This keeps the cell re-runnable.
if gds.graph.exists("trains")["exists"]:
    gds.graph.drop(gds.graph.get("trains"))

# Nodes: every :Station. Relationships: every directed :TRACK, carrying its
# `distance` property so later cells can use it as a relationship weight.
gds.graph.project.cypher(
    graph_name='trains',
    node_spec='MATCH (s:Station) RETURN id(s) AS id',
    relationship_spec=
    """
    MATCH (s1:Station)-[t:TRACK]->(s2:Station)
    RETURN id(s1) AS source, id(s2) AS target, t.distance AS distance
    """
)

(Graph({'graphName': 'trains', 'nodeCount': 2593, 'relationshipCount': 5782, 'database': 'neo4j', 'configuration': {'relationshipQuery': 'MATCH (s1:Station)-[t:TRACK]->(s2:Station)\n    RETURN id(s1) AS source, id(s2) AS target, t.distance AS distance', 'creationTime': neo4j.time.DateTime(2023, 9, 28, 15, 12, 4, 11708900, tzinfo=<DstTzInfo 'America/Denver' MDT-1 day, 18:00:00 DST>), 'validateRelationships': True, 'nodeQuery': 'MATCH (s:Station) RETURN id(s) AS id', 'sudo': True, 'readConcurrency': 4, 'parameters': {}, 'username': None}, 'schema': {'relationships': {'__ALL__': {'distance': 'Float (DefaultValue(NaN), PERSISTENT, Aggregation.NONE)'}}, 'nodes': {'__ALL__': {}}}, 'memoryUsage': '7059 KiB'}),
 nodeQuery                         MATCH (s:Station) RETURN id(s) AS id
 relationshipQuery    MATCH (s1:Station)-[t:TRACK]->(s2:Station)\n  ...
 graphName                                                       trains
 nodeCount                                                         2593

In [49]:
# Shortest path between Birmingham New Street and Edinburgh, weighted by
# track distance.

# Resolve both station names to node ids (source first, then target).
bham, eboro = (
    gds.find_node_id(["Station"], {"name": station_name})
    for station_name in ("Birmingham New Street", "Edinburgh")
)

trains = gds.graph.get("trains")
shortest_path = gds.shortestPath.dijkstra.stream(
    trains,
    sourceNode=bham,
    targetNode=eboro,
    relationshipWeightProperty="distance",
)

# The last entry of the first result row's `costs` is what gets reported as
# the shortest distance.
path_costs = shortest_path.get('costs').get(0)
print("Shortest distance: %s" % path_costs[-1])

Shortest distance: 295.91


In [50]:
# Compute betweenness centrality for every station in the "trains" projection
# and report the station with the highest score.

graph = gds.graph.get("trains")
result = gds.betweenness.stream(graph)

# Rank rows by score and read the top row's nodeId as a plain scalar.
# (Calling int() on a one-element Series, which is what a slice such as
# .iloc[0:1] produces, is deprecated in recent pandas releases.)
ranked = result.sort_values(by="score", ascending=False)
top_node_id = int(ranked["nodeId"].iloc[0])

# Pass the id as a query parameter instead of formatting it into the Cypher text.
n = gds.run_cypher(
    "MATCH (s:Station) WHERE ID(s) = $node_id RETURN s.name",
    params={"node_id": top_node_id},
)
print("Station with highest centrality: %s" % n["s.name"][0])

Station with highest centrality: Tamworth


In [51]:
# Write betweenness centrality scores back to the database as the node
# property `betweenness`, then check how many stations received a score.

graph = gds.graph.get("trains")
result = gds.betweenness.write(graph, writeProperty="betweenness")

total_stations = gds.run_cypher("MATCH (s:Station) RETURN count(s) AS total_stations")
# .iloc[0, 0] reads the single cell purely by position. Chaining .iloc[0][0]
# relies on integer-key positional fallback on a string-labelled Series, which
# recent pandas releases deprecate.
print(f'Total number of stations: {total_stations.iloc[0, 0]}')

processed_stations = gds.run_cypher(
  """
  MATCH (s:Station)
  WHERE s.betweenness IS NOT NULL
  RETURN count(s) AS stations_processed
  """)

print(f'Number of stations with betweenness score: {processed_stations.iloc[0, 0]}')

# Last use of the client in this cell: close it.
gds.close()

Total number of stations: 2593
Number of stations with betweenness score: 2593
