In [1]:
!pip install gremlinpython

Collecting gremlinpython
  Downloading gremlinpython-3.7.4-py3-none-any.whl.metadata (6.4 kB)
Collecting aiohttp<4.0.0,>=3.8.0 (from gremlinpython)
  Downloading aiohttp-3.13.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (8.1 kB)
Collecting aenum<4.0.0,>=1.4.5 (from gremlinpython)
  Downloading aenum-3.1.16-py3-none-any.whl.metadata (3.8 kB)
Collecting isodate<1.0.0,>=0.6.0 (from gremlinpython)
  Downloading isodate-0.7.2-py3-none-any.whl.metadata (11 kB)
Collecting async-timeout<5.0.0,>=4.0.3 (from gremlinpython)
  Downloading async_timeout-4.0.3-py3-none-any.whl.metadata (4.2 kB)
Collecting aiohappyeyeballs>=2.5.0 (from aiohttp<4.0.0,>=3.8.0->gremlinpython)
  Downloading aiohappyeyeballs-2.6.1-py3-none-any.whl.metadata (5.9 kB)
Collecting aiosignal>=1.4.0 (from aiohttp<4.0.0,>=3.8.0->gremlinpython)
  Downloading aiosignal-1.4.0-py3-none-any.whl.metadata (3.7 kB)
Collecting frozenlist>=1.1.1 (from aiohttp<4.0.0,>=3.8.0->gremlinpython)
  Do

In [None]:
# TODO: migrate this shortest-path computation to a Gremlin query.
# The typing names are imported in this cell because the annotations in the
# signature below are evaluated as soon as the `def` statement runs.
from typing import Dict, List, Tuple


def compute_shortest_path(
    edges: List[Tuple[str, str, float, int]],
    source: str,
    sink: str,
    cost_scale: int = 1,
    undirected: bool = False,
) -> Tuple[List[str], float]:
    """Find the cheapest path from ``source`` to ``sink`` via a unit min-cost flow.

    One unit of supply is placed on ``source`` and one unit of demand on
    ``sink``; the arcs that carry flow in the solver's optimal solution are
    then followed from ``source`` to ``sink`` to recover the path.

    This function relies on two module-level names that this cell does not
    define: ``SimpleMinCostFlow`` (the solver class, or ``None`` when it could
    not be imported) and ``ShortestPathError`` (the exception type raised
    below). They must be provided elsewhere before the function is called.

    Args:
        edges: ``(tail_name, head_name, cost, capacity)`` tuples. Each cost is
            multiplied by ``cost_scale`` and rounded to an integer; each
            capacity is converted to ``int`` and raised to at least 1.
        source: Name of the start node.
        sink: Name of the end node.
        cost_scale: Positive integer multiplier applied to costs before
            rounding, so fractional costs keep some precision.
        undirected: When true, every edge is also added in the reverse
            direction with the same capacity and cost.

    Returns:
        ``(path, total_cost)`` where ``path`` lists node names from ``source``
        to ``sink`` and ``total_cost`` is the solver's optimal cost divided by
        ``cost_scale``. When ``source == sink`` the result is
        ``([source], 0.0)`` and the solver is not run.

    Raises:
        ShortestPathError: If ``SimpleMinCostFlow`` is ``None``, if
            ``cost_scale`` is not positive, if the solver does not report an
            optimal status, or if the flow-carrying arcs cannot be followed
            from ``source`` to ``sink``.
    """
    if SimpleMinCostFlow is None:
        raise ShortestPathError(
            "ortools not available or incompatible. Install with 'pip install ortools'."
        )

    if source == sink:
        return [source], 0.0

    if cost_scale <= 0:
        raise ShortestPathError("cost_scale must be a positive integer")

    # Node names are mapped to dense integer ids for the solver; the reverse
    # mapping turns the solved path back into names.
    node_name_to_id: Dict[str, int] = {}
    node_id_to_name: Dict[int, str] = {}

    def get_node_id(name: str) -> int:
        if name not in node_name_to_id:
            new_id = len(node_name_to_id)
            node_name_to_id[name] = new_id
            node_id_to_name[new_id] = name
        return node_name_to_id[name]

    flow_solver = SimpleMinCostFlow()

    def solver_method(camel_name: str, snake_name: str):
        # Compatibility across the pywrapgraph (CamelCase) and python
        # (snake_case) solver APIs: prefer the CamelCase attribute when present.
        if hasattr(flow_solver, camel_name):
            return getattr(flow_solver, camel_name)
        return getattr(flow_solver, snake_name)

    # Resolve each solver method once instead of re-checking on every call.
    add_arc = solver_method(
        "AddArcWithCapacityAndUnitCost", "add_arc_with_capacity_and_unit_cost"
    )
    set_node_supply = solver_method("SetNodeSupply", "set_node_supply")
    solve = solver_method("Solve", "solve")
    num_arcs = solver_method("NumArcs", "num_arcs")
    arc_flow = solver_method("Flow", "flow")
    arc_tail = solver_method("Tail", "tail")
    arc_head = solver_method("Head", "head")
    optimal_cost = solver_method("OptimalCost", "optimal_cost")

    # Feed arcs straight into the solver rather than staging them in
    # intermediate lists first.
    for u_name, v_name, cost_value, capacity_value in edges:
        u_id = get_node_id(u_name)
        v_id = get_node_id(v_name)

        # Costs are scaled and rounded to integers before being handed to the solver.
        scaled_cost = int(round(cost_value * cost_scale))
        capacity = max(1, int(capacity_value))

        add_arc(u_id, v_id, capacity, scaled_cost)
        if undirected:
            add_arc(v_id, u_id, capacity, scaled_cost)

    # Ensure source and sink are in the mapping even if absent from the edge list.
    source_id = get_node_id(source)
    sink_id = get_node_id(sink)

    # Every node is a pure transit node except the source (+1) and sink (-1).
    for node_id in node_id_to_name:
        set_node_supply(node_id, 0)
    set_node_supply(source_id, 1)
    set_node_supply(sink_id, -1)

    status = solve()

    # Determine the OPTIMAL status constant in both APIs.
    optimal_status = getattr(flow_solver, "OPTIMAL", None)
    if optimal_status is None and hasattr(flow_solver, "Status"):
        optimal_status = flow_solver.Status.OPTIMAL

    if status != optimal_status:
        raise ShortestPathError(
            f"Min-cost flow did not find a solution (status={status})."
        )

    # Record, for each node, the head of an arc that carries flow out of it.
    next_by_node: Dict[int, int] = {}
    for arc in range(num_arcs()):
        if arc_flow(arc) > 0:
            next_by_node[arc_tail(arc)] = arc_head(arc)

    if source_id not in next_by_node:
        raise ShortestPathError("No path found carrying unit flow from source to sink")

    # Walk the flow-carrying arcs from source until the sink is reached.
    ordered_path_ids: List[int] = [source_id]
    visited = {source_id}

    while ordered_path_ids[-1] != sink_id:
        current = ordered_path_ids[-1]
        if current not in next_by_node:
            raise ShortestPathError(
                "Disconnected flow: could not reconstruct a full path to sink"
            )
        nxt = next_by_node[current]
        if nxt in visited:
            raise ShortestPathError("Cycle encountered while reconstructing path")
        ordered_path_ids.append(nxt)
        visited.add(nxt)

    ordered_path_names = [node_id_to_name[nid] for nid in ordered_path_ids]

    # Undo the integer scaling applied to the arc costs.
    total_cost = float(optimal_cost()) / float(cost_scale)

    return ordered_path_names, total_cost


##### Convert the road-network `.gr` file to CSV

In [None]:
# Relative dataset locations; the conversion cell prefixes each with "../".
data_path = "data/datasets/roads/"
in_gr_path = f"{data_path}USA-road-d.USA.gr"    # input graph in .gr format
out_csv_path = f"{data_path}USA-road-d.USA.csv"  # CSV edge list to be written

In [None]:
# Convert the .gr graph file into a three-column CSV edge list.
# Only lines that start with "a " are converted; every other line is skipped.
with open("../" + in_gr_path, "r") as gr_file, open("../" + out_csv_path, "w") as csv_file:
    csv_file.write("src,dest,dist\n")
    for raw_line in gr_file:
        if not raw_line.startswith("a "):
            continue
        # Split on any run of whitespace and drop the leading "a" marker, so
        # repeated or trailing blanks never become empty CSV fields and every
        # row ends with exactly one newline.
        fields = raw_line.split()[1:]
        csv_file.write(",".join(fields) + "\n")

In [None]:
!pip install gremlinpython

In [None]:
#query Neptune using gremlin
from gremlin_python import statics
from gremlin_python.structure.graph import Graph
from gremlin_python.process.graph_traversal import __
from gremlin_python.process.strategies import *
from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection
from gremlin_python.driver.aiohttp.transport import AiohttpTransport
from gremlin_python.process.traversal import *
import os

# Connection settings for the Gremlin endpoint; replace the placeholder below
# with the actual server host name before running.
port = 8182
server = '(your server endpoint)'

endpoint = f'wss://{server}:{port}/gremlin'

graph = Graph()

# NOTE(review): call_from_event_loop=True is presumably needed because the
# notebook kernel already runs an event loop — confirm against the
# gremlinpython documentation.
connection = DriverRemoteConnection(
    endpoint,
    'g',
    transport_factory=lambda: AiohttpTransport(call_from_event_loop=True),
)

# Close the connection even when the traversal or the printing below fails,
# so a failed run does not leave the websocket open.
try:
    g = graph.traversal().withRemote(connection)

    # Sample 10 'airport' vertices, order them by 'code', and collect each
    # vertex's 'code' and 'city' values into one list per vertex.
    results = (g.V().hasLabel('airport')
                    .sample(10)
                    .order()
                    .by('code')
                    .local(__.values('code', 'city').fold())
                    .toList())

    # Print the results in a tabular form with a row index
    for i, c in enumerate(results, 1):
        print("%3d %4s %s" % (i, c[0], c[1]))
finally:
    connection.close()