In [None]:
!pip install gremlinpython

In [None]:
# TODO: migrate this shortest-path computation to a Gremlin query
def compute_shortest_path(
    edges: List[Tuple[str, str, float, int]],
    source: str,
    sink: str,
    cost_scale: int = 1,
    undirected: bool = False,
) -> Tuple[List[str], float]:
    """Find a cheapest path from ``source`` to ``sink`` by solving a min-cost flow.

    One unit of supply is placed on ``source`` and one unit of demand on
    ``sink``; the arcs that end up carrying flow are then chained together to
    form the returned path.

    Args:
        edges: Iterable of ``(from_name, to_name, cost, capacity)`` tuples.
            Capacities are converted with ``int()`` and raised to at least 1.
        source: Name of the start node.
        sink: Name of the end node.
        cost_scale: Each cost is multiplied by this value and rounded to an
            integer before being handed to the solver; the solver's total is
            divided by it again before being returned.
        undirected: When true, every edge also gets a reverse arc with the same
            capacity and cost.

    Returns:
        ``(path, total_cost)`` where ``path`` is the list of node names from
        ``source`` to ``sink`` and ``total_cost`` is a float. When ``source``
        equals ``sink`` the result is ``([source], 0.0)``; this shortcut is
        taken before ``cost_scale`` is validated.

    Raises:
        ShortestPathError: if ``SimpleMinCostFlow`` is ``None``, if
            ``cost_scale`` is not positive, if the solver does not report an
            optimal status, or if the flow cannot be turned into a simple
            source-to-sink path.
    """
    if SimpleMinCostFlow is None:
        raise ShortestPathError(
            "ortools not available or incompatible. Install with 'pip install ortools'."
        )

    # Trivial case: no solver run needed.
    if source == sink:
        return [source], 0.0

    if cost_scale <= 0:
        raise ShortestPathError("cost_scale must be a positive integer")

    # Map node names to integer ids and preserve reverse mapping for output
    node_name_to_id: Dict[str, int] = {}
    node_id_to_name: Dict[int, str] = {}

    def get_node_id(name: str) -> int:
        # Ids are handed out densely in first-seen order (0, 1, 2, ...).
        if name not in node_name_to_id:
            new_id = len(node_name_to_id)
            node_name_to_id[name] = new_id
            node_id_to_name[new_id] = name
        return node_name_to_id[name]

    # Parallel lists describing the arcs; index i in each list is one arc.
    start_nodes: List[int] = []
    end_nodes: List[int] = []
    capacities: List[int] = []
    unit_costs: List[int] = []

    for u_name, v_name, cost_value, capacity_value in edges:
        u_id = get_node_id(u_name)
        v_id = get_node_id(v_name)

        # Scale cost to integer for OR-Tools
        scaled_cost = int(round(cost_value * cost_scale))

        start_nodes.append(u_id)
        end_nodes.append(v_id)
        # Capacities below 1 are raised to 1 so every listed edge is usable.
        capacities.append(max(1, int(capacity_value)))
        unit_costs.append(scaled_cost)

        if undirected:
            # Mirror the arc so the edge can be traversed in either direction.
            start_nodes.append(v_id)
            end_nodes.append(u_id)
            capacities.append(max(1, int(capacity_value)))
            unit_costs.append(scaled_cost)

    # Ensure source and sink are in the mapping even if isolated in the edge list
    source_id = get_node_id(source)
    sink_id = get_node_id(sink)

    flow_solver = SimpleMinCostFlow()

    # Compatibility helpers across pywrapgraph (CamelCase) and python (snake_case) APIs
    # Each helper calls the CamelCase method when the solver object has it and
    # the snake_case method otherwise.
    def add_arc_with_capacity_and_unit_cost(tail: int, head: int, capacity: int, unit_cost: int) -> None:
        if hasattr(flow_solver, "AddArcWithCapacityAndUnitCost"):
            flow_solver.AddArcWithCapacityAndUnitCost(tail, head, capacity, unit_cost)
        else:
            flow_solver.add_arc_with_capacity_and_unit_cost(tail, head, capacity, unit_cost)

    def set_node_supply(node_id: int, supply: int) -> None:
        if hasattr(flow_solver, "SetNodeSupply"):
            flow_solver.SetNodeSupply(node_id, supply)
        else:
            flow_solver.set_node_supply(node_id, supply)

    def solve() -> object:
        if hasattr(flow_solver, "Solve"):
            return flow_solver.Solve()
        return flow_solver.solve()

    def num_arcs() -> int:
        return flow_solver.NumArcs() if hasattr(flow_solver, "NumArcs") else flow_solver.num_arcs()

    def flow(i: int) -> int:
        return flow_solver.Flow(i) if hasattr(flow_solver, "Flow") else flow_solver.flow(i)

    def tail(i: int) -> int:
        return flow_solver.Tail(i) if hasattr(flow_solver, "Tail") else flow_solver.tail(i)

    def head(i: int) -> int:
        return flow_solver.Head(i) if hasattr(flow_solver, "Head") else flow_solver.head(i)

    def optimal_cost() -> int:
        return (
            flow_solver.OptimalCost() if hasattr(flow_solver, "OptimalCost") else flow_solver.optimal_cost()
        )

    # Register every collected arc with the solver.
    for i in range(len(start_nodes)):
        add_arc_with_capacity_and_unit_cost(
            start_nodes[i], end_nodes[i], capacities[i], unit_costs[i]
        )

    # Zero every node's supply first, then push one unit in at the source and
    # take one unit out at the sink.
    all_node_ids = list(node_id_to_name.keys())
    for node_id in all_node_ids:
        set_node_supply(node_id, 0)

    set_node_supply(source_id, 1)
    set_node_supply(sink_id, -1)

    status = solve()

    # Determine the OPTIMAL status constant in both APIs
    # NOTE(review): if neither attribute exists, optimal_status stays None and
    # the comparison below reports a failure regardless of the real status.
    optimal_status = getattr(flow_solver, "OPTIMAL", None)
    if optimal_status is None and hasattr(flow_solver, "Status"):
        optimal_status = flow_solver.Status.OPTIMAL

    if status != optimal_status:
        raise ShortestPathError(
            f"Min-cost flow did not find a solution (status={status})."
        )

    # Extract the unique unit-flow path from source to sink
    # Record, for each arc carrying flow, the successor of its tail node. A
    # later flow-carrying arc with the same tail overwrites the earlier entry.
    next_by_node: Dict[int, int] = {}
    for i in range(num_arcs()):
        if flow(i) > 0:
            t = tail(i)
            h = head(i)
            next_by_node[t] = h

    if source_id not in next_by_node:
        raise ShortestPathError("No path found carrying unit flow from source to sink")

    ordered_path_ids: List[int] = [source_id]
    visited: set[int] = set([source_id])

    # Follow successor links from the source until the sink is reached.
    while ordered_path_ids[-1] != sink_id:
        current = ordered_path_ids[-1]
        if current not in next_by_node:
            raise ShortestPathError(
                "Disconnected flow: could not reconstruct a full path to sink"
            )
        nxt = next_by_node[current]
        # Revisiting a node would make the walk loop forever.
        if nxt in visited:
            raise ShortestPathError("Cycle encountered while reconstructing path")
        ordered_path_ids.append(nxt)
        visited.add(nxt)

    ordered_path_names = [node_id_to_name[nid] for nid in ordered_path_ids]

    # Undo the integer cost scaling applied when the arcs were built.
    total_cost_scaled = optimal_cost()
    total_cost = float(total_cost_scaled) / float(cost_scale)

    return ordered_path_names, total_cost


##### Convert GR to CSV

In [None]:
# Directory (with trailing slash) that holds the road-network dataset files.
data_path = "data/datasets/roads/"
# Source graph in .gr format and the CSV file it is converted into.
in_gr_path = f"{data_path}USA-road-d.USA.gr"
out_csv_path = f"{data_path}USA-road-d.USA.csv"

In [None]:
# Convert the arc records of the .gr file into a three-column CSV.
# NOTE(review): the header written below is plain "src,dest,dist". The notes
# kept from the original cell describe a different edge-file layout -- confirm
# which one the downstream loader expects:
#   ~id,~from,~to,~label,distance:Double
#   Empty string ("") is a valid id, and the edge is created with an empty string as the id.
#   Labels are case sensitive and cannot be empty. A value of "" will result in an error.
with open("../" + in_gr_path, 'r') as gr_file, open("../" + out_csv_path, 'w') as csv_file:
    csv_file.write("src,dest,dist\n")
    for line in gr_file:
        # Only records tagged "a" are converted; every other line is skipped.
        if line.startswith('a '):
            # Split on whitespace and drop the leading tag. The previous
            # str.replace approach removed every "a " substring in the line and
            # turned repeated spaces into empty CSV fields.
            fields = line.split()[1:]
            csv_file.write(",".join(fields) + "\n")

In [7]:
#query Neptune using gremlin
from gremlin_python import statics
from gremlin_python.structure.graph import Graph
from gremlin_python.process.graph_traversal import __
from gremlin_python.process.strategies import *
from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection
from gremlin_python.driver.aiohttp.transport import AiohttpTransport
from gremlin_python.process.traversal import *
import os
from boto3 import Session
from botocore.auth import SigV4Auth
from botocore.awsrequest import AWSRequest

# Connection settings for the Gremlin websocket endpoint.
port = 8182
server = 'db-neptune-1.cluster-ro-criq8uemaejw.us-west-2.neptune.amazonaws.com'
endpoint = f'wss://{server}:{port}/gremlin'
default_region = 'us-west-2'
service = 'neptune-db'

# SigV4 auth: sign a GET request for the endpoint and reuse its headers when
# opening the connection. A single Session is shared for credentials and region.
session = Session()
credentials = session.get_credentials()
if credentials is None:
    raise Exception("No AWS credentials found")
creds = credentials.get_frozen_credentials()
# region set inside config profile or via AWS_DEFAULT_REGION environment variable will be loaded
region = session.region_name or default_region

request = AWSRequest(method='GET', url=endpoint, data=None)
SigV4Auth(creds, service, region).add_auth(request)

connection = DriverRemoteConnection(
    endpoint, 'g',
    headers=request.headers.items(),
    # NOTE(review): call_from_event_loop=True is presumably needed because the
    # notebook kernel already runs an event loop -- confirm against driver docs.
    transport_factory=lambda: AiohttpTransport(call_from_event_loop=True),
)
try:
    graph = Graph()
    g = graph.traversal().withRemote(connection)

    # Sample up to 10 'airport' vertices, order them by code, and collect each
    # vertex's code and city into a list.
    results = (g.V().hasLabel('airport')
                    .sample(10)
                    .order()
                    .by('code')
                    .local(__.values('code', 'city').fold())
                    .toList())

    # Print the results in a tabular form with a row index
    print(results)
    for i, c in enumerate(results, 1):
        print("%3d %4s %s" % (i, c[0], c[1]))
finally:
    # Always release the connection, even if the query or printing fails.
    connection.close()

[]


In [None]:
from boto3 import Session
from botocore.auth import SigV4Auth
from botocore.awsrequest import AWSRequest

from gremlin_python.process.anonymous_traversal import traversal
from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection

def main():
    """Open a SigV4-signed Gremlin connection and print the vertex count.

    Signs a GET request for the websocket endpoint with the current AWS
    credentials, connects using the signed headers, runs ``g.V().count()`` as a
    connectivity check, and closes the connection whether or not the query
    succeeds.

    Raises:
        Exception: if no AWS credentials can be found.
    """
    endpoint = 'db-neptune-1.cluster-criq8uemaejw.us-west-2.neptune.amazonaws.com'
    conn_string = 'wss://' + endpoint + ':8182/gremlin'
    default_region = 'us-west-2'
    service = 'neptune-db'

    # One Session serves both the credential and the region lookup.
    session = Session()
    credentials = session.get_credentials()
    if credentials is None:
        raise Exception("No AWS credentials found")
    creds = credentials.get_frozen_credentials()
    # region set inside config profile or via AWS_DEFAULT_REGION environment variable will be loaded
    region = session.region_name or default_region

    request = AWSRequest(method='GET', url=conn_string, data=None)
    SigV4Auth(creds, service, region).add_auth(request)

    # NOTE(review): unlike the earlier query cell, no transport_factory with
    # call_from_event_loop=True is passed here -- confirm this connects when
    # run inside a notebook kernel.
    rc = DriverRemoteConnection(conn_string, 'g', headers=request.headers.items())
    try:
        g = traversal().with_remote(rc)

        # simple query to verify connection
        count = g.V().count().next()
        print('Vertex count: ' + str(count))
    finally:
        # cleanup, even when the query raises
        rc.close()


if __name__ == "__main__":
    main()