In [2]:
!pip install gremlinpython

Collecting gremlinpython
  Downloading gremlinpython-3.7.4-py3-none-any.whl.metadata (6.4 kB)
Collecting aiohttp<4.0.0,>=3.8.0 (from gremlinpython)
  Downloading aiohttp-3.13.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (8.1 kB)
Collecting aenum<4.0.0,>=1.4.5 (from gremlinpython)
  Using cached aenum-3.1.16-py3-none-any.whl.metadata (3.8 kB)
Collecting isodate<1.0.0,>=0.6.0 (from gremlinpython)
  Downloading isodate-0.7.2-py3-none-any.whl.metadata (11 kB)
Collecting async-timeout<5.0.0,>=4.0.3 (from gremlinpython)
  Using cached async_timeout-4.0.3-py3-none-any.whl.metadata (4.2 kB)
Collecting aiohappyeyeballs>=2.5.0 (from aiohttp<4.0.0,>=3.8.0->gremlinpython)
  Downloading aiohappyeyeballs-2.6.1-py3-none-any.whl.metadata (5.9 kB)
Collecting aiosignal>=1.4.0 (from aiohttp<4.0.0,>=3.8.0->gremlinpython)
  Downloading aiosignal-1.4.0-py3-none-any.whl.metadata (3.7 kB)
Collecting frozenlist>=1.1.1 (from aiohttp<4.0.0,>=3.8.0->gremlinpython)
  

In [11]:
!pip install requests-auth-aws-sigv4

Collecting requests-auth-aws-sigv4
  Downloading requests_auth_aws_sigv4-0.7-py3-none-any.whl.metadata (5.5 kB)
Downloading requests_auth_aws_sigv4-0.7-py3-none-any.whl (12 kB)
Installing collected packages: requests-auth-aws-sigv4
Successfully installed requests-auth-aws-sigv4-0.7


In [None]:
#need to migrate to gremlin query
def compute_shortest_path(
    edges: List[Tuple[str, str, float, int]],
    source: str,
    sink: str,
    cost_scale: int = 1,
    undirected: bool = False,
) -> Tuple[List[str], float]:
    if SimpleMinCostFlow is None:
        raise ShortestPathError(
            "ortools not available or incompatible. Install with 'pip install ortools'."
        )

    if source == sink:
        return [source], 0.0

    if cost_scale <= 0:
        raise ShortestPathError("cost_scale must be a positive integer")

    # Map node names to integer ids and preserve reverse mapping for output
    node_name_to_id: Dict[str, int] = {}
    node_id_to_name: Dict[int, str] = {}

    def get_node_id(name: str) -> int:
        if name not in node_name_to_id:
            new_id = len(node_name_to_id)
            node_name_to_id[name] = new_id
            node_id_to_name[new_id] = name
        return node_name_to_id[name]

    start_nodes: List[int] = []
    end_nodes: List[int] = []
    capacities: List[int] = []
    unit_costs: List[int] = []

    for u_name, v_name, cost_value, capacity_value in edges:
        u_id = get_node_id(u_name)
        v_id = get_node_id(v_name)

        # Scale cost to integer for OR-Tools
        scaled_cost = int(round(cost_value * cost_scale))

        start_nodes.append(u_id)
        end_nodes.append(v_id)
        capacities.append(max(1, int(capacity_value)))
        unit_costs.append(scaled_cost)

        if undirected:
            start_nodes.append(v_id)
            end_nodes.append(u_id)
            capacities.append(max(1, int(capacity_value)))
            unit_costs.append(scaled_cost)

    # Ensure source and sink are in the mapping even if isolated in the edge list
    source_id = get_node_id(source)
    sink_id = get_node_id(sink)

    flow_solver = SimpleMinCostFlow()

    # Compatibility helpers across pywrapgraph (CamelCase) and python (snake_case) APIs
    def add_arc_with_capacity_and_unit_cost(tail: int, head: int, capacity: int, unit_cost: int) -> None:
        if hasattr(flow_solver, "AddArcWithCapacityAndUnitCost"):
            flow_solver.AddArcWithCapacityAndUnitCost(tail, head, capacity, unit_cost)
        else:
            flow_solver.add_arc_with_capacity_and_unit_cost(tail, head, capacity, unit_cost)

    def set_node_supply(node_id: int, supply: int) -> None:
        if hasattr(flow_solver, "SetNodeSupply"):
            flow_solver.SetNodeSupply(node_id, supply)
        else:
            flow_solver.set_node_supply(node_id, supply)

    def solve() -> object:
        if hasattr(flow_solver, "Solve"):
            return flow_solver.Solve()
        return flow_solver.solve()

    def num_arcs() -> int:
        return flow_solver.NumArcs() if hasattr(flow_solver, "NumArcs") else flow_solver.num_arcs()

    def flow(i: int) -> int:
        return flow_solver.Flow(i) if hasattr(flow_solver, "Flow") else flow_solver.flow(i)

    def tail(i: int) -> int:
        return flow_solver.Tail(i) if hasattr(flow_solver, "Tail") else flow_solver.tail(i)

    def head(i: int) -> int:
        return flow_solver.Head(i) if hasattr(flow_solver, "Head") else flow_solver.head(i)

    def optimal_cost() -> int:
        return (
            flow_solver.OptimalCost() if hasattr(flow_solver, "OptimalCost") else flow_solver.optimal_cost()
        )

    for i in range(len(start_nodes)):
        add_arc_with_capacity_and_unit_cost(
            start_nodes[i], end_nodes[i], capacities[i], unit_costs[i]
        )

    all_node_ids = list(node_id_to_name.keys())
    for node_id in all_node_ids:
        set_node_supply(node_id, 0)

    set_node_supply(source_id, 1)
    set_node_supply(sink_id, -1)

    status = solve()

    # Determine the OPTIMAL status constant in both APIs
    optimal_status = getattr(flow_solver, "OPTIMAL", None)
    if optimal_status is None and hasattr(flow_solver, "Status"):
        optimal_status = flow_solver.Status.OPTIMAL

    if status != optimal_status:
        raise ShortestPathError(
            f"Min-cost flow did not find a solution (status={status})."
        )

    # Extract the unique unit-flow path from source to sink
    next_by_node: Dict[int, int] = {}
    for i in range(num_arcs()):
        if flow(i) > 0:
            t = tail(i)
            h = head(i)
            next_by_node[t] = h

    if source_id not in next_by_node:
        raise ShortestPathError("No path found carrying unit flow from source to sink")

    ordered_path_ids: List[int] = [source_id]
    visited: set[int] = set([source_id])

    while ordered_path_ids[-1] != sink_id:
        current = ordered_path_ids[-1]
        if current not in next_by_node:
            raise ShortestPathError(
                "Disconnected flow: could not reconstruct a full path to sink"
            )
        nxt = next_by_node[current]
        if nxt in visited:
            raise ShortestPathError("Cycle encountered while reconstructing path")
        ordered_path_ids.append(nxt)
        visited.add(nxt)

    ordered_path_names = [node_id_to_name[nid] for nid in ordered_path_ids]

    total_cost_scaled = optimal_cost()
    total_cost = float(total_cost_scaled) / float(cost_scale)

    return ordered_path_names, total_cost


##### Convert GR to CSV

In [None]:
data_path = "data/datasets/roads/"
in_gr_path  = data_path+"USA-road-d.USA.gr"
out_csv_path = data_path+"USA-road-d.USA.csv"

In [None]:
with open("../"+in_gr_path,'r') as f:
    with open("../"+out_csv_path,'w') as g:
        # ~id,~from,~to,~label,distance:Double
        # Empty string ("") is a valid id, and the edge is created with an empty string as the id.
        # Labels are case sensitive and cannot be empty. A value of "" will result in an error.
        g.write("src,dest,dist\n")
        for line in f:
            if line.startswith('a '):
                line = line.replace('a ','').replace(' ',',')
                g.write(line)

In [20]:
#query Neptune using gremlin
from gremlin_python import statics
from gremlin_python.structure.graph import Graph
from gremlin_python.process.graph_traversal import __
from gremlin_python.process.strategies import *
from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection
from gremlin_python.driver.aiohttp.transport import AiohttpTransport
from gremlin_python.process.traversal import *
import os
from boto3 import Session
from botocore.auth import SigV4Auth
from botocore.awsrequest import AWSRequest
from requests_auth_aws_sigv4 import AWSSigV4
port = 8182
server = 'db-neptune-1.cluster-ro-criq8uemaejw.us-west-2.neptune.amazonaws.com'
endpoint = f'wss://{server}:{port}/gremlin'
default_region = 'us-west-2'
service = 'neptune-db'



#sigv4 auth
credentials = Session().get_credentials()
if credentials is None:
    raise Exception("No AWS credentials found")
creds = credentials.get_frozen_credentials()
# region set inside config profile or via AWS_DEFAULT_REGION environment variable will be loaded
region = Session().region_name if Session().region_name else default_region

request = AWSRequest(method='GET', url=endpoint, data=None)
SigV4Auth(creds, service, region).add_auth(request)
aws_auth = AWSSigV4(service, region=region)
connection = DriverRemoteConnection(
                endpoint,'g',
                headers=request.headers.items(),
                #auth=aws_auth,
                transport_factory=lambda:AiohttpTransport(call_from_event_loop=True)
                )
# rc = DriverRemoteConnection(conn_string, 'g', headers=request.headers.items())
graph=Graph()
g = graph.traversal().withRemote(connection)

results = (g.V().hasLabel('airport')
                .sample(10)
                .order()
                .by('code')
                .local(__.values('code','city').fold())
                .toList())

# Print the results in a tabular form with a row index
print(results)
for i,c in enumerate(results,1):
    print("%3d %4s %s" % (i,c[0],c[1]))

connection.close()

[]


In [25]:
request.headers.items()

[('X-Amz-Date', '20251014T192816Z'),
 ('X-Amz-Security-Token',
  'IQoJb3JpZ2luX2VjELv//////////wEaCXVzLXdlc3QtMiJGMEQCIC0YaFg0ueRfZL7/jhhuKbSkL2X0r0nLfDGAcRgRGGnjAiBCh8E9uJ47n94FT6W3MPMJniWtygG9GXBq7IhmqP5uLiq0AghkEAAaDDA2MzI5OTg0MzkxNSIMmtreFe7peanjYyGnKpECJ5+/dRs/7fvHQARefao+HXB9NKlaFZB3oUC4TCp43wS5khacdG34hYAdImv3lqEPFHqCUKuQEmeUmjJOqIPxrbD0r7S3YITZG9V38PgMoCVtFSQkZBsiKIQO8+D7JQEEnBVIv+nMP/eAE0th8rtGRHgmsXf3MJFOA5q9dqiFs4UCb6a3rDHmNrsxT/P/N/ANvUq5xnxt8m0lvJyXqdMzlbrc6Zw2diGRpz/F349SjYcYPHRdlTn3/lwKKEDFFT0PPX39hoMZUXOQzYW46y/MK01vqNk/+4eRD7BOTs1GUuhqAlbXg8mP3VOrEFOtx7+7+i/DKHH9u499crmIwgNQjC2buZS5cudM7e1Nrh6F1rvRMKG7uscGOpQBYclDZLK/HK3TYHGuX3rBBLQgaOC4KBger/J+B/eqp6VVixNJSWpfuFfsFBz5qedvdg/ClRzYZ0QFIfIjuQ9Nq7zKx4VIJRa1Wz1wONPE+HJbI/uaxtbQ/x8GZFyEX5IO3eCkgIRj/pPTLzcF02Ps2vq5LyD1l2XVED2CTIwGLqYhHiB3P7IoJUS7RwJqB9w9nMWUYg=='),
 ('Authorization',
  'AWS4-HMAC-SHA256 Credential=ASIAQ5PHW65FS3SN7KF6/20251014/us-west-2/neptune-db/aws4_request, SignedHeaders=host;x-amz-date;x-amz-security-to

In [32]:
headers = [(x,request.headers[x]) for x in request.headers]

In [33]:
headers.items()

AttributeError: 'list' object has no attribute 'items'

In [40]:
import requests 
VERIFY_TLS = True

payload = {
    "source": "s3://datasets-in-out/input/road-usa/",
    "format": "csv",  # Neptune CSV (nodes/edges)
    "iamRoleArn": "arn:aws:iam::063299843915:role/service-role/AWSNeptuneNotebookRole-NeptuneNbUser",
    "region": "us-west-2",
    "failOnError": False,
    "parallelism": "LOW",
    "queueRequest": True,
    # Set to True if your edge CSV has ~id; otherwise False to auto-generate
    # "userProvidedEdgeIds": user_provided_edge_ids,
    "edgeOnlyLoad": True,
    # Optional parser tweaks:
    # "parserConfiguration": {"ignoreEmptyStrings": True, "allowNull": True}
}
neptune_endpoint = "db-neptune-1-road-usa.criq8uemaejw.us-west-2.neptune.amazonaws.com"
url = f"https://{neptune_endpoint}:{port}/loader"

{'source': 's3://datasets-in-out/input/road-usa/', 'format': 'csv', 'iamRoleArn': 'arn:aws:iam::063299843915:role/service-role/AWSNeptuneNotebookRole-NeptuneNbUser', 'region': 'us-west-2', 'failOnError': False, 'parallelism': 'LOW', 'queueRequest': True, 'edgeOnlyLoad': True}

resp = requests.post(url, 
                 json=payload, 
                 timeout=60, 
                 verify=VERIFY_TLS,
                 # auth=aws_auth
                 # headers=headers
                )                        
                                                            

In [42]:
resp.text

'{"detailedMessage":"Failed to start new load from the source s3://datasets-in-out/input/road-usa/. Couldn\'t find the aws credential for iam_role_arn: arn:aws:iam::063299843915:role/service-role/AWSNeptuneNotebookRole-NeptuneNbUser","code":"InvalidParameterException","requestId":"18ccf224-7385-26b1-2af3-6d6908164b57","message":"Failed to start new load from the source s3://datasets-in-out/input/road-usa/. Couldn\'t find the aws credential for iam_role_arn: arn:aws:iam::063299843915:role/service-role/AWSNeptuneNotebookRole-NeptuneNbUser"}'