Skip to content

Commit

Permalink
Enhancements for Gremlin HTTP (#624)
Browse files Browse the repository at this point in the history
* Gremlin HTTP enhancements

* remove unused import

* update changelog
  • Loading branch information
michaelnchin committed Jun 18, 2024
1 parent 35a3a51 commit 9c176c0
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 21 deletions.
1 change: 1 addition & 0 deletions ChangeLog.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ Starting with v1.31.6, this file will contain a record of major features and upd

- Added `--connection-protocol` option to `%%gremlin` ([Link to PR](https://github.com/aws/graph-notebook/pull/617))
- Added global Gremlin `connection_protocol` setting to `%%graph_notebook_config` ([Link to PR](https://github.com/aws/graph-notebook/pull/621))
- Added various enhancements for `%%gremlin` HTTP connections to Neptune ([Link to PR](https://github.com/aws/graph-notebook/pull/624))
- Restored left alignment of numeric value columns in results table widget ([Link to PR](https://github.com/aws/graph-notebook/pull/620))

## Release 4.4.0 (June 10, 2024)
Expand Down
8 changes: 6 additions & 2 deletions src/graph_notebook/configuration/get_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
SparqlSection, GremlinSection, Neo4JSection
from graph_notebook.neptune.client import NEPTUNE_CONFIG_HOST_IDENTIFIERS, is_allowed_neptune_host, false_str_variants, \
DEFAULT_NEO4J_USERNAME, DEFAULT_NEO4J_PASSWORD, DEFAULT_NEO4J_DATABASE, \
NEPTUNE_DB_SERVICE_NAME, NEPTUNE_ANALYTICS_SERVICE_NAME, NEPTUNE_DB_CONFIG_NAMES, NEPTUNE_ANALYTICS_CONFIG_NAMES
NEPTUNE_DB_SERVICE_NAME, DEFAULT_WS_PROTOCOL, DEFAULT_HTTP_PROTOCOL

neptune_params = ['neptune_service', 'auth_mode', 'load_from_s3_arn', 'aws_region']
neptune_gremlin_params = ['connection_protocol']
Expand All @@ -33,11 +33,15 @@ def get_config_from_dict(data: dict, neptune_hosts: list = NEPTUNE_CONFIG_HOST_I
neptune_service = data['neptune_service'] if 'neptune_service' in data else NEPTUNE_DB_SERVICE_NAME
if 'gremlin' in data:
data['gremlin']['include_protocol'] = True
if 'connection_protocol' not in data['gremlin']:
data['gremlin']['connection_protocol'] = DEFAULT_WS_PROTOCOL \
if neptune_service == NEPTUNE_DB_SERVICE_NAME else DEFAULT_HTTP_PROTOCOL
gremlin_section = GremlinSection(**data['gremlin'])
if gremlin_section.to_dict()['traversal_source'] != 'g':
print('Ignoring custom traversal source, Amazon Neptune does not support this functionality.\n')
else:
gremlin_section = GremlinSection(include_protocol=True)
protocol = DEFAULT_WS_PROTOCOL if neptune_service == NEPTUNE_DB_SERVICE_NAME else DEFAULT_HTTP_PROTOCOL
gremlin_section = GremlinSection(include_protocol=True, connection_protocol=protocol)
if neo4j_section.to_dict()['username'] != DEFAULT_NEO4J_USERNAME \
or neo4j_section.to_dict()['password'] != DEFAULT_NEO4J_PASSWORD:
print('Ignoring Neo4J custom authentication, Amazon Neptune does not support this functionality.\n')
Expand Down
21 changes: 13 additions & 8 deletions src/graph_notebook/magics/graph_magic.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,16 @@
neptune_db_only, neptune_graph_only
from graph_notebook.magics.ml import neptune_ml_magic_handler, generate_neptune_ml_parser
from graph_notebook.magics.streams import StreamViewer
from graph_notebook.neptune.client import ClientBuilder, Client, PARALLELISM_OPTIONS, PARALLELISM_HIGH, \
from graph_notebook.neptune.client import (ClientBuilder, Client, PARALLELISM_OPTIONS, PARALLELISM_HIGH, \
LOAD_JOB_MODES, MODE_AUTO, FINAL_LOAD_STATUSES, SPARQL_ACTION, FORMAT_CSV, FORMAT_OPENCYPHER, FORMAT_NTRIPLE, \
DB_LOAD_TYPES, ANALYTICS_LOAD_TYPES, VALID_BULK_FORMATS, VALID_INCREMENTAL_FORMATS, \
FORMAT_NQUADS, FORMAT_RDFXML, FORMAT_TURTLE, STREAM_RDF, STREAM_PG, STREAM_ENDPOINTS, \
NEPTUNE_CONFIG_HOST_IDENTIFIERS, is_allowed_neptune_host, \
STATISTICS_LANGUAGE_INPUTS, STATISTICS_LANGUAGE_INPUTS_SPARQL, STATISTICS_MODES, SUMMARY_MODES, \
SPARQL_EXPLAIN_MODES, OPENCYPHER_EXPLAIN_MODES, OPENCYPHER_PLAN_CACHE_MODES, OPENCYPHER_DEFAULT_TIMEOUT, \
OPENCYPHER_STATUS_STATE_MODES, normalize_service_name, GRAPH_PG_INFO_METRICS, \
DEFAULT_GREMLIN_PROTOCOL, GREMLIN_PROTOCOL_FORMATS, DEFAULT_HTTP_PROTOCOL, normalize_protocol_name
SPARQL_EXPLAIN_MODES, OPENCYPHER_EXPLAIN_MODES, GREMLIN_EXPLAIN_MODES, \
OPENCYPHER_PLAN_CACHE_MODES, OPENCYPHER_DEFAULT_TIMEOUT, OPENCYPHER_STATUS_STATE_MODES, \
normalize_service_name, GRAPH_PG_INFO_METRICS, \
DEFAULT_GREMLIN_PROTOCOL, GREMLIN_PROTOCOL_FORMATS, DEFAULT_HTTP_PROTOCOL, normalize_protocol_name)
from graph_notebook.network import SPARQLNetwork
from graph_notebook.network.gremlin.GremlinNetwork import parse_pattern_list_str, GremlinNetwork
from graph_notebook.visualization.rows_and_columns import sparql_get_rows_and_columns, opencypher_get_rows_and_columns
Expand Down Expand Up @@ -534,7 +535,7 @@ def stream_viewer(self, line):

language = args.language
limit = args.limit
uri = self.client.get_uri_with_port()
uri = self.client.get_uri(include_port=True)
viewer = StreamViewer(self.client, uri, language, limit=limit)
viewer.show()

Expand Down Expand Up @@ -1034,8 +1035,9 @@ def gremlin(self, line, cell, local_ns: dict = None):
f'If not specified, defaults to the value of the gremlin.connection_protocol field '
f'in %graph_notebook_config. Please note that this option has no effect on the '
f'Profile and Explain modes, which must use HTTP.')
parser.add_argument('--explain-type', type=str.lower, default='',
help='Explain mode to use when using the explain query mode.')
parser.add_argument('--explain-type', type=str.lower, default='dynamic',
help=f'Explain mode to use when using the explain query mode. '
f'Accepted values: {GREMLIN_EXPLAIN_MODES}')
parser.add_argument('-p', '--path-pattern', default='', help='path pattern')
parser.add_argument('-g', '--group-by', type=str, default='',
help='Property used to group nodes (e.g. code, T.region) default is T.label')
Expand Down Expand Up @@ -1074,6 +1076,8 @@ def gremlin(self, line, cell, local_ns: dict = None):
'TinkerPop driver "Serializers" enum values. Default is GRAPHSON_V3_UNTYPED')
parser.add_argument('--profile-indexOps', action='store_true', default=False,
help='Show a detailed report of all index operations.')
parser.add_argument('--profile-debug', action='store_true', default=False,
help='Enable debug mode.')
parser.add_argument('--profile-misc-args', type=str, default='{}',
help='Additional profile options, passed in as a map.')
parser.add_argument('-sp', '--stop-physics', action='store_true', default=False,
Expand Down Expand Up @@ -1154,7 +1158,8 @@ def gremlin(self, line, cell, local_ns: dict = None):
profile_args = {"profile.results": args.profile_no_results,
"profile.chop": args.profile_chop,
"profile.serializer": serializer,
"profile.indexOps": args.profile_indexOps}
"profile.indexOps": args.profile_indexOps,
"profile.debug": args.profile_debug}
try:
profile_misc_args_dict = json.loads(args.profile_misc_args)
profile_args.update(profile_misc_args_dict)
Expand Down
7 changes: 5 additions & 2 deletions src/graph_notebook/magics/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,8 +204,11 @@ def build_gremlin_metadata_from_query(query_type: str, results: any, res: Respon
if query_type == 'explain':
gremlin_metadata = create_propertygraph_metadata_obj('explain')
gremlin_metadata.set_request_metrics(res)
gremlin_metadata.set_metric_value('predicates', int((re.search(r'# of predicates: (.*?)\n', results).group(1))
.replace(".", '').replace(",", '')))
try:
gremlin_metadata.set_metric_value('predicates', int((re.search(r'# of predicates: (.*?)\n', results).group(1))
.replace(".", '').replace(",", '')))
except AttributeError:
pass
return gremlin_metadata
elif query_type == 'profile':
gremlin_metadata = create_propertygraph_metadata_obj('profile')
Expand Down
42 changes: 33 additions & 9 deletions src/graph_notebook/neptune/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@

SPARQL_EXPLAIN_MODES = ['dynamic', 'static', 'details']
OPENCYPHER_EXPLAIN_MODES = ['dynamic', 'static', 'details']
GREMLIN_EXPLAIN_MODES = ['dynamic', 'static', 'details']
OPENCYPHER_PLAN_CACHE_MODES = ['auto', 'enabled', 'disabled']
OPENCYPHER_DEFAULT_TIMEOUT = 120000
OPENCYPHER_STATUS_STATE_MODES = ['ALL', 'RUNNING', 'WAITING', 'CANCELLING']
Expand Down Expand Up @@ -257,7 +258,7 @@ def is_neptune_domain(self):
def is_analytics_domain(self):
return self.service == NEPTUNE_ANALYTICS_SERVICE_NAME

def get_uri_with_port(self, use_websocket=False, use_proxy=False):
def get_uri(self, use_websocket=False, use_proxy=False, include_port=True):
if use_websocket is True:
protocol = self._ws_protocol
else:
Expand All @@ -270,7 +271,9 @@ def get_uri_with_port(self, use_websocket=False, use_proxy=False):
uri_host = self.target_host
uri_port = self.target_port

uri = f'{protocol}://{uri_host}:{uri_port}'
uri = f'{protocol}://{uri_host}'
if include_port:
uri += f':{uri_port}'
return uri

def get_graph_id(self):
Expand Down Expand Up @@ -347,9 +350,9 @@ def sparql_cancel(self, query_id: str, silent: bool = False):
def get_gremlin_connection(self, transport_kwargs) -> client.Client:
nest_asyncio.apply()

ws_url = f'{self.get_uri_with_port(use_websocket=True, use_proxy=False)}/gremlin'
ws_url = f'{self.get_uri(use_websocket=True, use_proxy=False)}/gremlin'
if self.proxy_host != '':
proxy_http_url = f'{self.get_uri_with_port(use_websocket=False, use_proxy=True)}/gremlin'
proxy_http_url = f'{self.get_uri(use_websocket=False, use_proxy=True)}/gremlin'
transport_factory_args = lambda: AiohttpTransport(call_from_event_loop=True, proxy=proxy_http_url,
**transport_kwargs)
request = self._prepare_request('GET', proxy_http_url)
Expand Down Expand Up @@ -387,9 +390,17 @@ def gremlin_http_query(self, query, headers=None) -> requests.Response:
if headers is None:
headers = {}

data = {}
use_proxy = True if self.proxy_host != '' else False
uri = f'{self.get_uri_with_port(use_websocket=False, use_proxy=use_proxy)}/gremlin'
data = {'gremlin': query}
if self.is_analytics_domain():
uri = f'{self.get_uri(use_websocket=False, use_proxy=use_proxy, include_port=False)}/queries'
data['language'] = 'gremlin'
data['gremlin'] = query
headers['content-type'] = 'application/json'
else:
uri = f'{self.get_uri(use_websocket=False, use_proxy=use_proxy)}/gremlin'
data['gremlin'] = query

req = self._prepare_request('POST', uri, data=json.dumps(data), headers=headers)
res = self._http_session.send(req, verify=self.ssl_verify)
return res
Expand All @@ -412,12 +423,25 @@ def gremlin_profile(self, query: str, args={}) -> requests.Response:
return self._gremlin_query_plan(query=query, plan_type='profile', args=args)

def _gremlin_query_plan(self, query: str, plan_type: str, args: dict, ) -> requests.Response:
    """Request an 'explain' or 'profile' query plan for a Gremlin query over HTTP.

    :param query: the Gremlin query string to analyze.
    :param plan_type: plan endpoint to hit, either 'explain' or 'profile'.
    :param args: extra plan options (e.g. 'explain.mode', 'profile.debug').
                 Copied locally, so the caller's dict is never mutated.
    :return: the raw HTTP response from the plan endpoint.
    """
    # Work on a copy: the previous implementation popped keys out of the
    # caller's dict, which is unsafe when callers pass a shared or
    # default-argument dict (e.g. gremlin_explain/gremlin_profile use args={}).
    args = dict(args) if args else {}
    data = {}
    headers = {}
    url = f'{self._http_protocol}://{self.host}'
    if self.is_analytics_domain():
        # Neptune Analytics uses a single /queries endpoint (no port) with a
        # JSON body; the plan type is selected via body parameters.
        url += '/queries'
        data['gremlin'] = query
        data['language'] = 'gremlin'
        headers['content-type'] = 'application/json'
        if plan_type == 'explain':
            data['explain.mode'] = args.pop('explain.mode')
        elif plan_type == 'profile':
            data['profile.debug'] = args.pop('profile.debug')
    else:
        # Neptune DB exposes dedicated /gremlin/explain and /gremlin/profile
        # endpoints on the configured port.
        url += f':{self.port}/gremlin/{plan_type}'
        data['gremlin'] = query
    # Forward any remaining plan options verbatim.
    data.update(args)
    req = self._prepare_request('POST', url, data=json.dumps(data), headers=headers)
    res = self._http_session.send(req, verify=self.ssl_verify)
    return res

Expand Down

0 comments on commit 9c176c0

Please sign in to comment.