Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upgrade to Gremlin-Python 3.7 #597

Merged
merged 5 commits on
Jun 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ChangeLog.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ Starting with v1.31.6, this file will contain a record of major features and upd
## Upcoming
- Added `%reset_graph` line magic ([Link to PR](https://github.com/aws/graph-notebook/pull/610))
- Added `%get_graph` line magic and enabled `%status` for Neptune Analytics ([Link to PR](https://github.com/aws/graph-notebook/pull/611))
- Upgraded to Gremlin-Python 3.7 ([Link to PR](https://github.com/aws/graph-notebook/pull/597))

## Release 4.3.1 (June 3, 2024)

Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ jupyterlab_widgets>=1.0.0,<3.0.0
nbclient<=0.7.0
jupyter-contrib-nbextensions<=0.7.0
widgetsnbextension<=3.6.1
gremlinpython>=3.5.1,<=3.6.2
requests>=2.32.0,<=2.32.2
gremlinpython>=3.5.1,<=3.7.2
requests>=2.27.0,<=2.31.0
ipython>=7.16.1,<=8.10.0
ipykernel==5.3.4
ipyfilechooser==0.6.0
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def get_version():
package_dir={'': 'src'},
include_package_data=True,
install_requires=[
'gremlinpython>=3.5.1,<=3.6.2',
'gremlinpython>=3.5.1,<=3.7.2',
'SPARQLWrapper==2.0.0',
'requests>=2.32.0,<=2.32.2',
'ipywidgets==7.7.2',
Expand Down
1 change: 1 addition & 0 deletions src/graph_notebook/configuration/generate_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ def __init__(self, traversal_source: str = '', username: str = '', password: str
traversal_source = DEFAULT_GREMLIN_TRAVERSAL_SOURCE

serializer_lower = message_serializer.lower()
# TODO: Update with untyped serializers once supported in GremlinPython
if serializer_lower == '':
message_serializer = DEFAULT_GREMLIN_SERIALIZER
elif serializer_lower in GRAPHSONV3_VARIANTS:
Expand Down
16 changes: 10 additions & 6 deletions src/graph_notebook/magics/graph_magic.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,12 +135,16 @@
SEED_NO_DATASETS_FOUND_MSG = "(No datasets available)"
SEED_WIDGET_STYLE = {'description_width': '95px'}

# Tokens as currently defined in TinkerPop 3.7: https://github.com/apache/tinkerpop/blob/3.7-dev/gremlin-util/src/main/java/org/apache/tinkerpop/gremlin/util/ser/SerTokens.java
serializers_map = {
"MIME_JSON": "application/json",
"GRAPHSON_V2D0": "application/vnd.gremlin-v2.0+json",
"GRAPHSON_V3D0": "application/vnd.gremlin-v3.0+json",
"GRYO_V3D0": "application/vnd.gremlin-v3.0+gryo",
"GRAPHBINARY_V1D0": "application/vnd.graphbinary-v1.0"
"GRAPHSON_V1": "application/vnd.gremlin-v1.0+json",
"GRAPHSON_V1_UNTYPED": "application/vnd.gremlin-v1.0+json;types=false",
"GRAPHSON_V2": "application/vnd.gremlin-v2.0+json",
"GRAPHSON_V2_UNTYPED": "application/vnd.gremlin-v2.0+json;types=false",
"GRAPHSON_V3": "application/vnd.gremlin-v3.0+json",
"GRAPHSON_V3_UNTYPED": "application/vnd.gremlin-v3.0+json;types=false",
"GRAPHBINARY_V1": "application/vnd.graphbinary-v1.0"
}

DEFAULT_NAMEDGRAPH_URI = "http://aws.amazon.com/neptune/vocab/v01/DefaultNamedGraph"
Expand Down Expand Up @@ -1057,9 +1061,9 @@ def gremlin(self, line, cell, local_ns: dict = None):
'the profile report by default.')
parser.add_argument('--profile-chop', type=int, default=250,
help='Property to specify max length of profile results string. Default is 250')
parser.add_argument('--profile-serializer', type=str, default='application/json',
parser.add_argument('--profile-serializer', type=str, default='GRAPHSON_V3_UNTYPED',
help='Specify how to serialize results. Allowed values are any of the valid MIME type or '
'TinkerPop driver "Serializers" enum values. Default is application/json')
'TinkerPop driver "Serializers" enum values. Default is GRAPHSON_V3_UNTYPED')
parser.add_argument('--profile-indexOps', action='store_true', default=False,
help='Show a detailed report of all index operations.')
parser.add_argument('--profile-misc-args', type=str, default='{}',
Expand Down
6 changes: 3 additions & 3 deletions src/graph_notebook/neptune/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,9 +118,9 @@

false_str_variants = [False, 'False', 'false', 'FALSE']

GRAPHSONV3_VARIANTS = ['graphsonv3', 'graphsonv3d0', 'graphsonserializersv3d0']
GRAPHSONV2_VARIANTS = ['graphsonv2', 'graphsonv2d0', 'graphsonserializersv2d0']
GRAPHBINARYV1_VARIANTS = ['graphbinaryv1', 'graphbinary', 'graphbinaryserializersv1']
GRAPHSONV3_VARIANTS = ['graphsonv3', 'graphsonv3d0', 'graphsonserializersv3d0', 'graphsonmessageserializerv3']
GRAPHSONV2_VARIANTS = ['graphsonv2', 'graphsonv2d0', 'graphsonserializersv2d0', 'graphsonmessageserializerv2']
GRAPHBINARYV1_VARIANTS = ['graphbinaryv1', 'graphbinary', 'graphbinaryserializersv1', 'graphbinarymessageserializerv1']

STATISTICS_MODES = ["", "status", "disableAutoCompute", "enableAutoCompute", "refresh", "delete"]
SUMMARY_MODES = ["", "basic", "detailed"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ Results
=======
Count: 999.999
Output: [v[3], v[3600], v[3614], v[4], v[5], v[6], v[7], v[8], v[9], v[10], v[11], v[12], v[47], v[49], v[136], v[13], v[15], v[16], v[17], v[18], v[389], v[20], v[21], v[22], v[23], v[24], v[25], v[26], v[27], v[28], v[416], v[29], v[30], v[430], v[31], v[9...
Response serializer: GRYO_V3D0
Response serializer: application/vnd.gremlin-v3.0+json
Response size (bytes): 23566
120 changes: 63 additions & 57 deletions test/unit/graph_magic/gremlin_profile_sample_response.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,100 +4,106 @@

Query String
==================
g.V().hasLabel("airport").has("code", "AUS").emit().repeat(in().simplePath()).times(2).limit(100)
g.V().has('code','LHR').outE().inV().path().by(valueMap(true)).limit(5)


Original Traversal
==================
[GraphStep(vertex,[]), HasStep([~label.eq(airport), code.eq(AUS)]), RepeatStep(emit(true),[VertexStep(IN,vertex), PathFilterStep(simple), RepeatEndStep],until(loops(2))), RangeGlobalStep(0,100)]
[GraphStep(vertex,[]), HasStep([code.eq(LHR)]), VertexStep(OUT,edge), EdgeVertexStep(IN), PathStep([[PropertyMapStep(value)]]), RangeGlobalStep(0,5)]

Optimized Traversal
===================
Neptune steps:
[
NeptuneGraphQueryStep(Vertex) {
JoinGroupNode {
PatternNode[(?1, <code>, "AUS", ?) . project ?1 .], {estimatedCardinality=1, indexTime=84, hashJoin=true, joinTime=3, actualTotalOutput=1}
PatternNode[(?1, <~label>, ?2=<airport>, <~>) . project ask .], {estimatedCardinality=3374, indexTime=29, hashJoin=true, joinTime=0, actualTotalOutput=61}
RepeatNode {
Repeat {
PatternNode[(?3, ?5, ?1, ?6) . project ?1,?3 . IsEdgeIdFilter(?6) . SimplePathFilter(?1, ?3)) .], {hashJoin=true, estimatedCardinality=50148, indexTime=0, joinTime=3}
}
Emit {
Filter(true)
}
LoopsCondition {
LoopsFilter([?1, ?3],eq(2))
PatternNode[VP(?1, <code>, "LHR", <~>) . project ?1 .], {estimatedCardinality=1, expectedTotalOutput=1, indexTime=0, joinTime=0, numSearches=1, actualTotalOutput=1}
PatternNode[EL(?1, ?5, ?3, ?6) . project ?1,?6,?3 . IsEdgeIdFilter(?6) .], {estimatedCardinality=INFINITY, expectedTotalInput=1, indexTime=0, joinTime=0, numSearches=1}
}, finishers=[limit(5)], {path=[Vertex(?1):GraphStep, Edge(?6,?1,?3):VertexStep, Vertex(?3):EdgeVertexStep], joinStats=true, optimizationTime=2, maxVarId=15, executionTime=14}
},
NeptunePathStep {
NeptunePropertyMapStep {
JoinGroupNode {
PatternNode[VP(?1, ?8, ?9, <~>) .]
}, {initialValues={?1=null, ?3=null, ?6=null}}
},
NeptunePropertyMapStep {
JoinGroupNode {
UnionNode {
PatternNode[EP(?6, ?10, ?11, <~>) .], {estimatedCardinality=63580}
PatternNode[EL(?, ?12, ?, ?6) .], {estimatedCardinality=INFINITY}
}
}, annotations={repeatMode=BFS, emitFirst=true, untilFirst=false, leftVar=?1, rightVar=?3}
}, finishers=[limit(100)], annotations={path=[Vertex(?1):GraphStep, Repeat[Vertex(?3):VertexStep]], joinStats=true, optimizationTime=495, maxVarId=7, executionTime=323}
}, {initialValues={?1=null, ?3=null, ?6=null}}
},
NeptunePropertyMapStep {
JoinGroupNode {
PatternNode[VP(?3, ?13, ?14, <~>) .]
}, {initialValues={?1=null, ?3=null, ?6=null}}
}
},
NeptuneTraverserConverterStep
]


Physical Pipeline
=================
NeptuneGraphQueryStep
|-- StartOp
|-- JoinGroupOp
|-- SpoolerOp(100)
|-- DynamicJoinOp(PatternNode[(?1, <code>, "AUS", ?) . project ?1 .], {estimatedCardinality=1, indexTime=84, hashJoin=true})
|-- SpoolerOp(100)
|-- DynamicJoinOp(PatternNode[(?1, <~label>, ?2=<airport>, <~>) . project ask .], {estimatedCardinality=3374, indexTime=29, hashJoin=true})
|-- RepeatOp
|-- <upstream input> (Iteration 0) [visited=1, output=1 (until=0, emit=1), next=1]
|-- BindingSetQueue (Iteration 1) [visited=61, output=61 (until=0, emit=61), next=61]
|-- SpoolerOp(100)
|-- DynamicJoinOp(PatternNode[(?3, ?5, ?1, ?6) . project ?1,?3 . IsEdgeIdFilter(?6) . SimplePathFilter(?1, ?3)) .], {hashJoin=true, estimatedCardinality=50148, indexTime=0})
|-- BindingSetQueue (Iteration 2) [visited=38, output=38 (until=38, emit=0), next=0]
|-- SpoolerOp(100)
|-- DynamicJoinOp(PatternNode[(?3, ?5, ?1, ?6) . project ?1,?3 . IsEdgeIdFilter(?6) . SimplePathFilter(?1, ?3)) .], {hashJoin=true, estimatedCardinality=50148, indexTime=0})
|-- LimitOp(100)
|-- JoinGroupOp@2f500db7
|-- DynamicJoinOp@78b965ae(PipelineJoinOp(PatternNode[VP(?1, <code>, "LHR", <~>) . project ?1 .], {estimatedCardinality=1, expectedTotalOutput=1}))
|-- SpoolerOp(100, @78b965ae, null)
|-- DynamicJoinOp@67012ec9(PipelineJoinOp(PatternNode[EL(?1, ?5, ?3, ?6) . project ?1,?6,?3 . IsEdgeIdFilter(?6) .], {estimatedCardinality=INFINITY, expectedTotalInput=1}))
|-- LimitOp(5)

NeptunePathStep
|-- BindingSetQueue
|-- JoinGroupOp@6a996745
|-- DynamicJoinOp@417c3e4f(PipelineJoinOp(PatternNode[VP(?1, ?8, ?9, <~>) .]))

|-- BindingSetQueue
|-- JoinGroupOp@2b0c033
|-- UnionOp@44764c1
|-- MultiplexerOp

|-- BindingSetQueue
|-- JoinGroupOp@60bf88a9
|-- DynamicJoinOp@32903d93(PipelineJoinOp(PatternNode[VP(?3, ?13, ?14, <~>) .]))

Runtime (ms)
============
Query Execution: 392.686
Serialization: 2636.380
Query Execution: 18.669
Serialization: 15.464

Traversal Metrics
=================
Step Count Traversers Time (ms) % Dur
-------------------------------------------------------------------------------------------------------------
NeptuneGraphQueryStep(Vertex) 100 100 314.162 82.78
NeptuneTraverserConverterStep 100 100 65.333 17.22
>TOTAL - - 379.495 -

Repeat Metrics
==============
Iteration Visited Output Until Emit Next
------------------------------------------------------
0 1 1 0 1 1
1 61 61 0 61 61
2 38 38 38 0 0
------------------------------------------------------
100 100 38 62 62
NeptuneGraphQueryStep(Vertex) 5 5 2.152 20.56
NeptunePathStep([[NeptunePropertyMapStep], [Nep... 5 5 4.317 41.25
NeptuneTraverserConverterStep 5 5 3.997 38.19
>TOTAL - - 10.466 -

Predicates
==========
# of predicates: 16

WARNING: reverse traversal with no edge label(s) - .in() / .both() may impact query performance
# of predicates: 18

Results
=======
Count: 100
Output: [v[3], v[3600], v[3614], v[4], v[5], v[6], v[7], v[8], v[9], v[10], v[11], v[12], v[47], v[49], v[136], v[13], v[15], v[16], v[17], v[18], v[389], v[20], v[21], v[22], v[23], v[24], v[25], v[26], v[27], v[28], v[416], v[29], v[30], v[430], v[31], v[9...
Response serializer: GRYO_V3D0
Response size (bytes): 23566
Count: 5
Output: [path[{country=[UK], code=[LHR], longest=[12799], city=[London], lon=[-0.461941003799], type=[airport], label=airport, elev=[83], icao=[EGLL], id=49, runways=[2], region=[GB-ENG], lat=[51.4706001282], desc=[London Heathrow]}, {dist=3533, id=9144, lab...
Response serializer: application/vnd.gremlin-v3.0+json
Response size (bytes): 10162


Index Operations
================
Query execution:
# of statement index ops: 3
# of unique statement index ops: 3
# of statement index ops: 18
# of unique statement index ops: 18
Duplication ratio: 1.0
# of terms materialized: 0
Serialization:
# of statement index ops: 200
# of unique statement index ops: 140
Duplication ratio: 1.43
# of terms materialized: 393
# of statement index ops: 18
# of unique statement index ops: 18
Duplication ratio: 1.0
# of terms materialized: 0
24 changes: 12 additions & 12 deletions test/unit/graph_magic/metadata_gremlin_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,20 @@
class TestMetadataClassFunctions(unittest.TestCase):

def test_gremlin_profile_metadata_func(self):
time_expected = 392.686
predicates_expected = 16
results_num_expected = 100
serialization_expected = 2636.380
serializer_type_expected = "GRYO_V3D0"
results_size_expected = 23566
query_total_index_ops_expected = 3
query_unique_index_ops_expected = 3
time_expected = 18.669
predicates_expected = 18
results_num_expected = 5
serialization_expected = 15.464
serializer_type_expected = "application/vnd.gremlin-v3.0+json"
results_size_expected = 10162
query_total_index_ops_expected = 18
query_unique_index_ops_expected = 18
query_duplication_ratio_expected = 1
query_terms_materialized_expected = 0
seri_total_index_ops_expected = 200
seri_unique_index_ops_expected = 140
seri_duplication_ratio_expected = 1.43
seri_terms_materialized_expected = 393
seri_total_index_ops_expected = 18
seri_unique_index_ops_expected = 18
seri_duplication_ratio_expected = 1.0
seri_terms_materialized_expected = 0

gremlin_metadata = Metadata()
with open('gremlin_profile_sample_response.txt', 'r') as profile_file:
Expand Down
Loading