## Load configuration

    Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
    SPDX-License-Identifier: MIT-0

The configuration file should be in this format:

    neptune:
        endpoint: YOUR-NEPTUNE-ENDPOINT

Do not submit this file to version control.

In [1]:
import yaml
config = yaml.safe_load(open("config.yaml"))

In [2]:
%status

{'status': 'healthy',
 'startTime': 'Fri Apr 12 16:12:56 UTC 2024',
 'dbEngineVersion': '1.3.1.0.R1',
 'role': 'writer',
 'dfeQueryEngine': 'viaQueryHint',
 'gremlin': {'version': 'tinkerpop-3.6.4'},
 'sparql': {'version': 'sparql-1.1'},
 'opencypher': {'version': 'Neptune-9.0.20190305-1.0'},
 'labMode': {'ObjectIndex': 'disabled',
  'ReadWriteConflictDetection': 'enabled'},
 'features': {'SlowQueryLogs': 'disabled',
  'ResultCache': {'status': 'disabled'},
  'IAMAuthentication': 'disabled',
  'Streams': 'disabled',
  'AuditLog': 'disabled'},
 'settings': {'clusterQueryTimeoutInMs': '120000',
  'SlowQueryLogsThreshold': '5000'},
 'serverlessConfiguration': {'minCapacity': '1.0', 'maxCapacity': '128.0'}}

## Prereqs

Restart the kernel after running this line.

In [None]:
%pip install --upgrade --quiet boto3 botocore langchain datasets neo4j

## Populate graph

In [3]:
import boto3
dzclient = boto3.client('datazone')

In [4]:
domainId = 'ENTER YOUR DOMAIN ID HERE'

### Projects

In [5]:
response = dzclient.list_projects(
    domainIdentifier=domainId
)

In [6]:
projects = {}
for item in response['items']:
    projects[item['id']] = item['name']

In [7]:
projects

{'6m5dcvzlipijkg': 'Underwriting',
 'cy7ls2lfj0oxsg': 'Marketing',
 'aj0r6a9dx7m2kw': 'Claims',
 'bewpdtw9ctp8gg': 'Admin'}

In the next cell, the authentication parameters are not used.

In [8]:
from neo4j import GraphDatabase
uri = f"bolt://{config['neptune']['endpoint']}:8182"
with GraphDatabase.driver(uri, auth=("username", "password"), encrypted=True) as gdriver:
    for p in projects.keys():
        nprops = {'name': projects[p]}
        propstr = []
        for p in nprops.keys():
            propstr.append(f"{p}: '{nprops[p]}'")
        q = "MERGE (:Project {" + ",".join(propstr) + "})"
        print(f"Query: {q}")
        gdriver.execute_query(q)

Query: MERGE (:Project {name: 'Underwriting'})
Query: MERGE (:Project {name: 'Marketing'})
Query: MERGE (:Project {name: 'Claims'})
Query: MERGE (:Project {name: 'Admin'})


In [30]:
%%gremlin -p v
g.V().path()

Tab(children=(Output(layout=Layout(max_height='600px', max_width='940px', overflow='scroll')), Force(network=<…

### Insert assets

In [18]:
response = dzclient.search_listings(
    domainIdentifier=domainId
)

In [19]:
response['items'][0]

{'assetListing': {'createdAt': datetime.datetime(2024, 4, 12, 16, 18, 54, 207000, tzinfo=tzlocal()),
  'description': 'Custom asset 9 for asset type JohnMarketingAssetType and form JohnMarketingFormType',
  'entityId': '4d7ht4aw373spc',
  'entityRevision': '1',
  'entityType': 'JohnMarketingAssetType',
  'glossaryTerms': [],
  'listingCreatedBy': 'bf12f32a-d85b-4bcd-b330-b57064416d32',
  'listingId': 'ag59yujxqem4nk',
  'listingRevision': '2',
  'listingUpdatedBy': 'bf12f32a-d85b-4bcd-b330-b57064416d32',
  'name': 'JohnMarketingAssetType9',
  'owningProjectId': 'cy7ls2lfj0oxsg'}}

In [20]:
assets = []
asset_types = []
for r in response['items']:
    item = r['assetListing']
    try:
        assets.append({
            'name': item['name'],
            'project': item['owningProjectId'],
            'listingId': item['listingId'],
            'assetType': item['entityType'],
            'assetId': item['entityId'],
            'description': item['description']
        })
        if item['entityType'] not in asset_types:
            asset_types.append(item['entityType'])
    except:
        pass

In [21]:
assets[0]

{'name': 'JohnMarketingAssetType9',
 'project': 'cy7ls2lfj0oxsg',
 'listingId': 'ag59yujxqem4nk',
 'assetType': 'JohnMarketingAssetType',
 'assetId': '4d7ht4aw373spc',
 'description': 'Custom asset 9 for asset type JohnMarketingAssetType and form JohnMarketingFormType'}

In [23]:
projects[assets[0]['project']]

'Marketing'

In [26]:
with GraphDatabase.driver(uri, auth=("username", "password"), encrypted=True) as gdriver:
    for a in assets:
        propstr = []
        for p in a.keys():
            propstr.append(f"{p}: '{a[p]}'")
        q = "MERGE (:Asset {" + ",".join(propstr) + "})"
        print(f"Query: {q}")
        gdriver.execute_query(q)
        
        proj_name = projects[a['project']]
        propstr = ["role: 'producer'"]
        q = "MATCH (p:Project {name: '" + proj_name + "'}), (a:Asset {name: '" + a['name'] + "'}) " + \
        "CREATE (p)-[:producer {" + ",".join(propstr) + "}]->(a)"
        print(f"Query: {q}")
        gdriver.execute_query(q)

Query: MERGE (:Asset {name: 'JohnMarketingAssetType9',project: 'cy7ls2lfj0oxsg',listingId: 'ag59yujxqem4nk',assetType: 'JohnMarketingAssetType',assetId: '4d7ht4aw373spc',description: 'Custom asset 9 for asset type JohnMarketingAssetType and form JohnMarketingFormType'})
Query: MATCH (p:Project {name: 'Marketing'}), (a:Asset {name: 'JohnMarketingAssetType9'}) CREATE (p)-[:producer {role: 'producer'}]->(a)
Query: MERGE (:Asset {name: 'JohnMarketingAssetType8',project: 'cy7ls2lfj0oxsg',listingId: 'az4k2f4ii70tjk',assetType: 'JohnMarketingAssetType',assetId: '4w561m1p2lrseo',description: 'Custom asset 8 for asset type JohnMarketingAssetType and form JohnMarketingFormType'})
Query: MATCH (p:Project {name: 'Marketing'}), (a:Asset {name: 'JohnMarketingAssetType8'}) CREATE (p)-[:producer {role: 'producer'}]->(a)
Query: MERGE (:Asset {name: 'JohnMarketingAssetType7',project: 'cy7ls2lfj0oxsg',listingId: '3hyfe8n2hd82a8',assetType: 'JohnMarketingAssetType',assetId: 'd0zidga50p6vio',description: '

In [36]:
%%gremlin -p v,e
g.V().outE().inV().path()

Tab(children=(Output(layout=Layout(max_height='600px', max_width='940px', overflow='scroll')), Force(network=<…

### Subscriptions

In [46]:
subs = []
for p in projects.keys():
    response = dzclient.list_subscription_requests(
        domainIdentifier=domainId,
        owningProjectId=p
    )
    
    for item in response['items']:
        subs.append({
            'comment': item['decisionComment'],
            'id': item['id'],
            'reason': item['requestReason'],
            'status': item['status'],
            'listingId': item['subscribedListings'][0]['id'],
            'assetId': item['subscribedListings'][0]['item']['assetListing']['entityId'],
            'project': item['subscribedListings'][0]['ownerProjectId'],
            'subproject': item['subscribedPrincipals'][0]['project']['id']
        })

In [47]:
len(subs)

90

In [48]:
subs[0]

{'comment': 'optimize scalable paradigms',
 'id': '55gvrbzpl9c9w0',
 'reason': 'empower bleeding-edge vortals',
 'status': 'REJECTED',
 'listingId': '44649ilew9wo40',
 'assetId': '4z8tuhezo5ij68',
 'project': 'aj0r6a9dx7m2kw',
 'subproject': '6m5dcvzlipijkg'}

In [52]:
with GraphDatabase.driver(uri, auth=("username", "password"), encrypted=True) as gdriver:
    for s in subs:
        proj_name = projects[s['subproject']]
        propstr = []
        for p in s.keys():
            propstr.append(f"{p}: '{s[p]}'")
        q = "MATCH (p:Project {name: '" + proj_name + "'}), (a:Asset {assetId: '" + s['assetId'] + "'}) " + \
        "CREATE (p)-[:consumer {" + ",".join(propstr) + "}]->(a)"
        print(f"Query: {q}")
        gdriver.execute_query(q)

Query: MATCH (p:Project {name: 'Underwriting'}), (a:Asset {assetId: '4z8tuhezo5ij68'}) CREATE (p)-[:consumer {comment: 'optimize scalable paradigms',id: '55gvrbzpl9c9w0',reason: 'empower bleeding-edge vortals',status: 'REJECTED',listingId: '44649ilew9wo40',assetId: '4z8tuhezo5ij68',project: 'aj0r6a9dx7m2kw',subproject: '6m5dcvzlipijkg'}]->(a)
Query: MATCH (p:Project {name: 'Underwriting'}), (a:Asset {assetId: 'c7ky7hp8xd7spc'}) CREATE (p)-[:consumer {comment: 'synergize impactful vortals',id: 'dix627xvrf918w',reason: 'iterate out-of-the-box experiences',status: 'REJECTED',listingId: 'cq8pyy9y6v1jww',assetId: 'c7ky7hp8xd7spc',project: 'aj0r6a9dx7m2kw',subproject: '6m5dcvzlipijkg'}]->(a)
Query: MATCH (p:Project {name: 'Underwriting'}), (a:Asset {assetId: '41btdxlonez1cg'}) CREATE (p)-[:consumer {comment: 'engineer real-time networks',id: '5k1b3c9x7bgbgw',reason: 'productize end-to-end eyeballs',status: 'REJECTED',listingId: 'cgamebu8moik5s',assetId: '41btdxlonez1cg',project: 'aj0r6a9dx7m

In [74]:
%%gremlin -p v,e
g.V().has("name", "Underwriting").outE().inV().path()

Tab(children=(Output(layout=Layout(max_height='600px', max_width='940px', overflow='scroll')), Force(network=<…

In [78]:
%%gremlin
g.V('84e79e21-5f68-40ee-9acd-4c3c1499027d0').valueMap(true)

Tab(children=(Output(layout=Layout(max_height='600px', max_width='940px', overflow='scroll')), Force(network=<…

## Graph RAG

In [55]:
from langchain_community.graphs import NeptuneGraph

host = config['neptune']['endpoint']
port = 8182
use_https = True

graph = NeptuneGraph(host=host, port=port, use_https=use_https)

In [63]:
from langchain.llms.bedrock import Bedrock
from langchain.chains import NeptuneOpenCypherQAChain
from langchain_community.chat_models import BedrockChat

#modelId = 'anthropic.claude-v2:1' 
modelId = 'anthropic.claude-3-sonnet-20240229-v1:0'

bedrock_runtime = boto3.client(
    service_name="bedrock-runtime",
    region_name="us-west-2",
)

model_kwargs =  { 
    "max_tokens": 2048,
    "temperature": 0.0,
    "top_k": 250,
    "top_p": 1,
    "stop_sequences": ["\n\nHuman"],
}

llm = BedrockChat(
    client=bedrock_runtime,
    model_id=modelId,
    model_kwargs=model_kwargs,
)

In [64]:
chain = NeptuneOpenCypherQAChain.from_llm(llm = llm, graph=graph,verbose=True,)

In [65]:
chain.run("Which project has the most connections to assets with the connection role set to 'consumer' and a status of 'REJECTED'?")



[1m> Entering new NeptuneOpenCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Project)-[c:consumer]->(a:Asset)
WHERE c.status = 'REJECTED'
WITH p, count(c) as connections
RETURN p.name, connections
ORDER BY connections DESC
LIMIT 1
[0m
Full Context:
[32;1m[1;3m[{'p.name': 'Claims', 'connections': 11}][0m

[1m> Finished chain.[0m


"The project with the name 'Claims' has 11 connections."

In [66]:
chain.run("How many connections does each Project have to an Asset?")



[1m> Entering new NeptuneOpenCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Project)-[r]->(a:Asset)
RETURN p.name, count(r) AS connections
ORDER BY connections DESC;
[0m
Full Context:
[32;1m[1;3m[{'p.name': 'Marketing', 'connections': 25}, {'p.name': 'Underwriting', 'connections': 13}, {'p.name': 'Claims', 'connections': 12}][0m

[1m> Finished chain.[0m


'The Marketing Project has 25 connections to Assets, the Underwriting Project has 13 connections to Assets, and the Claims Project has 12 connections to Assets.'

In [79]:
chain.run("Do consumer connection requests from the Underwriting department to the asset named ShawnaMarketingAssetType6 usually get rejected or accepted?")



[1m> Entering new NeptuneOpenCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mMATCH (p:Project)-[c:consumer]->(a:Asset {name: 'ShawnaMarketingAssetType6'})
WHERE p.name = 'Underwriting'
RETURN c.status, count(*) AS count
ORDER BY count DESC
LIMIT 1
[0m
Full Context:
[32;1m[1;3m[{'c.status': 'REJECTED', 'count': 1}][0m

[1m> Finished chain.[0m


'Based on the provided information, consumer connection requests from the Underwriting department to the asset named ShawnaMarketingAssetType6 usually get rejected.'