In [None]:
# Movie Search - Neptune
https://aws.amazon.com/blogs/database/make-relevant-movie-recommendations-using-amazon-neptune-amazon-neptune-machine-learning-and-amazon-opensearch-service/
https://github.com/aws-samples/amazon-neptune-ontology-example-blog.git



In [2]:
!pip install -q opensearch-py

In [None]:
# Reference 
https://docs.aws.amazon.com/opensearch-service/latest/developerguide/serverless-clients.html#serverless-python

In [2]:
import boto3

REGION = boto3.Session().region_name

# Replace with your S3 bucket where you prepared the IMDB data for Neptune use
SOURCE_S3_PATH_NOSLASH=f"s3://ekg-da-poc"

# Replace with your Amazon OpenSearch Service domain endpoint
FTS_AOS_ENDPOINT="https://z9urozgne3v5p4ywe2fj.us-east-1.aoss.amazonaws.com"

VS_AOS_ENDPOINT="https://8kd01utb7q4h0s0lec3g.us-east-1.aoss.amazonaws.com"

# Replace with your IAM role for the GNN embedding pipeline.
SAGEMAKER_ROLE="arn:aws:iam::196856463470:role/SandboxServiceRole"

In [9]:
from opensearchpy import OpenSearch, RequestsHttpConnection, AWSV4SignerAuth, RequestsAWSV4SignerAuth
from opensearchpy.helpers import bulk
import boto3

service = 'aoss'
region2 = 'us-east-1'

service = 'aoss'

credentials = boto3.Session().get_credentials()
auth2 = RequestsAWSV4SignerAuth(credentials, region2, service)

vs_aos_client = OpenSearch(
    hosts = [{'host': VS_AOS_ENDPOINT.split("//")[1], 'port': 443}],
    http_auth=auth2,
    use_ssl = True,
    verify_certs = True,
    connection_class = RequestsHttpConnection,
    pool_maxsize=20,
)


In [10]:
vs_aos_client.indices.get_alias(index="*")

{'movie_sentence': {'aliases': {}},
 'python-test-index': {'aliases': {}},
 'books-index': {'aliases': {}},
 '.opensearch_dashboards_1': {'aliases': {'.opensearch_dashboards': {}}},
 'movie': {'aliases': {}},
 'movie_gnn': {'aliases': {}}}

In [None]:
#Reference
https://docs.aws.amazon.com/opensearch-service/latest/developerguide/serverless-clients.html#serverless-python
https://opensearch.org/docs/latest/clients/python-low-level/

In [9]:
# create an index
index_name = 'books-index'
create_response = vs_aos_client.indices.create(
    index_name
)

print('\nCreating index:')
print(create_response)


RequestError: RequestError(400, 'resource_already_exists_exception', 'OpenSearch exception [type=resource_already_exists_exception, reason=index [books-index/Iyy3m5EBIRMOPYHx6vl7] already exists]- server : [envoy]')

In [11]:
vs_aos_client.indices.get(index="books-index")

{'books-index': {'aliases': {},
  'mappings': {'properties': {'director': {'type': 'text',
     'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}},
    'title': {'type': 'text',
     'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}},
    'year': {'type': 'text',
     'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}}},
  'settings': {'index': {'creation_date': '1724894256589',
    'number_of_shards': '2',
    'number_of_replicas': '0',
    'uuid': 'ICy1m5EBIRMOPYHxnflq',
    'version': {'created': '135217827'},
    'provided_name': 'books-index'}}}}

In [12]:
# index a document
document = {
  'title': 'The Green Mile2',
  'director': 'Stephen King2',
  'year': '1996'
}

print('\nIndexing - index1:')

index_response1 = vs_aos_client.index(
    index = 'books-index',
    body = document,
    id = '1'
    #refresh = True
)
print(index_response1)


Indexing - index1:


RequestError: RequestError(400, 'illegal_argument_exception', 'Document ID is not supported in create/index operation request')

In [16]:
q1 = 'King'
query1 = {
  'size': 5,
  'query': {
    'multi_match': {
      'query': q1,
      'fields': ['title^2', 'director']
    }
  }
}

print('\nSearching - index1:')
search_response1 = vs_aos_client.search(
    body = query1,
    index = 'books-index'
)
print(search_response1)


Searching - index1:
{'took': 133, 'timed_out': False, '_shards': {'total': 0, 'successful': 0, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 1, 'relation': 'eq'}, 'max_score': 0.2876821, 'hits': [{'_index': 'books-index', '_id': '1%3A0%3AzMy6m5EBKwGr2Tq_kK-j', '_score': 0.2876821, '_source': {'title': 'The Green Mile', 'director': 'Stephen King', 'year': '1996'}}]}}


In [None]:
# delete the index
#delete_response = vs_aos_client.indices.delete(
#    index_name
#)

#print('\nDeleting index:')
#print(delete_response)

In [17]:
index_name2 = 'python-test-index'
index_body2 = {
  'settings': {
    'index': {
      'number_of_shards': 4
    }
  }
}

print('\nCreating index2:')

create_response2 = vs_aos_client.indices.create(index_name2, body=index_body2)
print(create_response2)

vs_aos_client.indices.get(index="python-test-index")




Creating index2:
{'acknowledged': True, 'shards_acknowledged': True, 'index': 'python-test-index'}


{'python-test-index': {'aliases': {},
  'mappings': {},
  'settings': {'index': {'creation_date': '1724894679298',
    'number_of_shards': '2',
    'number_of_replicas': '0',
    'uuid': 'Jiy8m5EBIRMOPYHxEPmW',
    'version': {'created': '135217827'},
    'provided_name': 'python-test-index'}}}}

In [22]:
document2 = {
  'title': 'Moneyball',
  'director': 'Bennett Miller',
  'year': '2011'
}

print('\nIndexing - index1:')

index_response2 = vs_aos_client.index(
    index = 'python-test-index',
    body = document2
    #id = '1',
    #refresh = True
)


print(index_response2)


Indexing - index1:
{'_index': 'python-test-index', '_id': '1%3A0%3Azcy9m5EBKwGr2Tq_ha-l', '_version': 1, 'result': 'created', '_shards': {'total': 0, 'successful': 0, 'failed': 0}, '_seq_no': 0, '_primary_term': 0}


In [23]:
q2 = 'Miller'
query2 = {
  'size': 5,
  'query': {
    'multi_match': {
      'query': q2,
      'fields': ['title^2', 'director']
    }
  }
}

print('\nSearching - index2:')

search_response2 = vs_aos_client.search(
    body = query2,
    index = 'python-test-index'
)
print(search_response2)


Searching - index2:
{'took': 63, 'timed_out': False, '_shards': {'total': 0, 'successful': 0, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 1, 'relation': 'eq'}, 'max_score': 0.2876821, 'hits': [{'_index': 'python-test-index', '_id': '1%3A0%3AUjC8m5EBSE2l0lVlv8vF', '_score': 0.2876821, '_source': {'title': 'Moneyball', 'director': 'Bennett Miller', 'year': '2011'}}]}}


In [None]:
# delete the index
#delete_response2 = vs_aos_client.indices.delete(
#    index_name2
#)

#print('\nDeleting index2:')
#print(delete_response2)