## k-NN Search

##### Install prerequisites

In [2]:
%%capture 

!pip install PyYAML

#### Imports

In [13]:
from requests.auth import HTTPBasicAuth
import requests
import logging 
import yaml

##### Set up logging

In [14]:
# Named logger used by every cell in this notebook; DEBUG lets all levels through.
logger = logging.getLogger('sagemaker')
logger.setLevel(logging.DEBUG)

# logging.getLogger returns the same object on every call, so re-running this
# cell would otherwise stack another StreamHandler and print each message once
# per handler. Attach the handler only when none is present.
if not logger.handlers:
    logger.addHandler(logging.StreamHandler())

##### Log versions of dependencies

In [15]:
# Record the versions of the third-party libraries in use, for reproducibility.
logger.info('Using requests==%s', requests.__version__)
logger.info('Using pyyaml==%s', yaml.__version__)

Using requests==2.28.2
Using requests==2.28.2
Using pyyaml==5.4.1
Using pyyaml==5.4.1


#### Set up essentials

In [16]:
# Read the connection settings from the local YAML configuration file.
with open('config.yml') as file:
    config = yaml.safe_load(file)

# Credentials used for HTTP basic auth against the domain.
credentials = config['credentials']
es_username, es_password = credentials['username'], credentials['password']

# Endpoint of the domain and the name of the index to work with.
domain = config['domain']
domain_endpoint, domain_index = domain['endpoint'], domain['index']

In [17]:
# Base URL of the index: <endpoint>/<index>
URL = '{}/{}'.format(domain_endpoint, domain_index)
logger.info('URL for Elasticsearch index = %s', URL)

URL for Elasticsearch index = https://search-semantic-search-hryn56c5jy43yryimohz4ajvyi.us-east-1.es.amazonaws.com/legal-docs
URL for Elasticsearch index = https://search-semantic-search-hryn56c5jy43yryimohz4ajvyi.us-east-1.es.amazonaws.com/legal-docs


##### Define the index mapping with a k-NN vector field

In [18]:
# Index definition sent as the JSON body when the index is created:
#   - settings.index.knn      -> enables k-NN search for this index
#   - properties.embedding    -> the k-NN vector field (5-dimensional vectors)
#   - properties.name         -> keyword field
mapping = {
    'settings': {'index': {'knn': True}},
    'mappings': {
        'properties': {
            'embedding': {'type': 'knn_vector', 'dimension': 5},
            'name': {'type': 'keyword'},
        },
    },
}







##### Create the index with the specified mapping

In [28]:
# Check if the index exists using an HTTP HEAD request
response = requests.head(URL, auth=HTTPBasicAuth(es_username, es_password))

if response.status_code == 404:
    # The index does not exist: create it with the mapping defined above.
    # (The target must be URL; the lowercase name `url` is not defined.)
    response = requests.put(URL, auth=HTTPBasicAuth(es_username, es_password), json=mapping)
    if response.ok:
        logger.info(f'Index created: {response.text}')
    else:
        # Do not report success when the service rejected the request.
        logger.error(f'Index creation failed (HTTP {response.status_code}): {response.text}')
elif response.status_code == 200:
    logger.error('Index already exists!')
else:
    # Any other status (e.g. auth or server errors) says nothing about
    # whether the index exists, so report it as-is instead of guessing.
    logger.error(f'Unexpected HTTP {response.status_code} while checking for the index')

Index already exists!
Index already exists!


#### Index a sample document with a vector

In [31]:
# Sample document; the embedding has 5 components, matching the 'dimension'
# declared for the 'embedding' field in the index mapping.
document = {
    'name': '1.txt',
    'content': 'judgement on mumbai riots',
    'embedding': [0.3255, 0.2829, 0.0322, 0.1145, 0.5632]
}

# Index the document under the explicit ID 1.
response = requests.post(f'{URL}/_doc/1', auth=HTTPBasicAuth(es_username, es_password), json=document)

# Surface the outcome instead of discarding it, so a failed request is visible.
if response.ok:
    logger.info(response.text)
else:
    logger.error(f'Indexing failed (HTTP {response.status_code}): {response.text}')

{"_index":"legal-docs","_type":"_doc","_id":"1","_version":1,"result":"created","_shards":{"total":2,"successful":2,"failed":0},"_seq_no":0,"_primary_term":1}
{"_index":"legal-docs","_type":"_doc","_id":"1","_version":1,"result":"created","_shards":{"total":2,"successful":2,"failed":0},"_seq_no":0,"_primary_term":1}
