## Get Top-k matching chunks 

In [None]:
%%capture

!pip install cohere-sagemaker

In [2]:
from cohere_sagemaker import CohereError
from cohere_sagemaker import Client
from requests.auth import HTTPBasicAuth
import requests
import logging 
import boto3
import yaml
import json
import os

In [3]:
logger = logging.getLogger('sagemaker')
logger.setLevel(logging.DEBUG)
logger.addHandler(logging.StreamHandler())

In [4]:
logger.info(f'Using requests=={requests.__version__}')
logger.info(f'Using pyyaml=={yaml.__version__}')

Using requests==2.28.2
Using pyyaml==6.0


In [5]:
ENCODER_ENDPOINT_NAME = 'jumpstart-dft-hf-textembedding-gpt-j-6b-fp16'
LLM_ENDPOINT_NAME = 'cohere-medium-1679931302'
CHUNKS_DIR_PATH = './data/chunks'
client = boto3.client('runtime.sagemaker')
llm_client = Client(endpoint_name=LLM_ENDPOINT_NAME)

In [6]:
with open('config.yml', 'r') as file:
    config = yaml.safe_load(file)

es_username = config['credentials']['username']
es_password = config['credentials']['password']

domain_endpoint = config['domain']['endpoint']
domain_index = config['domain']['index']

In [7]:
URL = f'{domain_endpoint}/{domain_index}/_search'
logger.info(f'URL for Elasticsearch index = {URL}')

URL for Elasticsearch index = https://search-semantic-search-hryn56c5jy43yryimohz4ajvyi.us-east-1.es.amazonaws.com/passages/_search


https://docs.aws.amazon.com/opensearch-service/latest/developerguide/knn.html

In [8]:
prompt = 'What is the punishment for bribing a government officer in India?'

In [9]:
payload = {'text_inputs': [prompt]}
payload = json.dumps(payload).encode('utf-8')
response = client.invoke_endpoint(EndpointName=ENCODER_ENDPOINT_NAME, 
                                      ContentType='application/json', 
                                      Body=payload)
model_predictions = json.loads(response['Body'].read())
embedding = model_predictions['embedding'][0]

In [10]:
k = 3

In [11]:
query = {
    'size': k,
    'query': {
        'knn': {
          'embedding': {
            'vector': embedding,
            'k': k
          }
        }
      }
    }

In [12]:
response = requests.post(URL, auth=HTTPBasicAuth(es_username, es_password), json=query)
response_json = response.json()
hits = response_json['hits']['hits']

In [14]:
for hit in hits:
    confidence = hit['_score']
    chunk = hit['_source']['passage']
    doc_id = hit['_source']['doc_id']
    passage_id = hit['_source']['passage_id']
    qa_prompt = f'Context={chunk}\nQuestion={prompt}\nAnswer='
    response = llm_client.generate(prompt=prompt, 
                                   max_tokens=512, 
                                   temperature=0.25, 
                                   return_likelihoods='GENERATION')
    answer = response.generations[0].text.strip().replace('\n', '')
    logger.info(f'Answer: {answer}')
    logger.info(f'Ref: [doc={doc_id} | passage={passage_id}]')
    logger.info(f'Confidence: {confidence}')
    
if not hits:
    logger.warn('No matching documents found!')


Answer: The punishment for bribing a government officer in India is a jail term of at least two years.
Ref: [doc=3817|chunk=12]
Confidence: 0.6849632
Answer: The punishment for bribing a government officer in India is a jail term of up to 10 years.
Ref: [doc=6354|chunk=10]
Confidence: 0.6752653
Answer: The punishment for bribing a government officer in India is a jail term of up to 10 years.
Ref: [doc=6301|chunk=10]
Confidence: 0.6734458
