In [1]:
from config import *

In [2]:
# --- Retrieval settings ---------------------------------------------------
# Name of the sentence-embedding model used to vectorise the question.
MODEL_NAME = 'all-MiniLM-L6-v2'
# Question that is embedded, searched for and finally sent to the LLM.
user_input = 'What is Coelom ?'

# --- AWS / OpenSearch settings --------------------------------------------
service = 'es'
index_name = 'ncert'
region = 'us-east-1'
# `aos_host` is not defined anywhere in this notebook; presumably it arrives
# through `from config import *` (TODO confirm). The self-assignment changes
# nothing, but it raises NameError right here if the name is missing.
aos_host = aos_host

In [3]:
from opensearchpy import OpenSearch, RequestsHttpConnection, AWSV4SignerAuth
import boto3
from requests_aws4auth import AWS4Auth
from sentence_transformers import SentenceTransformer
import pandas as pd
from azure_openai_helper import generate_answer_from_context



In [4]:
# Resolve AWS credentials from a default boto3 session and wrap them in the
# SigV4 signer that is handed to the OpenSearch client as `http_auth`.
default_session = boto3.Session()
credentials = default_session.get_credentials()
auth = AWSV4SignerAuth(credentials, region)


In [5]:
# Second signer, built with requests_aws4auth from the same credentials.
# NOTE(review): nothing in the visible notebook reads `awsauth` (the client
# is created with `auth`) — confirm whether it is still needed.
awsauth = AWS4Auth(
    credentials.access_key,
    credentials.secret_key,
    region,
    service,
    session_token=credentials.token,
)

In [6]:
# OpenSearch client for the domain at `aos_host`: port 443 over SSL with
# certificate verification, using `auth` as the HTTP auth handler.
aos_client = OpenSearch(
    hosts=[dict(host=aos_host, port=443)],
    http_auth=auth,
    use_ssl=True,
    verify_certs=True,
    connection_class=RequestsHttpConnection,
)

In [7]:
# Load the sentence-embedding model named by MODEL_NAME.
model = SentenceTransformer(MODEL_NAME)

In [8]:
# Embed the question as a one-item batch, then turn the resulting tensor into
# nested Python lists so the vector can be placed in the search request body.
xq = model.encode(
    [user_input],
    convert_to_tensor=True,
)
xq_list = xq.tolist()

In [9]:
# Inspect the embedding of the single query sentence.
xq_list[0]

[-0.0300223920494318,
 0.048663508147001266,
 -0.06351431459188461,
 0.05529145896434784,
 -0.06406121701002121,
 -0.048028770834207535,
 -0.03190869837999344,
 0.041287846863269806,
 -0.014191500842571259,
 0.01002271194010973,
 0.08567621558904648,
 -0.15061131119728088,
 -0.043759703636169434,
 0.04378319904208183,
 -0.031019968912005424,
 -0.14232447743415833,
 0.024583961814641953,
 -0.046959567815065384,
 -0.007293751463294029,
 0.0032512149773538113,
 0.03997112065553665,
 0.04799468070268631,
 -0.004991478752344847,
 0.043958742171525955,
 0.02892758697271347,
 0.02970418706536293,
 0.003424725728109479,
 0.033119771629571915,
 0.05541950464248657,
 -0.020585814490914345,
 -0.051039811223745346,
 0.017886526882648468,
 0.04796173423528671,
 0.009121406823396683,
 -0.04978470504283905,
 0.03709445893764496,
 0.004652613773941994,
 -0.0015409564366564155,
 0.028834762051701546,
 -0.014467782340943813,
 -0.08200501650571823,
 0.03564956784248352,
 0.054847799241542816,
 -0.0099688

In [10]:
# k-NN query on the `text_vector` field: k=10 neighbours of the question
# embedding, with the response size also capped at 10 hits.
query = {
    "size": 10,
    "query": {
        "knn": {
            "text_vector": {"vector": xq_list[0], "k": 10},
        },
    },
}

# Only the `text` field is requested back for each hit.
res = aos_client.search(body=query, index=index_name, stored_fields=["text"])

In [11]:
# Report the total hit count carried in the search response.
hit_total = res['hits']['total']['value']
print("Got %d Hits:" % hit_total)

Got 50 Hits:


In [12]:
# Empty accumulators: `query_result` collects one row per hit, `contexts`
# collects the concatenated passage text.
query_result, contexts = [], ""

In [13]:
# Number of hits already appended to `contexts` by the loop in the next cell.
counter = 0

In [14]:
# Build the LLM context from the best-ranked hits.
#
# The cell is self-contained and idempotent: `contexts` is rebuilt from `res`
# on every run instead of being appended to, so re-running it (or running it
# with a stale `counter`) can neither duplicate nor drop passages.
MAX_CONTEXT_HITS = 4  # the former loop stopped once counter > 3, i.e. after 4 hits

top_hits = res['hits']['hits'][:MAX_CONTEXT_HITS]
# Each passage is followed by a "---" separator line.
contexts = "".join(hit['fields']['text'][0] + "\n---\n" for hit in top_hits)
# Kept for backward compatibility: number of hits that went into `contexts`.
counter = len(top_hits)

In [15]:
# Sanity check: the concatenated context should be a single string.
type(contexts)

str

In [16]:
# One [_id, _score, text] row per returned hit.
# The list is rebuilt from `res` on every run rather than appended to, so
# re-running this cell cannot duplicate rows in the results table.
query_result = [
    [hit['_id'], hit['_score'], hit['fields']['text'][0]]
    for hit in res['hits']['hits']
]

In [17]:
# Tabulate the hits; one column per element of each `query_result` row.
result_columns = ["_id", "_score", "text"]
query_result_df = pd.DataFrame(query_result, columns=result_columns)

In [18]:
# Render the table of hits (id, score, passage text).
display(query_result_df)

Unnamed: 0,_id,_score,text
0,yhFl6okBM2uON6ywYSO2,0.710878,1 (a) Radial symmetry\nRationalised 2023-24\n...
1,yxFl6okBM2uON6ywYiPJ,0.625027,"In\nsome animals, the body cavity is not line..."
2,zBFl6okBM2uON6ywYyPP,0.623741,3 Diagrammatic sectional view of :\n(a) Coelom...
3,1BFl6okBM2uON6ywbCMk,0.619165,6 Examples of Coelenterata indicating outline...
4,5BFl6okBM2uON6ywfiOt,0.581922,\nThis phylum consists of a small group of wor...
5,5hFl6okBM2uON6ywgCO6,0.577049,"These are bilaterally\nsymmetrical, triplobla..."
6,9hFl6okBM2uON6ywkiNf,0.568989,Endoskeleton is fully ossified (bony) and the...
7,ARFl6okBM2uON6ywoCQY,0.566587,Aschelminthes are\npseudocoelomates and incl...
8,9xFl6okBM2uON6ywlCOX,0.565867,Air sacs connected to lungs supplement respir...
9,yBFl6okBM2uON6ywXyOl,0.56339,", any plane that passes through the centre\ndo..."


In [19]:
# Size of the concatenated context, in characters.
len(contexts)

1666

In [20]:
# pprint is imported here, at first use; the later cell that prints
# `text_input` relies on this import too.
from pprint import pprint
# Pretty-print the context so its embedded newlines are visible.
pprint(contexts)

('1 (a)  Radial symmetry\n'
 'Rationalised 2023-24\n'
 'ANIMAL KINGDOM 3939\n'
 '4.1.4 Coelom\n'
 'Presence or absence of a cavity between the body\n'
 'wall and the gut wall is very important in\n'
 'classification. The body cavity, which is lined\n'
 'by mesoderm is called coelom . Animals\n'
 'possessing coelom are called coelomates,  e.g.,\n'
 'annelids, molluscs, arthropods, echinoderms,\n'
 'hemichordates and chordates (Figure 4.3a)\n'
 '---\n'
 ' In\n'
 'some animals, the body cavity is not lined by\n'
 'mesoderm, instead, the mesoderm is present as\n'
 'scattered pouches in between the ectoderm and\n'
 'endoderm. Such a body cavity is called\n'
 'pseudocoelom and the animals possessing them\n'
 'are called pseudocoelomates,  e.g.,\n'
 'aschelminthes (Figure 4.3b). The animals in\n'
 'which the body cavity is absent are called\n'
 'acoelomates,  e.g., platyhelminthes (Figure 4.3c).Figure 4\n'
 '---\n'
 '3 Diagrammatic sectional view of :\n'
 '(a) Coelomate (b) Pseudocoelomate\n'

In [21]:
import sagemaker

In [22]:
# Prompt template; the {context} and {question} markers are substituted
# with str.replace in the next cell.
prompt = (
    "Answer based on context:\n\n"
    "{context}\n\n"
    "{question}"
)

In [23]:
# Fill the template: retrieved passages first, then the user's question.
text_input = (
    prompt
    .replace("{context}", contexts)
    .replace("{question}", user_input)
)

In [24]:
# Inspect the fully rendered prompt before it is sent to the LLM.
pprint(text_input)

('Answer based on context:\n'
 '\n'
 '1 (a)  Radial symmetry\n'
 'Rationalised 2023-24\n'
 'ANIMAL KINGDOM 3939\n'
 '4.1.4 Coelom\n'
 'Presence or absence of a cavity between the body\n'
 'wall and the gut wall is very important in\n'
 'classification. The body cavity, which is lined\n'
 'by mesoderm is called coelom . Animals\n'
 'possessing coelom are called coelomates,  e.g.,\n'
 'annelids, molluscs, arthropods, echinoderms,\n'
 'hemichordates and chordates (Figure 4.3a)\n'
 '---\n'
 ' In\n'
 'some animals, the body cavity is not lined by\n'
 'mesoderm, instead, the mesoderm is present as\n'
 'scattered pouches in between the ectoderm and\n'
 'endoderm. Such a body cavity is called\n'
 'pseudocoelom and the animals possessing them\n'
 'are called pseudocoelomates,  e.g.,\n'
 'aschelminthes (Figure 4.3b). The animals in\n'
 'which the body cavity is absent are called\n'
 'acoelomates,  e.g., platyhelminthes (Figure 4.3c).Figure 4\n'
 '---\n'
 '3 Diagrammatic sectional view of :\n'
 '

In [25]:
# hyperparameters for llm
payload = {
    "inputs": text_input,
    "parameters": {
        "do_sample": True,
        "top_p": 0.9,
        "temperature": 0.1,
        "max_new_tokens": 1024,
        "stop": ["<|endoftext|>", "</s>"],
    },
}

In [26]:
# Inspect the request body that will be sent to the endpoint.
payload

{'inputs': 'Answer based on context:\n\n1 (a)  Radial symmetry\nRationalised 2023-24\nANIMAL KINGDOM 3939\n4.1.4 Coelom\nPresence or absence of a cavity between the body\nwall and the gut wall is very important in\nclassification. The body cavity, which is lined\nby mesoderm is called coelom . Animals\npossessing coelom are called coelomates,  e.g.,\nannelids, molluscs, arthropods, echinoderms,\nhemichordates and chordates (Figure 4.3a)\n---\n In\nsome animals, the body cavity is not lined by\nmesoderm, instead, the mesoderm is present as\nscattered pouches in between the ectoderm and\nendoderm. Such a body cavity is called\npseudocoelom and the animals possessing them\nare called pseudocoelomates,  e.g.,\naschelminthes (Figure 4.3b). The animals in\nwhich the body cavity is absent are called\nacoelomates,  e.g., platyhelminthes (Figure 4.3c).Figure 4\n---\n3 Diagrammatic sectional view of :\n(a) Coelomate (b) Pseudocoelomate\n(c) AcoelomateThose animals in which the developing embryo 

In [27]:
# `llm_endpoint` is not defined in this notebook; presumably it is provided by
# `from config import *` — TODO confirm.
endpoint_name = llm_endpoint

In [28]:
# Show which endpoint the request will be sent to.
endpoint_name

'hf-llm-falcon-7b-instruct-bf16-2023-08-12-14-37-19-114'

In [29]:
def query_endpoint_with_json_payload(encoded_json, endpoint_name, content_type="application/json"):
    """Invoke a SageMaker endpoint with an already-encoded request body.

    Args:
        encoded_json: Request body, passed unchanged as ``Body``.
        endpoint_name: Name of the endpoint to invoke.
        content_type: Value sent as ``ContentType``; defaults to
            ``"application/json"``.

    Returns:
        The response object returned by ``invoke_endpoint``, unmodified.
    """
    # A new runtime client is created on every call.
    sagemaker_runtime = boto3.client("runtime.sagemaker")
    return sagemaker_runtime.invoke_endpoint(
        EndpointName=endpoint_name,
        ContentType=content_type,
        Body=encoded_json,
    )

In [30]:
import json

In [31]:
# Serialise the payload to UTF-8 encoded JSON and send it to the LLM endpoint.
encoded_payload = json.dumps(payload).encode("utf-8")
response2 = query_endpoint_with_json_payload(encoded_payload, endpoint_name)

In [None]:
# Decode the JSON body of the endpoint response.
# NOTE(review): if `Body` is a one-shot stream, re-running this cell without
# re-invoking the endpoint will not see the data again — confirm.
model_predictions = json.loads(response2["Body"].read())

In [None]:
# Generated answer of the first prediction in the response.
model_predictions[0]['generated_text']