In [156]:
# Model Settings
MODEL_NAME="all-MiniLM-L6-v2"
user_input = "What is Coelom"

########################
region = 'us-east-1' 
index_name = 'ncert'
service = 'es'
aos_host = "search-biology-j6cjabt44maa5ju4lbxfca3jmq.us-east-1.es.amazonaws.com"

In [157]:
from opensearchpy import OpenSearch, RequestsHttpConnection, AWSV4SignerAuth
import boto3
from requests_aws4auth import AWS4Auth
from sentence_transformers import SentenceTransformer
import pandas as pd
from azure_openai_helper import generate_answer_from_context

In [158]:
credentials = boto3.Session().get_credentials()
auth = AWSV4SignerAuth(credentials, region)


In [159]:
awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, region, service, session_token=credentials.token)

In [160]:
aos_client = OpenSearch(
    hosts = [{'host': aos_host, 'port': 443}],
    http_auth = auth,
    use_ssl = True,
    verify_certs = True,
    connection_class = RequestsHttpConnection
)

In [161]:
model = SentenceTransformer(MODEL_NAME)

In [162]:
xq = model.encode([user_input],convert_to_tensor=True)
xq_list = xq.tolist()

In [163]:
xq_list[0]

[-0.042863715440034866,
 0.041995562613010406,
 -0.056733760982751846,
 0.027966050431132317,
 -0.03282560035586357,
 -0.053680047392845154,
 -0.053033631294965744,
 0.02711592987179756,
 -0.0007371108513325453,
 0.026679690927267075,
 0.05748998746275902,
 -0.14638976752758026,
 -0.042849037796258926,
 0.05368601158261299,
 -0.022275950759649277,
 -0.1332804411649704,
 0.0108184814453125,
 -0.05613688379526138,
 0.0021505674812942743,
 0.008198272436857224,
 0.03028068318963051,
 0.038957685232162476,
 -0.019268125295639038,
 0.025642411783337593,
 0.044883161783218384,
 0.03785828500986099,
 0.025045769289135933,
 0.030045848339796066,
 0.046761654317379,
 -0.014629685319960117,
 -0.04331487789750099,
 -0.013703132048249245,
 0.0323970727622509,
 0.015361163765192032,
 -0.044113971292972565,
 0.01904839463531971,
 0.012730293907225132,
 -0.015088694170117378,
 0.009063957259058952,
 -0.019090885296463966,
 -0.07781611382961273,
 0.0628509670495987,
 0.06096912547945976,
 -0.006804461

In [164]:
query={
    "size": 10,
    "query": {
        "knn": {
            "text_vector":{
                "vector":xq_list[0],
                "k":10
            }
        }
    }
}

res = aos_client.search(index=index_name, 
                       body=query,
                       stored_fields=["text"])

In [165]:
print("Got %d Hits:" % res['hits']['total']['value'])

Got 50 Hits:


In [166]:
query_result=[]
contexts =""

In [167]:
counter = 0

In [168]:
for hit in res['hits']['hits']:
        if counter > 3:
            break
        contexts +=  hit['fields']['text'][0]+"\n---\n"
        counter += 1

In [169]:
type(contexts)

str

In [170]:
for hit in res['hits']['hits']:
    row=[hit['_id'],hit['_score'],hit['fields']['text'][0]]
    query_result.append(row)

In [171]:
query_result_df = pd.DataFrame(data=query_result,columns=["_id","_score","text"])

In [172]:
display(query_result_df)

Unnamed: 0,_id,_score,text
0,BNUu6YkBH0ejTxVjU250,0.707898,1 (a) Radial symmetry\nRationalised 2023-24\n...
1,BdUu6YkBH0ejTxVjVm6X,0.612489,"In\nsome animals, the body cavity is not line..."
2,DtUu6YkBH0ejTxVjYG4k,0.603072,6 Examples of Coelenterata indicating outline...
3,BtUu6YkBH0ejTxVjV26d,0.602643,3 Diagrammatic sectional view of :\n(a) Coelom...
4,HtUu6YkBH0ejTxVjcG7V,0.565705,\nThis phylum consists of a small group of wor...
5,INUu6YkBH0ejTxVjcm71,0.564052,"These are bilaterally\nsymmetrical, triplobla..."
6,AtUu6YkBH0ejTxVjUW5e,0.558436,", any plane that passes through the centre\ndo..."
7,MNUu6YkBH0ejTxVjg27T,0.555726,Endoskeleton is fully ossified (bony) and the...
8,O9Uu6YkBH0ejTxVjkW5R,0.552549,Aschelminthes are\npseudocoelomates and incl...
9,QNUu6YkBH0ejTxVjlm5c,0.55047,How useful is the study of the nature of body ...


In [173]:
len(contexts)

1666

In [196]:
from pprint import pprint
pprint(contexts)

('1 (a)  Radial symmetry\n'
 'Rationalised 2023-24\n'
 'ANIMAL KINGDOM 3939\n'
 '4.1.4 Coelom\n'
 'Presence or absence of a cavity between the body\n'
 'wall and the gut wall is very important in\n'
 'classification. The body cavity, which is lined\n'
 'by mesoderm is called coelom . Animals\n'
 'possessing coelom are called coelomates,  e.g.,\n'
 'annelids, molluscs, arthropods, echinoderms,\n'
 'hemichordates and chordates (Figure 4.3a)\n'
 '---\n'
 ' In\n'
 'some animals, the body cavity is not lined by\n'
 'mesoderm, instead, the mesoderm is present as\n'
 'scattered pouches in between the ectoderm and\n'
 'endoderm. Such a body cavity is called\n'
 'pseudocoelom and the animals possessing them\n'
 'are called pseudocoelomates,  e.g.,\n'
 'aschelminthes (Figure 4.3b). The animals in\n'
 'which the body cavity is absent are called\n'
 'acoelomates,  e.g., platyhelminthes (Figure 4.3c).Figure 4\n'
 '---\n'
 '6 Examples of  Coelenterata indicating outline of their body form :\n'
 '(a)

In [174]:
from pprint import pprint
pprint(contexts)

('1 (a)  Radial symmetry\n'
 'Rationalised 2023-24\n'
 'ANIMAL KINGDOM 3939\n'
 '4.1.4 Coelom\n'
 'Presence or absence of a cavity between the body\n'
 'wall and the gut wall is very important in\n'
 'classification. The body cavity, which is lined\n'
 'by mesoderm is called coelom . Animals\n'
 'possessing coelom are called coelomates,  e.g.,\n'
 'annelids, molluscs, arthropods, echinoderms,\n'
 'hemichordates and chordates (Figure 4.3a)\n'
 '---\n'
 ' In\n'
 'some animals, the body cavity is not lined by\n'
 'mesoderm, instead, the mesoderm is present as\n'
 'scattered pouches in between the ectoderm and\n'
 'endoderm. Such a body cavity is called\n'
 'pseudocoelom and the animals possessing them\n'
 'are called pseudocoelomates,  e.g.,\n'
 'aschelminthes (Figure 4.3b). The animals in\n'
 'which the body cavity is absent are called\n'
 'acoelomates,  e.g., platyhelminthes (Figure 4.3c).Figure 4\n'
 '---\n'
 '6 Examples of  Coelenterata indicating outline of their body form :\n'
 '(a)

In [175]:
import sagemaker

In [185]:
# define payload
prompt = f"""You are an helpful Assistant, called Falcon.

{contexts}
User: {user_input}
Falcon:"""

# hyperparameters for llm
payload = {
  "inputs": prompt,
  "parameters": {
    "do_sample": True,
    "top_p": 0.9,
    "temperature": 0.8,
    "max_new_tokens": 1024,
    "repetition_penalty": 1.03,
    "stop": ["\nUser:","<|endoftext|>","</s>"]
  }
}

In [189]:
payload

{'inputs': 'You are an helpful Assistant, called Falcon.\n\n1 (a)  Radial symmetry\nRationalised 2023-24\nANIMAL KINGDOM 3939\n4.1.4 Coelom\nPresence or absence of a cavity between the body\nwall and the gut wall is very important in\nclassification. The body cavity, which is lined\nby mesoderm is called coelom . Animals\npossessing coelom are called coelomates,  e.g.,\nannelids, molluscs, arthropods, echinoderms,\nhemichordates and chordates (Figure 4.3a)\n---\n In\nsome animals, the body cavity is not lined by\nmesoderm, instead, the mesoderm is present as\nscattered pouches in between the ectoderm and\nendoderm. Such a body cavity is called\npseudocoelom and the animals possessing them\nare called pseudocoelomates,  e.g.,\naschelminthes (Figure 4.3b). The animals in\nwhich the body cavity is absent are called\nacoelomates,  e.g., platyhelminthes (Figure 4.3c).Figure 4\n---\n6 Examples of  Coelenterata indicating outline of their body form :\n(a) Aurelia  (Medusa) (b) Adamsia (Polyp)

In [190]:
endpoint_name = 'falcon-7b-instruct--2023-08-12-11-38-33'

In [191]:
def query_endpoint_with_json_payload(encoded_json, endpoint_name, content_type="application/json"):
    client = boto3.client("runtime.sagemaker")
    response = client.invoke_endpoint(
        EndpointName=endpoint_name, ContentType=content_type, Body=encoded_json
    )
    return response

In [192]:
import json

In [193]:
response2 = query_endpoint_with_json_payload(json.dumps(payload).encode("utf-8"), endpoint_name)

In [194]:
response2

{'ResponseMetadata': {'RequestId': '7adaba9b-7cc3-482d-a9f0-9046db50f5dc',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '7adaba9b-7cc3-482d-a9f0-9046db50f5dc',
   'x-amzn-invoked-production-variant': 'AllTraffic',
   'date': 'Sat, 12 Aug 2023 12:19:50 GMT',
   'content-type': 'application/json',
   'content-length': '2053',
   'connection': 'keep-alive'},
  'RetryAttempts': 0},
 'ContentType': 'application/json',
 'InvokedProductionVariant': 'AllTraffic',
 'Body': <botocore.response.StreamingBody at 0x246ddbccaf0>}

In [195]:
response2["Body"].read().decode("utf-8")

'[{"generated_text":"You are an helpful Assistant, called Falcon.\\n\\n1 (a)  Radial symmetry\\nRationalised 2023-24\\nANIMAL KINGDOM 3939\\n4.1.4 Coelom\\nPresence or absence of a cavity between the body\\nwall and the gut wall is very important in\\nclassification. The body cavity, which is lined\\nby mesoderm is called coelom . Animals\\npossessing coelom are called coelomates,  e.g.,\\nannelids, molluscs, arthropods, echinoderms,\\nhemichordates and chordates (Figure 4.3a)\\n---\\n In\\nsome animals, the body cavity is not lined by\\nmesoderm, instead, the mesoderm is present as\\nscattered pouches in between the ectoderm and\\nendoderm. Such a body cavity is called\\npseudocoelom and the animals possessing them\\nare called pseudocoelomates,  e.g.,\\naschelminthes (Figure 4.3b). The animals in\\nwhich the body cavity is absent are called\\nacoelomates,  e.g., platyhelminthes (Figure 4.3c).Figure 4\\n---\\n6 Examples of  Coelenterata indicating outline of their body form :\\n(a) Au

In [2]:
from pprint import pprint

In [3]:
pprint("You are an helpful Assistant, called Falcon.\\n\\n1 (a)  Radial symmetry\\nRationalised 2023-24\\nANIMAL KINGDOM 3939\\n4.1.4 Coelom\\nPresence or absence of a cavity between the body\\nwall and the gut wall is very important in\\nclassification. The body cavity, which is lined\\nby mesoderm is called coelom . Animals\\npossessing coelom are called coelomates,  e.g.,\\nannelids, molluscs, arthropods, echinoderms,\\nhemichordates and chordates (Figure 4.3a)\\n---\\n In\\nsome animals, the body cavity is not lined by\\nmesoderm, instead, the mesoderm is present as\\nscattered pouches in between the ectoderm and\\nendoderm. Such a body cavity is called\\npseudocoelom and the animals possessing them\\nare called pseudocoelomates,  e.g.,\\naschelminthes (Figure 4.3b). The animals in\\nwhich the body cavity is absent are called\\nacoelomates,  e.g., platyhelminthes (Figure 4.3c).Figure 4\\n---\\n6 Examples of  Coelenterata indicating outline of their body form :\\n(a) Aurelia  (Medusa) (b) Adamsia (Polyp)\\n(b) (a)Examples: Sycon  (Scypha), Spongilla (Fresh water sponge)  and Euspongia\\n(Bath sponge).\\n4.2.2 Phylum – Coelenterata (Cnidaria)\\nThey are aquatic, mostly marine, sessile or free-swimming, radially\\nsymmetrical animals (Figure 4.6). The name cnidaria is derived from the\\nRationalised 2023-24\\n42 BIOLOGY\\n4.2\\n---\\n3 Diagrammatic sectional view of :\\n(a) Coelomate (b) Pseudocoelomate\\n(c) AcoelomateThose animals in which the developing embryo has a thir d germinal layer ,\\nmesoderm , in between the ectoderm and endoderm, are called\\ntriploblastic  animals (platyhelminthes to chordates, Figure 4.2b).\\n4.1.5 Segmentation\\nIn some animals, the body is externally and internally divided into\\nsegments with a serial repetition of at least some organs\\n---\\n\\nUser: What is Coelom\\nFalcon: Coelom is a body cavity present in animals, which is divided into segments. These segments are lined with body wall, in between which organs are arranged. The cavity is surrounded by three germ layers- coelom, mesoderm and endoderm.")

('You are an helpful Assistant, called Falcon.\\n\\n1 (a)  Radial '
 'symmetry\\nRationalised 2023-24\\nANIMAL KINGDOM 3939\\n4.1.4 '
 'Coelom\\nPresence or absence of a cavity between the body\\nwall and the gut '
 'wall is very important in\\nclassification. The body cavity, which is '
 'lined\\nby mesoderm is called coelom . Animals\\npossessing coelom are '
 'called coelomates,  e.g.,\\nannelids, molluscs, arthropods, '
 'echinoderms,\\nhemichordates and chordates (Figure 4.3a)\\n---\\n In\\nsome '
 'animals, the body cavity is not lined by\\nmesoderm, instead, the mesoderm '
 'is present as\\nscattered pouches in between the ectoderm and\\nendoderm. '
 'Such a body cavity is called\\npseudocoelom and the animals possessing '
 'them\\nare called pseudocoelomates,  e.g.,\\naschelminthes (Figure 4.3b). '
 'The animals in\\nwhich the body cavity is absent are called\\nacoelomates,  '
 'e.g., platyhelminthes (Figure 4.3c).Figure 4\\n---\\n6 Examples of  '
 'Coelenterata indicating outl