In [6]:
# Name of the Qdrant collection that every count/query call in this notebook targets.
COLLECTION_NAME = "HYBRID_COLLECTION"

In [22]:
import json
import boto3
from pprint import pprint

In [19]:
def generate_completion(context:str, query:str, *,
                        model_id:str="anthropic.claude-instant-v1",
                        max_tokens:int=500,
                        client=None)->str:
    """Ask a Bedrock-hosted Claude text-completion model to answer ``query``.

    The model is instructed to answer based on ``context``.

    Args:
        context: Text the model should base its answer on.
        query: The question to answer.
        model_id: Bedrock model identifier passed as ``modelId``.
        max_tokens: Value sent as ``max_tokens_to_sample`` in the request body.
        client: Optional object exposing ``invoke_model`` (e.g. a boto3
            ``bedrock-runtime`` client). When omitted, a new client is created
            with ``boto3.client("bedrock-runtime")``; pass one in to reuse it
            across calls.

    Returns:
        The ``completion`` field of the decoded JSON response.
    """
    # Claude's text-completion prompt format uses "\n\nHuman:" / "\n\nAssistant:"
    # turn markers. The prompt is assembled from explicit pieces so that the
    # source-code indentation does not leak into the text sent to the model.
    prompt_data = (
        "\n\nHuman: Answer the question based on the following context:\n"
        f"{context}\n\n{query}"
        "\n\nAssistant:"
    )

    body = json.dumps({"prompt": prompt_data,
                       "max_tokens_to_sample": max_tokens})

    if client is None:
        client = boto3.client("bedrock-runtime")

    response = client.invoke_model(
        body=body,
        modelId=model_id,
        accept="application/json",
        contentType="application/json",
    )
    # The response body is a stream-like object; read it fully, then decode the JSON.
    response_body = json.loads(response["body"].read())

    return response_body.get("completion")

In [7]:
from qdrant_client import QdrantClient, models
import os

# Qdrant connection: URL and API key are read from the environment
# (QDRANT_URL / QDRANT_KEY) so no credentials live in the notebook.
client = QdrantClient(
    url=os.getenv("QDRANT_URL"),
    api_key=os.getenv("QDRANT_KEY"),
    timeout=600,
)

In [8]:
# Sanity check: ask Qdrant how many points the collection currently holds.
client.count(COLLECTION_NAME)

CountResult(count=49)

In [9]:
from fastembed import TextEmbedding, SparseTextEmbedding, LateInteractionTextEmbedding

# One fastembed model per retrieval strategy used in this notebook:
# dense (MiniLM), sparse (BM25) and late interaction (ColBERT v2).
dense_embedding_model = TextEmbedding(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)
bm25_embedding_model = SparseTextEmbedding(
    model_name="Qdrant/bm25",
)
late_interaction_embedding_model = LateInteractionTextEmbedding(
    model_name="colbert-ir/colbertv2.0",
)

In [10]:
# The question embedded by each model and passed to the LLM in the cells that follow.
query_text = "Describe about protons in the stroma"

### Dense Embedding 

In [15]:
# Dense-only retrieval: embed the question with the MiniLM model and search
# the "all-MiniLM-L6-v2" named vector, keeping the top 10 points with payloads.
results = client.query_points(
    collection_name=COLLECTION_NAME,
    using="all-MiniLM-L6-v2",
    query=next(dense_embedding_model.query_embed(query_text)),
    with_payload=True,
    limit=10,
)

In [26]:
def get_response(results,query_text):
    """Answer ``query_text`` using the retrieved points as context.

    Reads the ``"content"`` payload field of every entry in ``results.points``
    (``results`` is expected to be what ``client.query_points`` returned),
    joins those texts with newlines, and hands the joined context plus the
    question to ``generate_completion``, returning whatever it returns.
    """
    passages = [hit.payload["content"] for hit in results.points]
    return generate_completion("\n".join(passages), query_text)

In [27]:
# Generate an answer from the current `results` (the dense search when cells
# are run in order) and pretty-print it.
pprint(get_response(results,query_text))

(' Based on the given context, here are the key points about protons in the '
 'stroma:\n'
 '\n'
 '- During the light reactions of photosynthesis, as electrons move through '
 'the photosystems, protons are transported across the thylakoid membrane. '
 'This is because the primary electron acceptor, which is located on the outer '
 'side of the membrane, transfers its electron to an H+ carrier. This removes '
 'a proton from the stroma while transporting an electron. \n'
 '\n'
 '- Hence, as the light reactions proceed, the number of protons in the stroma '
 'decreases. The protons are being pumped across the membrane into the '
 'lumen. \n'
 '\n'
 '- The NADP+ reductase enzyme, which reduces NADP+ to NADPH, is located on '
 'the stroma side of the membrane. It requires protons for this reaction. So '
 'more protons are removed from the stroma.\n'
 '\n'
 '- This leads to the accumulation of protons in the lumen and a decrease in '
 'proton concentration (and increase in pH) in the strom

### Sparse Embedding

In [28]:
# BM25 query embedding; reused by the sparse-only search and by the prefetch
# stages of the hybrid searches later in the notebook.
sparse_query_vector = next(bm25_embedding_model.query_embed(query_text))

In [29]:
# ColBERT query embedding; used as the final query against the "colbertv2.0"
# vector in the late-interaction section.
late_query_vector = next(late_interaction_embedding_model.query_embed(query_text))

In [33]:
# Sparse-only retrieval: search the "bm25" named vector with the BM25 query
# embedding, keeping the top 10 points with payloads.
results = client.query_points(
    collection_name=COLLECTION_NAME,
    using="bm25",
    query=models.SparseVector(**sparse_query_vector.as_object()),
    limit=10,
    with_payload=True,
)

In [35]:
# Generate an answer from the current `results` (the BM25 search when cells
# are run in order) and pretty-print it.
pprint(get_response(results,query_text))

(' Based on the context given, here are the key points about protons in the '
 'stroma:\n'
 '\n'
 '1. During photosynthesis, protons are removed from the stroma. This leads to '
 'a decrease in the number of protons in the stroma. \n'
 '\n'
 '2. Protons are removed from the stroma as electrons move through the '
 'photosystems. The primary electron acceptor, which is located on the outer '
 'side of the membrane, transfers its electron not to an electron carrier but '
 'to an H+ carrier. This removes a proton from the stroma while transporting '
 'an electron. \n'
 '\n'
 '3. As this process occurs across the membrane, it leads to an accumulation '
 'of protons in the lumen (inner side of the membrane) and a decrease in '
 'protons in the stroma (outer side). \n'
 '\n'
 '4. This creates a proton gradient across the thylakoid membrane, with more '
 'protons inside the lumen and fewer protons in the stroma. It also leads to a '
 'measurable decrease in the pH in the lumen. \n'
 '\n'
 '5. 

### Hybrid Search with Reciprocal Rank Fusion (RRF)

In [36]:
# Stage 1: fetch 20 candidates from each of the dense and BM25 vectors.
prefetch = [
    models.Prefetch(
        using="all-MiniLM-L6-v2",
        query=next(dense_embedding_model.query_embed(query_text)),
        limit=20,
    ),
    models.Prefetch(
        using="bm25",
        query=models.SparseVector(**sparse_query_vector.as_object()),
        limit=20,
    ),
]

# Stage 2: fuse the two candidate lists with RRF and keep the top 10.
results = client.query_points(
    collection_name=COLLECTION_NAME,
    prefetch=prefetch,
    query=models.FusionQuery(fusion=models.Fusion.RRF),
    limit=10,
    with_payload=True,
)

In [37]:
# Generate an answer from the current `results` (the RRF-fused search when
# cells are run in order) and pretty-print it.
pprint(get_response(results,query_text))

(' Based on the context given, here are the key points about protons in the '
 'stroma:\n'
 '\n'
 '- During the process of photosynthesis within the chloroplast, protons in '
 'the stroma decrease in number. This is because:\n'
 '\n'
 '1. The primary electron acceptor which is located towards the outer side of '
 'the thylakoid membrane transfers its electron not to an electron carrier but '
 'to an H+ carrier. So this removes a proton from the stroma while '
 'transporting an electron. \n'
 '\n'
 '2. Along with electrons that come from the acceptor of electrons of PS I, '
 'protons are necessary for the reduction of NADP+ to NADPH+H+. These protons '
 'are also removed from the stroma. \n'
 '\n'
 '- This removal of protons from the stroma contributes to the development of '
 'a proton gradient across the thylakoid membrane, with more protons '
 'accumulating in the thylakoid lumen and fewer in the stroma. \n'
 '\n'
 '- This proton gradient is important because its breakdown leads to A

### Late-Interaction Reranking (ColBERT)

In [39]:
# Stage 1: fetch 20 candidates from each of the dense and BM25 vectors.
prefetch = [
    models.Prefetch(
        using="all-MiniLM-L6-v2",
        query=next(dense_embedding_model.query_embed(query_text)),
        limit=20,
    ),
    models.Prefetch(
        using="bm25",
        query=models.SparseVector(**sparse_query_vector.as_object()),
        limit=20,
    ),
]

# Stage 2: query the prefetched candidates against the "colbertv2.0" vector
# with the ColBERT query embedding and keep the top 5.
results = client.query_points(
    collection_name=COLLECTION_NAME,
    prefetch=prefetch,
    using="colbertv2.0",
    query=late_query_vector,
    limit=5,
    with_payload=True,
)

In [40]:
# Generate an answer from the current `results` (the late-interaction query
# when cells are run in order) and pretty-print it.
pprint(get_response(results,query_text))

(' Based on the context provided, here are the key points about protons in the '
 'stroma:\n'
 '\n'
 '- During photosynthesis in the chloroplast, protons in the stroma decrease '
 'in number. This is because:\n'
 '\n'
 '1. The primary acceptor of electrons from photosystem II, which is located '
 'on the outer side of the thylakoid membrane, transports an electron by also '
 'removing a proton from the stroma side. \n'
 '\n'
 '2. The NADP+ reductase enzyme, which reduces NADP+ to NADPH, is located on '
 'the stroma side of the membrane. This process requires protons from the '
 'stroma. \n'
 '\n'
 '- So as electrons move through the photosystems and are transported across '
 'the membrane, protons are removed from the stroma side. \n'
 '\n'
 '- This creates a proton gradient across the thylakoid membrane, with fewer '
 'protons on the stroma side and accumulation of protons in the thylakoid '
 'lumen inside the membrane. \n'
 '\n'
 '- It is this proton gradient, with higher concentrati