## Multimodal Retrieval

### Deep Lake REST API

In [None]:
def get_answer(queries: list, org_id: str, dataset_name: str, k=4, number_of_images=3):
    """POST a retrieval query to the ColPali query endpoint and return its JSON.

    The bearer token is read from the ``TOKEN`` environment variable.

    Args:
        queries: Sent as ``"queries"`` in the JSON request body.
        org_id: Organization id interpolated into the endpoint URL.
        dataset_name: Dataset name interpolated into the endpoint URL.
        k: Sent as ``"k"`` in the JSON request body.
        number_of_images: Sent as ``"number_of_images"`` in the JSON request
            body.

    Returns:
        The response body decoded from JSON.
    """
    url = f"https://beta.activeloop.dev/api/query/colpali/{org_id}/{dataset_name}"

    # Forward the caller's values: the body previously hard-coded 4 and 3,
    # so the `k` and `number_of_images` arguments were silently ignored.
    data = {
        "queries": queries,
        "k": k,
        "number_of_images": number_of_images,
    }

    headers = {
        "Authorization": f"Bearer {os.getenv('TOKEN')}",
        "Content-Type": "application/json",
    }
    response = requests.post(url, headers=headers, json=data)
    return response.json()

In [None]:

def save_images(value_returned: dict):
    """Decode the base64 images in ``value_returned["images"]`` and save them.

    ``value_returned["images"]`` is iterated as one group of encoded images
    per question. Each image is written to the current working directory as
    ``question_<question index>_image_<image index>.jpg``.
    """
    question_no = 0
    for encoded_group in value_returned["images"]:
        for image_no, encoded in enumerate(encoded_group):
            # base64 text -> raw bytes -> PIL image, then write it out.
            decoded = Image.open(BytesIO(base64.b64decode(encoded)))
            decoded.save(f"question_{question_no}_image_{image_no}.jpg")
        question_no += 1


### Retrieve the best images and get the answer

In [None]:
def send_request(query: list, org_id: str, dataset_name: str):
    """Retrieve images for ``query``, save them, and print a Bedrock answer.

    Calls ``get_answer`` and passes the result to ``save_images``. Then, for
    each group in ``value_returned["images"]``, only the first image is
    decoded and passed with ``query`` to ``get_bedrock_answer_with_images``;
    the returned answer is printed. Empty groups produce no output.
    """
    retrieved = get_answer(query, org_id, dataset_name)
    save_images(retrieved)

    for encoded_group in retrieved["images"]:
        for first_encoded in encoded_group:
            print(
                "the answer is: ",
                get_bedrock_answer_with_images(
                    query, base64.b64decode(first_encoded)
                ),
            )
            # Only the first image of each group is used.
            break

In [None]:
# Organization and dataset that form the query endpoint path.
dataset_name = "ingestion_ml_test2_colpali"
org_id = "emanuelebeta"

# NOTE(review): send_request annotates `query` as a list, but a single string
# is passed here — confirm which form the endpoint expects.
questions = "describe the Gaussian distribution curve"

send_request(query=questions, org_id=org_id, dataset_name=dataset_name)


## Deep Memory

In [None]:
import openai
from dotenv import load_dotenv
import os

# Load environment variables via python-dotenv (typically from a local .env).
load_dotenv()

# Fail fast with a clear message when the key is missing. The previous
# `os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")` raised an opaque
# TypeError in that case (os.environ values must be str) and was a no-op
# whenever the variable was already set.
if os.getenv("OPENAI_API_KEY") is None:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; define it in the environment or in a .env file."
    )


def embedding_function(texts, model="text-embedding-3-large"):
    """Embed one or more inputs with ``openai.embeddings.create``.

    Args:
        texts: A single string, or an iterable of inputs. A single string is
            wrapped in a one-element list.
        model: Model name passed to ``openai.embeddings.create``.

    Returns:
        A list holding the ``embedding`` attribute of every item in the
        response's ``data``, in response order.
    """
    if isinstance(texts, str):
        texts = [texts]

    # Replace newlines with spaces in string inputs only. Non-string items are
    # passed through untouched. This replaces a bare `except: pass`, which
    # swallowed every exception (including KeyboardInterrupt) and skipped
    # normalization for *all* items as soon as one of them was not a string.
    texts = [t.replace("\n", " ") if isinstance(t, str) else t for t in texts]

    response = openai.embeddings.create(input=texts, model=model)
    return [data.embedding for data in response.data]


def retrieve_context_from_deeplake(vector_store_db, user_question, deep_memory, k=4):
    """Search the vector store for context relevant to ``user_question``.

    Args:
        vector_store_db: Object exposing a ``search`` method that accepts the
            keyword arguments used below.
        user_question: Passed to ``search`` as ``embedding_data``; it is
            embedded with ``embedding_function``.
        deep_memory: Forwarded to ``search(deep_memory=...)``. Pass ``True``
            to use the Deep Memory stored inside the vector store.
        k: Forwarded to ``search(k=...)``. Defaults to 4, the value that was
            previously hard-coded.

    Returns:
        Whatever ``vector_store_db.search`` returns with ``return_view=False``.
    """
    # deep memory inside the vector store ==> deep_memory=True
    answer = vector_store_db.search(
        embedding_data=user_question,
        embedding_function=embedding_function,
        deep_memory=deep_memory,
        return_view=False,
        k=k,
    )
    return answer


### Load VectorStore

In [None]:
# Placeholder question; it is empty and not referenced by the visible cells.
question = ""
# Deep Lake hub path of the legal dataset used in the Deep Memory comparison.
legal_dataset = "hub://activeloop/deep_memory_legal_dataset_24"
# NOTE(review): VectorStore is not imported in the visible cells — presumably
# imported from deeplake earlier in the notebook; confirm.
vector_store = VectorStore(legal_dataset)

### Compare the answer with and without Deep Memory using Bedrock and Claude Sonnet

In [None]:
# NOTE(review): `el` (the question) and `idx` (its index) are not defined in
# the visible cells — presumably left over from an earlier loop; confirm.

# Retrieve context for the same question with and without Deep Memory.
deep_memory_chunks = retrieve_context_from_deeplake(
    vector_store_db=vector_store, user_question=el, deep_memory=True
)
no_deep_memory_chunks = retrieve_context_from_deeplake(
    vector_store_db=vector_store, user_question=el, deep_memory=False
)

# Generate one answer per retrieved context via get_bedrock_answer_with_text.
final_answer_deep_memory = get_bedrock_answer_with_text(el, deep_memory_chunks)
final_answer_no_deep_memory = get_bedrock_answer_with_text(el, no_deep_memory_chunks)

# Print both answers so they can be compared.
print(f"final answer deep memory: {final_answer_deep_memory} for question {idx}")
print(f"final answer no deep memory: {final_answer_no_deep_memory} for question {idx}")