In [None]:
!pip install pandas --quiet
!pip install "datasets[s3]==2.14.4"
!pwd

In [None]:
# S3 location of the dataset loaded by this notebook: the key prefix inside the
# bucket, and the bucket name itself. Both are joined into an s3:// URI when
# the dataset is read.
dataset_s3_path: str = "admin_ch_train_dataset/train"
dataset_s3_bucket: str = "sagemaker-eu-west-1-843197046435"

In [None]:
from datasets import load_from_disk
import s3fs
import aiobotocore.session

# Build an S3 filesystem handle that authenticates with the "default" AWS
# profile; only its storage options are forwarded to `load_from_disk`.
session = aiobotocore.session.AioSession(profile="default")
fs = s3fs.S3FileSystem(session=session)

# Read the saved dataset straight from s3://<bucket>/<path>.
dataset = load_from_disk(
    "s3://" + dataset_s3_bucket + "/" + dataset_s3_path,
    storage_options=fs.storage_options,
)

In [None]:
# Show the first record as a quick sanity check. display() is used explicitly
# because more statements follow in this cell, and Jupyter only auto-renders
# the value of a cell's last expression.
display(dataset[0])

!pip uninstall aiobotocore -y
!pip install "../03_chatbot/bedrock-python-sdk/boto3-1.28.21-py3-none-any.whl"
!pip install "../03_chatbot/bedrock-python-sdk/botocore-1.31.21-py3-none-any.whl"

In [None]:
import boto3

# Make boto3's default session use the "default" AWS profile.
boto3.setup_default_session(profile_name="default")
# Set AWS_DEFAULT_REGION in the kernel's environment; a later cell reads this
# variable through os.environ to choose the region for the catalogs.
%env AWS_DEFAULT_REGION = eu-west-1

In [None]:
# Install the sibling ../03_chatbot project in editable mode, then print the
# metadata pip has recorded for the `chatbot` distribution to confirm it.
%pip install -e ../03_chatbot
%pip show chatbot
# Restart the kernel if the "Note:" line in the pip output says so

In [None]:
import importlib

# "03_chatbot" starts with a digit, so it is not a valid identifier for a plain
# `import` statement; importlib is used to load it by name instead.
# NOTE(review): this requires the directory containing 03_chatbot to be
# importable (on sys.path) - TODO confirm.
chatbot = importlib.import_module("03_chatbot")

In [None]:
# Sanity check: resolve ModelCatalog through the dynamically imported module.
# As the cell's only expression, its repr is shown as the cell output.
chatbot.catalog.model_catalog.ModelCatalog

In [None]:
import os
from chatbot.catalog import ModelCatalog, RetrieverCatalog, PromptCatalog

# Regions come from the environment, with a fallback value when unset.
# NOTE(review): BEDROCK_REGION is defined here but not passed to any catalog
# in this cell - confirm whether ModelCatalog should receive it.
BEDROCK_REGION = os.getenv("BEDROCK_REGION", "us-west-2")
REGION = os.getenv("AWS_DEFAULT_REGION", "us-east-1")

# Build the retriever and model catalogs for REGION and materialise each one
# into a plain list so entries can be counted and indexed.
retriever_catalog = RetrieverCatalog([REGION])
retrievers = [*retriever_catalog]
model_catalog = ModelCatalog([REGION])
prompt_catalog = PromptCatalog()

llms = [*model_catalog]

# Report how many entries each catalog yielded.
print(f"Num Retriever: {len(retrievers)}")
print(f"Num LLMs: {len(llms)}")

In [None]:
import pandas as pd

In [None]:
# Display the discovered retrievers as a table to help pick one by position.
pd.DataFrame(retrievers)

In [None]:
# Display the discovered LLMs as a table to help pick one by position.
pd.DataFrame(llms)

In [None]:
from tqdm import tqdm
from langchain.memory import ChatMessageHistory
from chatbot.llm_app import get_app, generate_response

# Retriever/model combinations to evaluate. Currently a single pair: the first
# discovered retriever together with the third discovered LLM (positions refer
# to the `retrievers` and `llms` lists).
retriever_model_pairs = [
    dict(retriever=retrievers[0], model=llms[2]),
]


def run_questions_pipeline(dataset, retriever, model):
    """Ask every question in ``dataset`` and collect the answers with their sources.

    Args:
        dataset: Iterable of rows; each row must provide a ``"question"`` entry.
        retriever: Catalog entry exposing ``get_instance()``; also passed to
            ``get_app``.
        model: Catalog entry exposing ``get_instance()``; also passed to
            ``get_app``.

    Returns:
        A list with one ``pandas.DataFrame`` per question, in dataset order.
        Each frame has one row per source document (the document's metadata
        keys plus ``"page_content"``) and an ``"answer"`` column repeating the
        generated answer. When no source documents are returned, the frame
        holds a single row containing only ``"answer"``, so the answer is not
        lost.
    """
    app = get_app(retriever, model, prompt_catalog)
    retriever_instance = retriever.get_instance()
    model_instance = model.get_instance()

    row_results = []
    for row in tqdm(dataset):
        prompt = row["question"]

        # A fresh, empty chat history is passed for every question so that
        # earlier questions do not leak into later ones.
        response, full_response = generate_response(
            app, prompt, retriever_instance, model_instance, ChatMessageHistory()
        )
        resources = [
            {**doc.metadata, "page_content": doc.page_content}
            for doc in full_response["source_documents"]
        ]
        if resources:
            df_results = pd.DataFrame(resources)
            df_results["answer"] = response
        else:
            # Assigning a scalar column to an empty frame produces zero rows,
            # which would silently drop the answer; emit one row instead.
            df_results = pd.DataFrame({"answer": [response]})
        print(response)
        row_results.append(df_results)
    return row_results


# Run the question pipeline once for every configured retriever/model pair.
# `results` receives one entry per pair, each being the list returned by
# run_questions_pipeline. The loop variables are deliberately left at module
# level so they remain available in the kernel afterwards.
results = []
for pair in retriever_model_pairs:
    retriever, model = pair["retriever"], pair["model"]
    res = run_questions_pipeline(dataset, retriever, model)
    results += [res]