In [None]:
from dotenv import load_dotenv

load_dotenv()

# This notebook assumes the following env vars exist in a .env file.
#
# Read explicitly (via os.getenv) when the AstraDB vector store is created:
#   ASTRA_DB_API_ENDPOINT
#   ASTRA_DB_APPLICATION_TOKEN
#
# Not referenced by name anywhere in this notebook; presumably picked up
# implicitly by the Azure OpenAI clients (TODO confirm):
#   AZURE_OPENAI_ENDPOINT
#   AZURE_OPENAI_API_KEY
#   OPENAI_API_VERSION
#
# NOTE(review): OpenAIEmbeddings() and ChatOpenAI(...) are constructed without
# explicit credentials, so OPENAI_API_KEY is presumably required as well -- confirm.

## Set up Azure LLMs for LangChain

In [None]:
# Azure-hosted chat and embedding models
from langchain.chat_models import AzureChatOpenAI
from langchain.embeddings import AzureOpenAIEmbeddings

temperature = 0.0

# Every Azure client created in this cell targets the same API version.
AZURE_API_VERSION = "2023-05-15"


def _azure_chat_model(deployment, model_version):
    """Build an AzureChatOpenAI client for one deployment.

    Args:
        deployment: value passed as `azure_deployment`.
        model_version: value passed as `model_version`.

    Returns:
        An AzureChatOpenAI instance using the shared API version and the
        notebook-wide `temperature`.
    """
    return AzureChatOpenAI(
        azure_deployment=deployment,
        openai_api_version=AZURE_API_VERSION,
        model_version=model_version,
        temperature=temperature,
    )


gpt_35_turbo = _azure_chat_model("gpt-35-turbo", "0613")
gpt_35_turbo_16k = _azure_chat_model("gpt-35-turbo-16k", "0613")
gpt_4 = _azure_chat_model("gpt-4", "1106-preview")
gpt_4_32k = _azure_chat_model("gpt-4-32k", "0613")

embeddings = AzureOpenAIEmbeddings(
    openai_api_version=AZURE_API_VERSION,
    azure_deployment="text-embedding-ada-002",
)

## Init an AstraDB vector store

In [None]:
from langchain.vectorstores.astradb import AstraDB
from langchain.embeddings import OpenAIEmbeddings
import os

collection_name = "open_ai_512"

# NOTE(review): this rebinds `embeddings`, replacing the AzureOpenAIEmbeddings
# instance assigned in the Azure setup cell; from here on the name refers to
# an OpenAIEmbeddings instance.
embeddings = OpenAIEmbeddings()

# Connection settings come from the environment; os.getenv yields None for a
# variable that is not set.
astra_token = os.getenv("ASTRA_DB_APPLICATION_TOKEN")
astra_endpoint = os.getenv("ASTRA_DB_API_ENDPOINT")

astra_db_vstore = AstraDB(
    embedding=embeddings,
    collection_name=collection_name,
    api_endpoint=astra_endpoint,
    token=astra_token,
)

## Load Datasets

In [None]:
# import json

# base_path = "./data/"

# datasets = {}
# golden_set = []

# for name in os.listdir(base_path):
#     if os.path.isdir(os.path.join(base_path, name)):
#         datasets[name] = []
#         with open(os.path.join(base_path, name, "rag_dataset.json")) as f:
#             examples = json.load(f)["examples"]
#             index = 0
#             for e in examples:
#                 datasets[name].append(e["query"])
#                 golden_set.append(
#                     {
#                         "query": e["query"],
#                         "response": e["reference_answer"],
#                     }
#                 )
#                 index += 1
#             print("Loaded dataset: ", name)

In [None]:
# Retrieve the text of a short story that will be indexed in the vector store
! curl https://raw.githubusercontent.com/CassioML/cassio-website/main/docs/frameworks/langchain/texts/amontillado.txt --output amontillado.txt
inputs = ["amontillado.txt"]

In [None]:
from langchain_community.document_loaders import TextLoader

# Read the downloaded story from disk; `documents` is what the splitting
# cell passes to split_documents().
loader = TextLoader("amontillado.txt")
documents = loader.load()

In [None]:
from langchain.text_splitter import TokenTextSplitter, CharacterTextSplitter

# Token-based chunking parameters.
# Alternative kept for reference: chunk_size=512, chunk_overlap=32.
CHUNK_SIZE = 128
CHUNK_OVERLAP = 16

text_splitter = TokenTextSplitter(chunk_overlap=CHUNK_OVERLAP, chunk_size=CHUNK_SIZE)

# Chunk the loaded story; `split_docs` is what would be added to the vector store.
split_docs = text_splitter.split_documents(documents)

In [None]:
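# Indexing step, disabled by default -- presumably so that re-running the notebook
# does not insert the same chunks again (TODO confirm). Uncomment and run once if
# the collection has not been populated yet.
# astra_db_vstore.add_documents(split_docs)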

## Build an Agent

In [None]:
from langchain.tools.retriever import create_retriever_tool
from langchain.agents import initialize_agent
from langchain.chat_models import ChatOpenAI
# from langchain_openai import ChatOpenAI
from langchain import hub

model = "gpt-3.5-turbo-1106"

# Expose the vector store to the agent as a retrieval tool (search_kwargs k=4).
retriever = astra_db_vstore.as_retriever(search_kwargs=dict(k=4))
retriever_tool = create_retriever_tool(
    retriever=retriever,
    name="retrieval_tool",
    description="Retrieves documents relevant to the given prompt",
)
tools = [retriever_tool]

# NOTE(review): `prompt` is pulled from the hub here but is not passed to
# initialize_agent below.
prompt = hub.pull("hwchase17/openai-functions-agent")

llm = ChatOpenAI(model=model)
agent_options = {"handle_parsing_errors": True, "verbose": False}
agent = initialize_agent(tools=tools, llm=llm, **agent_options)

In [None]:
# Smoke-test the agent with a single question about the indexed story.
# (Query text fixed: the original was missing "of" and had a trailing space.)
question = "Based on the morals of the story, what is the theme?"
response = agent.invoke(question)
print(response)

## Set up Evaluation

### Init TruLens

In [None]:
from trulens_eval import Tru

# Create the TruLens handle used below to run the dashboard and the evaluator.
# The commented-out variant passes a database URL taken from the
# TRULENS_DB_CONN_STRING environment variable instead of using the defaults.
# tru = Tru(database_url=os.getenv("TRULENS_DB_CONN_STRING"))
tru = Tru()
# Optional: uncomment to reset the TruLens database before a fresh run.
# tru.reset_database()

### Start Dashboard

In [None]:
# Launch the TruLens dashboard; the last cell of the notebook stops it again
# with tru.stop_dashboard().
tru.run_dashboard()

In [None]:
# Download the golden set of questions and ground_truths
! curl -X GET "https://datasets-server.huggingface.co/rows?dataset=explodinggradients%2Ffiqa&config=main&split=train&offset=0" --output train.json

In [None]:
import json


def _as_reference_text(ground_truths):
    """Collapse a row's `ground_truths` value into a single reference string.

    Each golden-set entry's "response" is used as the reference answer by the
    ground-truth feedback defined later in the notebook. The dataset field is
    presumably a list of answer strings (TODO confirm against the dataset
    schema), so a list is joined with blank lines rather than passed through
    as a Python list.

    Args:
        ground_truths: a string, an iterable of answers, or None.

    Returns:
        The string unchanged, the answers joined by blank lines, or "" for None.
    """
    if ground_truths is None:
        return ""
    if isinstance(ground_truths, str):
        return ground_truths
    return "\n\n".join(str(answer) for answer in ground_truths)


# Decode explicitly as UTF-8 so parsing does not depend on the platform's
# default text encoding.
with open("train.json", "r", encoding="utf-8") as f:
    rows = json.load(f)["rows"]

# One {"query", "response"} pair per dataset row.
golden_set = [
    {
        "query": record["row"]["question"],
        "response": _as_reference_text(record["row"]["ground_truths"]),
    }
    for record in rows
]


In [None]:
# Sanity check: number of query/response pairs loaded into the golden set.
print(len(golden_set))

### Initialize Feedback Functions 

In [None]:
from trulens_eval.feedback.provider import AzureOpenAI, OpenAI
from trulens_eval.feedback import Groundedness, GroundTruthAgreement
from trulens_eval import TruChain, Feedback
from trulens_eval.app import App, Select
import numpy as np

# LLM provider that scores every feedback; it uses the same model name as the agent.
# (Azure alternative: AzureOpenAI(deployment_name="gpt-35-turbo"))
provider = OpenAI(model_engine=model)

# Selector shared by the context-based feedbacks: the page_content of each item
# returned by the first tool's `_run` call, gathered with collect().
retrieved_context = Select.RecordCalls.tools[0]._run.rets[:].page_content.collect()

# Groundedness of the answer with respect to the retrieved context.
grounded = Groundedness(groundedness_provider=provider)
f_groundedness = Feedback(
    grounded.groundedness_measure_with_cot_reasons,
    name="Groundedness (A to C)",
).on(retrieved_context).on_output().aggregate(grounded.grounded_statements_aggregator)

# Relevance of the final answer to the question that was asked.
f_answer_relevance = (
    Feedback(provider.relevance_with_cot_reasons, name="Answer Relevance (A to Q)")
    .on_input_output()
)

# Relevance of the retrieved context to the question, aggregated with np.mean.
f_context_relevance = Feedback(
    provider.qs_relevance_with_cot_reasons,
    name="Context Relevance (C to Q)",
).on_input().on(retrieved_context).aggregate(np.mean)

# Agreement between the answer and the golden-set reference answer.
ground_truth_agreement = GroundTruthAgreement(golden_set, provider=provider)
f_ground_truth = Feedback(
    ground_truth_agreement.agreement_measure,
    name="Ground Truth Agreement (A to GT)",
).on_input_output()

## Run Evaluation

In [None]:
# sample_questions = [
#     "What is the theme of the story?",
#     "What is the Cask of Amontillado?",
#     "What motivates Montresor to seek revenge against Fortunato, and how does Poe reveal this motivation to the reader",
#     "Analyze the character of Fortunato. How does Poe portray him, and what is the reader's impression of him?",
# ]

In [None]:
import time

# The number of questions in the dataset to evaluate
num_questions = 20

# How many times a single question is sent to the agent before giving up.
max_attempts = 5

# Select the questions up front, outside the retry loop, so that a golden set
# shorter than `num_questions` is not mistaken for a transient failure and
# retried with backoff.
questions = [entry["query"] for entry in golden_set[:num_questions]]
if len(questions) < num_questions:
    print(f"Golden set only has {len(questions)} questions; evaluating all of them.")

# Two app ids, so the same agent is recorded under two separate TruLens apps.
apps = [f"fiqa_agent_{i}_{model}" for i in range(2)]

for app in apps:
    tru_recorder = TruChain(
        agent,
        app_id=app,
        feedbacks=[
            f_answer_relevance,
            f_context_relevance,
            f_groundedness,
            f_ground_truth,
        ],
        feedback_mode="deferred",
    )

    with tru_recorder as _:
        for q in questions:
            for attempt in range(max_attempts):
                try:
                    response = agent.invoke(q)
                except Exception as e:
                    if attempt + 1 == max_attempts:
                        # Out of retries: report it instead of silently moving
                        # on, and skip the pointless final sleep.
                        print(
                            f"Encountered an exception: {e}. "
                            f"Giving up after {max_attempts} attempts on: {q}"
                        )
                        break
                    delay = 2**attempt
                    print(
                        f"Encountered an exception: {e}. Backing off for {delay} seconds."
                    )
                    time.sleep(delay)
                else:
                    print(f"\nQ: {q}\nResponse: {response}")
                    break  # Exit the retry loop once invoke succeeds

In [None]:
# Start the evaluator. Let this process run until all evaluations are complete!
# The recorders above were created with feedback_mode="deferred"; presumably this
# evaluator is what computes those deferred feedbacks (TODO confirm).
tru.start_evaluator()

In [None]:
# Stop the evaluator.
# NOTE(review): under "Run All" this cell executes immediately after
# tru.start_evaluator(); run it manually once the evaluations have finished.
tru.stop_evaluator()

In [None]:
# Shut down the dashboard that was started earlier with tru.run_dashboard().
tru.stop_dashboard()