# Setup

In [None]:
# %pip install -r requirements.txt

# Environment Variables

In [None]:
# Load environment variables for this notebook session

from dotenv import load_dotenv

# Load environment variables from the .env file.
# NOTE(review): presumably this provides the API credentials that the `utils`
# helpers in later cells rely on — confirm which variables are required.
# The call is left as a bare expression, so its return value is shown as the
# cell output.
load_dotenv()

# Utilities

In [11]:
import utils

# Test 1a - OpenAI and Qdrant

In [None]:
# Assemble the question-answering chain from OpenAI embeddings and a Qdrant store

# Literals handed positionally to the `utils` helpers below. The names reflect
# the apparent meaning of each argument — TODO confirm against the signatures of
# utils.create_qdrant_vector_store and utils.create_chain.
QDRANT_LOCATION = ':memory:'
QDRANT_COLLECTION = 'holiday-test'  # NOTE(review): name looks unrelated to the 10-K docs — confirm it is intended
EMBEDDING_DIMENSION = 1536  # presumably must match the model behind create_embeddings_openai — verify
CHAT_MODEL = 'gpt-4o'

embeddings = utils.create_embeddings_openai()

# Load every HTML file under docs/10k/html (recursive glob), then chunk them.
docs = utils.process_directory(
    path="docs/10k/html",
    glob="**/*.html",
    loader_cls=None,
    use_multithreading=True,
)
chunks = utils.chunk_docs_recursive(docs=docs)

print(f'\nNumber of chunks = {len(chunks)}\n')

# Build the vector store from the chunks and wrap it in a retriever.
vector_store = utils.create_qdrant_vector_store(
    QDRANT_LOCATION,
    QDRANT_COLLECTION,
    EMBEDDING_DIMENSION,
    embeddings,
    chunks,
)
retriever = utils.create_retriever_qdrant(vector_store)

# Combine prompt template, chat model name and retriever into the final chain.
chat_prompt_template = utils.create_chat_prompt_template()
chain = utils.create_chain(CHAT_MODEL, chat_prompt_template, retriever)

In [None]:
# Smoke-test the chain with a few sample questions

questions = [
    "What is the annual revenue of Uber?",
    "What is the annual revenue of Lyft?",
    "How does Uber's revenue compare to Lyft's revenue?",
]

for question in questions:
    # Echo the question so each answer below can be matched to it.
    print(question)

    chain_input = {"question": question}
    result = chain.invoke(chain_input)

    # Print the raw chain output first, then only the `content` of its
    # "response" entry.
    print(result)
    response = result["response"]
    print(response.content)

    # Separator between questions.
    print("\n*****")

In [None]:
# Evaluate the chain using Ragas

import time
from pathlib import Path

import pandas as pd
from ragas.metrics import (
    answer_correctness,
    answer_relevancy,
    context_precision,
    context_recall,
    faithfulness)

# Get the questions and ground truths from the test set CSV.
testset_df = pd.read_csv("testsets/10k_testset.csv")

# Coerce every entry to str before handing it to the evaluation helper.
# Note: a missing cell (read by pandas as NaN) becomes the literal string "nan".
questions = [str(question) for question in testset_df["user_input"].values.tolist()]
groundtruths = [str(ground_truth) for ground_truth in testset_df["reference"].values.tolist()]

eval_metrics = [
    answer_correctness,
    answer_relevancy,
    context_precision,
    context_recall,
    faithfulness,
]

# DataFrame.to_csv does not create missing parent directories. Make sure the
# output directory exists *before* the evaluation runs, so a path or permission
# problem surfaces up front instead of after the evaluation has finished.
Path("evaluations").mkdir(parents=True, exist_ok=True)

ragas_results, ragas_results_df = utils.run_ragas_evaluation(
    chain,
    questions,
    groundtruths,
    eval_metrics,
)

# Write the results to disk; the timestamp keeps successive runs from
# overwriting each other.
# NOTE(review): the "10x_" prefix may be a typo for "10k_" (the test set is
# 10k_testset.csv) — left unchanged in case other tooling relies on the name.
timestr = time.strftime("%Y%m%d%H%M%S")
ragas_results_df.to_csv(f"evaluations/10x_test1_testset_evaluation_{timestr}.csv")

# Show the results
print(ragas_results)