**Step 0: Imports, constants, and API Keys!**

In [13]:
!pip install -q langchain==0.2.16 langchain_core==0.2.38 langchain_community==0.2.16 pymupdf openai 
!pip install -q langchain_openai==0.1.23 langchain-qdrant qdrant_client asyncio ragas==0.1.14 pandas


In [31]:
import os
import openai
from getpass import getpass

# Prompt for the OpenAI key (input is not echoed) and publish it in both places
# this notebook reads it from: the OPENAI_API_KEY environment variable and the
# `openai` module attribute.
os.environ["OPENAI_API_KEY"] = getpass("OpenAI API Key: ")
openai.api_key = os.environ["OPENAI_API_KEY"]

**Step 1: Download and chunk the data**

We are going to use the following docs as our knowledge base:
1. Blueprint for an AI Bill of Rights: Making Automated Systems Work for the American People (PDF)
2. National Institute of Standards and Technology (NIST) Artificial Intelligence Risk Management Framework (NIST AI 600-1, PDF)

Let's start with a simple fixed-size chunking strategy as a baseline, and later evaluate parent-document retrieval if we have time.

In [15]:
import importlib
import vanilla_rag
from vars import PDFS, CHUNK_SIZE, OVERLAP

importlib.reload(vanilla_rag)
for pdf in PDFS:
    chunks = await vanilla_rag.load_and_chunk_pdf(pdf,CHUNK_SIZE,OVERLAP)


Loading https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf...
Chunking...
Loading https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf...
Chunking...


**Step 2: Construct and test baseline RAG Pipeline**

In [32]:
from vars import BASELINE_CHAT_MODEL, BASELINE_EMBEDDING_MODEL

importlib.reload(vanilla_rag)
rag_chain = await vanilla_rag.vanilla_openai_rag_chain(texts=chunks, 
                                            openai_key=openai.api_key, 
                                            embedding_model=BASELINE_EMBEDDING_MODEL,
                                            chat_model="gpt-4o-mini")

created qdrant client
created embeddings
populated vector db
created chain


In [24]:
# Show which chat model name the `vars` module exposes as BASELINE_CHAT_MODEL.
print(BASELINE_CHAT_MODEL)

gpt-4o-mini-2024-07-18


In [33]:
from pprint import pprint
response = await rag_chain.ainvoke({"input":"What are some key risks associated with modern LLMs?"})
pprint(response)

APIConnectionError: Connection error.

**Step 3: Evaluate baseline RAG system**

This step assumes that `gen_synthetic_data.ipynb` has already been run to generate the test data.

In [28]:
# Load the dataset and run the RAG pipeline
import pandas as pd
from tqdm.asyncio import tqdm_asyncio
from vars import TEST_DATASET_FILE

test_df = pd.read_csv(TEST_DATASET_FILE)

test_questions = test_df["question"].to_list()
test_gt = test_df["ground_truth"].to_list()

answers = []
contexts = []

for question in tqdm_asyncio(test_questions,desc="Processing Questions"):
  response = await rag_chain.ainvoke({"input" : question})
  answers.append(response["response"].content)
  contexts.append([context.page_content for context in response["context"]])

Processing Questions:  13%|█▎        | 4/30 [00:20<02:15,  5.23s/it]

In [33]:
from datasets import Dataset
from vars import N_EVAL_QUESTIONS

# Assemble the questions, generated answers, retrieved contexts, and ground
# truths into a Hugging Face Dataset, then persist it so the evaluation step
# can reload it from disk later.
response_dataset = Dataset.from_dict(
    dict(
        question=test_questions,
        answer=answers,
        contexts=contexts,
        ground_truth=test_gt,
    )
)
response_dataset.save_to_disk(f"baseline_response_dataset_{N_EVAL_QUESTIONS}")


Saving the dataset (0/1 shards):   0%|          | 0/100 [00:00<?, ? examples/s]

In [11]:
# Use ragas to evaluate
from datasets import load_from_disk
from vars import N_EVAL_QUESTIONS

from ragas import evaluate
from ragas.run_config import RunConfig
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_precision,
    context_recall
)

# Reload the saved responses so this cell can run without re-querying the
# RAG chain.
response_dataset = load_from_disk(f"baseline_response_dataset_{N_EVAL_QUESTIONS}")

# Score the responses on the four ragas metrics imported above, using a single
# worker (max_workers=1).
metrics = [
    faithfulness,
    answer_relevancy,
    context_precision,
    context_recall,
]
results = evaluate(
    response_dataset,
    metrics=metrics,
    run_config=RunConfig(max_workers=1),
)

Evaluating:   0%|          | 0/120 [00:00<?, ?it/s]

Exception raised in Job[108]: APIConnectionError(Connection error.)
Exception raised in Job[77]: APIConnectionError(Connection error.)
Exception raised in Job[68]: APIConnectionError(Connection error.)
Exception raised in Job[67]: APIConnectionError(Connection error.)
Exception raised in Job[111]: APIConnectionError(Connection error.)
Exception raised in Job[69]: APIConnectionError(Connection error.)


KeyboardInterrupt: 

In [32]:
# Print the scores returned by ragas `evaluate` (one value per metric).
# NOTE(review): NaN scores presumably mean the evaluation jobs failed or were
# interrupted -- confirm against the evaluation cell's log output.
print(results)

{'faithfulness': nan, 'answer_relevancy': nan, 'context_precision': nan, 'context_recall': nan}
