# Setup

In [40]:
# # Installs
%pip install -U langchain-community langchain-core==0.2.40 langchain_experimental langchain-google-community langchain-google-genai langchain-huggingface==0.0.3 langchain-openai langchain-qdrant langchain-google-vertexai
%pip install -U docx2txt
%pip install -U google-cloud-aiplatform
%pip install -U google-cloud-discoveryengine
%pip install -U nltk
%pip install -U openpyxl
%pip install -U pymupdf
%pip install -U python-dotenv
%pip install -U ragas==0.1.20
%pip install -U tqdm

# Verify installed packages have compatible dependencies
%pip check

Collecting langchain-community
  Using cached langchain_community-0.3.2-py3-none-any.whl.metadata (2.8 kB)
Collecting langchain-core
  Using cached langchain_core-0.3.12-py3-none-any.whl.metadata (6.3 kB)
Collecting langchain_experimental
  Using cached langchain_experimental-0.3.2-py3-none-any.whl.metadata (1.7 kB)
Collecting langchain-google-community
  Using cached langchain_google_community-2.0.1-py3-none-any.whl.metadata (3.4 kB)
Collecting langchain-google-genai
  Using cached langchain_google_genai-2.0.1-py3-none-any.whl.metadata (3.9 kB)
Collecting langchain-huggingface==0.0.3
  Using cached langchain_huggingface-0.0.3-py3-none-any.whl.metadata (1.2 kB)
Collecting langchain-openai
  Using cached langchain_openai-0.2.2-py3-none-any.whl.metadata (2.6 kB)
Collecting langchain-qdrant
  Using cached langchain_qdrant-0.1.4-py3-none-any.whl.metadata (1.7 kB)
Collecting langchain-google-vertexai
  Using cached langchain_google_vertexai-2.0.5-py3-none-any.whl.metadata (3.8 kB)
Collectin

# Utils

In [2]:
# Get environment variables

import os
from dotenv import load_dotenv
import uuid

# Load environment variables from .env file
load_dotenv()

PROJECT_ID = os.environ['PROJECT_ID']
print(PROJECT_ID)
REGION = os.environ['REGION']
print(REGION)
LOCATION_ID = os.environ['LOCATION_ID']
print(LOCATION_ID)
DATA_STORE_ID = os.environ['DATA_STORE_ID']
print(DATA_STORE_ID)

LANGCHAIN_PROJECT=os.environ['LANGCHAIN_PROJECT'] + f" - {uuid.uuid4().hex[0:8]}"

virtual-assist-poc-434617
us-central1
us
virtual-assist-poc-docs_1727390753315


In [3]:
# Download punkt_tab module that is used for sentence tokenizaiton

import nltk

nltk.download('punkt_tab')

[nltk_data] Downloading package punkt_tab to /Users/david/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [4]:
# Initialize Vertex AI

import vertexai

vertexai.init(
    project=os.environ['PROJECT_ID'], 
    location=os.environ['REGION'],
    )

In [8]:
# Load the docs

# TODO - add TextLoader refernece
# TODO - add CSVLoader reference
# https://python.langchain.com/docs/integrations/document_loaders/pymupdf/
# https://python.langchain.com/docs/integrations/document_loaders/microsoft_word/

from langchain_community.document_loaders import TextLoader
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.document_loaders import Docx2txtLoader

def process_file(path: str):

    docs = None

    # Select the right loader
    if 'txt' in path.lower():
        loader = TextLoader(path)
    elif 'pdf' in path.lower():
        loader = PyMuPDFLoader(path)
    elif 'docx' in path.lower():
        loader = Docx2txtLoader(path)
    else:
        print(f'No document loader found for {path}')

    docs = loader.load()

    return docs

#####

def test_process_file():
    doc = process_file('docs/CHW_EOC_04-21-2017_ENG.pdf')
    print(doc[0].metadata)

test_process_file()

{'source': 'docs/CHW_EOC_04-21-2017_ENG.pdf', 'file_path': 'docs/CHW_EOC_04-21-2017_ENG.pdf', 'page': 0, 'total_pages': 134, 'format': 'PDF 1.5', 'title': '', 'author': 'PTN Consulting', 'subject': '', 'keywords': '', 'creator': 'Microsoft® Word 2013', 'producer': 'Microsoft® Word 2013', 'creationDate': "D:20170421101839-07'00'", 'modDate': "D:20170421103401-07'00'", 'trapped': ''}


In [9]:
# Create embeddings using Hugging Face 

from langchain_huggingface import HuggingFaceEmbeddings

def create_embeddings_opensource(model: str) -> HuggingFaceEmbeddings:

    # Initialize the OpenAIEmbeddings class
    embeddings = HuggingFaceEmbeddings(model=model)

    return embeddings

#####

In [12]:
# Create embeddings using OpenAI

from langchain_openai import OpenAIEmbeddings

def create_embeddings_openai(model='text-embedding-ada-002') -> OpenAIEmbeddings:

    # Initialize the OpenAIEmbeddings class
    embeddings = OpenAIEmbeddings(model=model)

    return embeddings

#####

def test_create_embeddings_vertexai():
    text = 'What is my benefit for acupuncture?'
    embeddings = create_embeddings_openai()
    vector = embeddings.embed_query(text)
    print(vector)
    return embeddings

test_create_embeddings_vertexai()

[-0.0131338806822896, -0.0037410028744488955, 0.04308881610631943, -0.043627090752124786, -0.03259247913956642, 0.013968205079436302, -0.02541998028755188, -0.019889216870069504, -0.01629623956978321, 0.0030984384939074516, -0.000914224365260452, 0.010550166480243206, 0.0035929775331169367, 0.004100973252207041, -0.005779714789241552, -0.0023313984274864197, 0.051297493278980255, -0.009197752922773361, 0.005379373673349619, 0.005295268259942532, -0.013833636417984962, 0.0024255963508039713, 0.0038419291377067566, -0.0010529981227591634, -0.01407585944980383, 0.018059086054563522, 0.010563623160123825, -0.012158258818089962, -0.021759718656539917, -0.021719347685575485, 0.03646804764866829, 0.016417350620031357, -0.009554360061883926, -0.008464355021715164, -0.012158258818089962, 0.022540215402841568, 0.00017956476949620992, -0.009863867424428463, 0.02264786884188652, -0.00040980297490023077, 0.009137197397649288, 0.003249828005209565, -0.0010302896844223142, 0.004935297649353743, -0.00

OpenAIEmbeddings(client=<openai.resources.embeddings.Embeddings object at 0x32fe36fd0>, async_client=<openai.resources.embeddings.AsyncEmbeddings object at 0x330a7ac90>, model='text-embedding-ada-002', dimensions=None, deployment='text-embedding-ada-002', openai_api_version=None, openai_api_base=None, openai_api_type=None, openai_proxy=None, embedding_ctx_length=8191, openai_api_key=SecretStr('**********'), openai_organization=None, allowed_special=None, disallowed_special=None, chunk_size=1000, max_retries=2, request_timeout=None, headers=None, tiktoken_enabled=True, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={}, skip_empty=False, default_headers=None, default_query=None, retry_min_seconds=4, retry_max_seconds=20, http_client=None, http_async_client=None, check_embedding_ctx_length=True)

In [6]:
# Create embeddings using Vertex AI

# https://python.langchain.com/docs/integrations/text_embedding/google_vertex_ai_palm/

from langchain_google_vertexai import VertexAIEmbeddings

def create_embeddings_vertexai(model="text-embedding-004") -> VertexAIEmbeddings:

    # Initialize the VertexAIEmbeddings class
    embeddings = VertexAIEmbeddings(model_name=model)

    return embeddings

#####

def test_create_embeddings_vertexai():
    text = 'What is a DUR reject?'
    embeddings = create_embeddings_vertexai()
    vector = embeddings.embed_query(text)
    print(vector)
    return embeddings

test_create_embeddings_vertexai()

In [14]:
# Remove empty chunks 

def remove_empty_chunks(chunks_start: list) -> list:
    
    start = len(chunks_start)
    # print(f'start - {start} chunks')
    
    # Remove empty chunks
    chunks_end = [chunk for chunk in chunks_start if chunk.page_content.strip()]

    end = len(chunks_end)
    # print(f'end - {end} chunks')

    return chunks_end   

#####

from langchain.schema import Document

def test_remove_empty_chunks():
    docs = process_file('docs/CHW_EOC_04-21-2017_ENG.pdf')

    # Added a test doc
    doc = Document(
        page_content='',
        metadata=docs[0].metadata
    )
    docs.append(doc)
    print(len(docs))

    # Remove the empty doc (chunk)
    docs = remove_empty_chunks(docs)
    print(len(docs))

test_remove_empty_chunks()

135
134


In [15]:
# Create a text splitter using recursive character text splitter

# https://python.langchain.com/v0.1/docs/modules/data_connection/document_transformers/recursive_text_splitter/

from langchain.text_splitter import RecursiveCharacterTextSplitter

def chunk_docs_recursive(documents: list, chunk_size=500, chunk_overlap=50) -> list:

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)

    chunks_start = text_splitter.split_documents(documents)

    chunks_end = remove_empty_chunks(chunks_start=chunks_start)

    return chunks_end

#####

def test_chunk_docs_recursive(): 
    docs = process_file('docs/CHW_EOC_04-21-2017_ENG.pdf')
    chunks = chunk_docs_recursive(documents=docs)
    print(len(chunks))
    print(chunks[0].page_content)

test_chunk_docs_recursive()

615
Medi-Cal
Member Handbook
Combined Evidence of Coverage
and Disclosure Form                   
 
CAHealthWellness.com
For TTY, contact California Relay by dialing 711 and 
provide the Member Services number: 1-877-658-0305


In [16]:
# Create a text splitter using NLTK

# https://python.langchain.com/docs/how_to/split_by_token/

from langchain.text_splitter import NLTKTextSplitter

def chunk_docs_nltk(documents: list, chunk_size=500, chunk_overlap=50) -> list:

    text_splitter = NLTKTextSplitter(
    chunk_size=chunk_size, 
    chunk_overlap=chunk_overlap)

    chunks_start = text_splitter.split_documents(documents)

    chunks_end = remove_empty_chunks(chunks_start=chunks_start)

    return chunks_end

#####

def test_chunk_docs_nltk(): 
    docs = process_file('docs/CHW_EOC_04-21-2017_ENG.pdf')
    chunks = chunk_docs_nltk(documents=docs)
    print(len(chunks))
    print(chunks[0].page_content)

test_chunk_docs_nltk()


Created a chunk of size 744, which is longer than the specified 500
Created a chunk of size 796, which is longer than the specified 500
Created a chunk of size 653, which is longer than the specified 500
Created a chunk of size 1349, which is longer than the specified 500
Created a chunk of size 742, which is longer than the specified 500
Created a chunk of size 565, which is longer than the specified 500
Created a chunk of size 580, which is longer than the specified 500
Created a chunk of size 750, which is longer than the specified 500
Created a chunk of size 506, which is longer than the specified 500
Created a chunk of size 570, which is longer than the specified 500
Created a chunk of size 541, which is longer than the specified 500
Created a chunk of size 1038, which is longer than the specified 500
Created a chunk of size 577, which is longer than the specified 500
Created a chunk of size 587, which is longer than the specified 500
Created a chunk of size 605, which is longer t

630
Medi-Cal
Member Handbook
Combined Evidence of Coverage
and Disclosure Form                   
 
CAHealthWellness.com
For TTY, contact California Relay by dialing 711 and 
provide the Member Services number: 1-877-658-0305


In [18]:
# Create a text splitter semantic chunking 

# https://python.langchain.com/v0.1/docs/modules/data_connection/document_transformers/semantic-chunker/

from langchain_experimental.text_splitter import SemanticChunker

def chunk_docs_semantic(documents: list, ) -> list:

    # TODO - Use embeddings parameter
    text_splitter = SemanticChunker(create_embeddings_openai(), breakpoint_threshold_type="percentile")

    chunks_start = text_splitter.split_documents(documents)

    # Remove empty chunks
    chunks_end = remove_empty_chunks(chunks_start)

    return chunks_end

#####

def test_chunk_docs_semantic():
    docs = process_file('docs/CHW_EOC_04-21-2017_ENG.pdf')
    chunks = chunk_docs_semantic(docs)
    print(len(chunks))
    print(chunks[0].page_content)

test_chunk_docs_semantic()

252
Medi-Cal
Member Handbook
Combined Evidence of Coverage
and Disclosure Form                   
 
CAHealthWellness.com
For TTY, contact California Relay by dialing 711 and 
provide the Member Services number: 1-877-658-0305



In [21]:
# Create a Qdrant vector store

from langchain_core.embeddings import Embeddings
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

def create_qdrant_vector_store(location: str, collection_name: str, vector_size: int, embeddings: Embeddings, documents: list) -> QdrantVectorStore:

    # Initialize the Qdrant client
    qdrant_client = QdrantClient(location=location)

    # Create a collection in Qdrant
    qdrant_client.create_collection(collection_name=collection_name, vectors_config=VectorParams(size=vector_size, distance=Distance.COSINE))

    # Initialize QdrantVectorStore with the Qdrant client
    qdrant_vector_store = QdrantVectorStore(client=qdrant_client, collection_name=collection_name, embedding=embeddings)
    
    qdrant_vector_store.add_documents(documents)
    
    return qdrant_vector_store

#####

def test_create_qdrant_vector_store():
    embeddings = create_embeddings_openai()
    docs = process_file('docs/CHW_EOC_04-21-2017_ENG.pdf')
    print(len(docs))
    chunks = chunk_docs_recursive(docs)
    print(len(chunks))
    vector_store = create_qdrant_vector_store(":memory:", "test", 1536, embeddings, chunks)
    print(vector_store.collection_name)

test_create_qdrant_vector_store()

134
615
test


In [25]:
# Create a Qdrant retriever

# TODO - Add reference 

from langchain_core.retrievers import BaseRetriever
from langchain_qdrant import QdrantVectorStore

def create_retriever_qdrant(vector_store: QdrantVectorStore) -> BaseRetriever:

    retriever = vector_store.as_retriever()

    return retriever

#####

def test_create_retriever_qdrant(text: str = None):
    embeddings = create_embeddings_openai()
    docs = process_file('docs/CHW_EOC_04-21-2017_ENG.pdf')
    chunks = chunk_docs_recursive(docs)
    vector_store = create_qdrant_vector_store(":memory:", "test", 1536, embeddings, chunks)
    retriever = create_retriever_qdrant(vector_store)
    if text:
        docs = retriever.invoke(text)
        print(docs[0])

print('\nQDRANT')
test_create_retriever_qdrant('What is my benefit for acupuncture?')


QDRANT
page_content='Acupuncture 
 
We will cover up to two outpatient acupuncture services in any calendar month.  Additional 
services may be covered if they are determined to be medically necessary.  You must receive 
prior authorization to receive more than two acupuncture services in a calendar month.   
 
There is no frequency limit if you are getting acupuncture services through the Early and 
Periodic Screening, Diagnosis, & Treatment program,    
 
Allergy Services' metadata={'source': 'docs/CHW_EOC_04-21-2017_ENG.pdf', 'file_path': 'docs/CHW_EOC_04-21-2017_ENG.pdf', 'page': 57, 'total_pages': 134, 'format': 'PDF 1.5', 'title': '', 'author': 'PTN Consulting', 'subject': '', 'keywords': '', 'creator': 'Microsoft® Word 2013', 'producer': 'Microsoft® Word 2013', 'creationDate': "D:20170421101839-07'00'", 'modDate': "D:20170421103401-07'00'", 'trapped': '', '_id': '02947cd1536945538bdf045ce004aba8', '_collection_name': 'test'}


In [None]:
# Create a Vertex AI retriever

# https://python.langchain.com/docs/integrations/retrievers/google_vertex_ai_search/

from langchain_google_community import VertexAISearchRetriever

def create_retriever_vertexai() -> VertexAISearchRetriever:

    retriever = VertexAISearchRetriever(
        project_id=os.environ['PROJECT_ID'],
        location_id=os.environ['LOCATION_ID'],
        data_store_id=os.environ['DATA_STORE_ID'],
        max_documents=3,
    )

    return retriever

#####

def test_create_retriever_vertexai(text: str = None):
    retriever = create_retriever_vertexai()
    if text:
        docs = retriever.invoke(text)
        print(docs[0])

print('\nVERTEX AI')
test_create_retriever_vertexai('What is a DUR reject?')

In [26]:
# Create a prompt template

# https://python.langchain.com/v0.1/docs/modules/model_io/prompts/quick_start/#chatprompttemplate
# https://python.langchain.com/v0.2/api_reference/core/prompts/langchain_core.prompts.chat.ChatPromptTemplate.html

from langchain.prompts import ChatPromptTemplate
from langchain.prompts import PromptTemplate

def create_chat_prompt_template(prompt: str = None) -> ChatPromptTemplate:
    
    template = '''
    You are a helpful conversational agent for the State of California.
    Your expertise is fully understanding the California Health & Wellness health  plan. 
    You need to answer questions posed by the member, who is trying to get answers about their health plan.  
    Your goal is to provide a helpful and detailed response, in at least 2-3 sentences. 

    You will be analyzing the health plan documents to derive a good answer, based on the following information:
    1. The question asked.
    2. The provided context, which comes from various documents of the pharmacy manuals repository. You will need to answer the question based on the provided context.

    The output MUST BE A VALID JSON. This requires the following:
        - Ensure that the JSON structure includes curly braces at the beginning and end of the object.
        - Use double quotes for all keys and string values.
        - Ensure that keys and values are separated by a colon.
        - Separate multiple key-value pairs with commas.
        - Avoid trailing commas after the last key-value pair.
        - Avoid ticks "`"" and triple ticks "```"" in the response.
        - Special characters in strings should be escaped using backslashes (e.g., use \\" for double quotes within strings).
        - Explain your reasoning for the answer.

    You need to provide the response in the following JSON format:
        {{
        answer: The answer to the question. The answer should be clear, helpful and neither too long nor too short,
        context_used: Summarize, in 50 words or less, the relevant information used to generate the answer,
        confidence_score: A Number between 0 and 100 indicating how confident you are in the answer correctness,
        answer_confidence_reasoning: A short summary on what information is missing to make the answer to the question completely correct
        }}

    Now it's your turn!

    {question}

    {context}

    '''
    
    prompt = PromptTemplate.from_template(template)

    return prompt

#####

def test_create_chat_prompt_template():
    prompt = create_chat_prompt_template()
    print(prompt)

test_create_chat_prompt_template()

input_variables=['context', 'question'] template='\n    You are a helpful conversational agent for the State of California.\n    Your expertise is fully understanding the California Health & Wellness health  plan. \n    You need to answer questions posed by the member, who is trying to get answers about their health plan.  \n    Your goal is to provide a helpful and detailed response, in at least 2-3 sentences. \n\n    You will be analyzing the health plan documents to derive a good answer, based on the following information:\n    1. The question asked.\n    2. The provided context, which comes from various documents of the pharmacy manuals repository. You will need to answer the question based on the provided context.\n\n    The output MUST BE A VALID JSON. This requires the following:\n        - Ensure that the JSON structure includes curly braces at the beginning and end of the object.\n        - Use double quotes for all keys and string values.\n        - Ensure that keys and value

In [30]:
# Create a Langchain chain..

# https://python.langchain.com/docs/integrations/llms/google_ai/
# https://python.langchain.com/docs/integrations/chat/google_generative_ai/
# https://ai.google.dev/gemini-api/docs/safety-settings 

from google.generativeai.types import HarmCategory, HarmBlockThreshold
from langchain_core.output_parsers import StrOutputParser
from langchain_core.retrievers import BaseRetriever
from langchain_core.runnables import RunnablePassthrough
from langchain_google_genai import ChatGoogleGenerativeAI
from operator import itemgetter

def create_chain (model_name: str, prompt: ChatPromptTemplate, retriever: BaseRetriever):

    if "gemini" in model_name.lower():
        llm = ChatGoogleGenerativeAI(
            model=model_name,
            temperature=0,
            safety_settings={
                    HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
                    HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
                    HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_ONLY_HIGH,
                    HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
                },
            )
    else:
        print("Unsuported model name")
        
    chain = (
        {"context": itemgetter("question") | retriever, "question": itemgetter("question")} 
        | RunnablePassthrough.assign(context=itemgetter("context")) 
        | {"response": prompt | llm, "context": itemgetter("context")}
        )

    return chain

#####

def test_create_chain_qdrant():
    embeddings = create_embeddings_openai()
    docs = process_file('docs/CHW_EOC_04-21-2017_ENG.pdf')
    chunks = chunk_docs_recursive(docs)
    vector_store = create_qdrant_vector_store(":memory:", "test", 1536, embeddings, chunks)
    retriever = create_retriever_qdrant(vector_store)
    chat_prompt_template = create_chat_prompt_template()
    chain = create_chain('gemini-1.5-flash', chat_prompt_template, retriever)
    result = chain.invoke({'question' : 'What is my benefit for acupuncture?'})
    print(result)

print('\nQDRANT')
test_create_chain_qdrant()

# def test_create_chain_vertexai():
#     retreiver = create_retriever_vertexai()
#     chat_prompt_template = create_chat_prompt_template()
#     chain = create_chain('gemini-1.5-flash', chat_prompt_template, retreiver)
#     result = chain.invoke({'question' : 'What is my benefit for acupuncture?'})
#     print(result)

# print('\nVERTEX AI')
# test_create_chain_vertexai()


QDRANT
{'response': AIMessage(content='{\n"answer": "The California Health & Wellness plan covers up to two outpatient acupuncture services per calendar month.  If you need more than two services, you will need to get prior authorization.  There is no frequency limit if you are getting acupuncture services through the Early and Periodic Screening, Diagnosis, & Treatment program.",\n"context_used": "The California Health & Wellness plan covers up to two outpatient acupuncture services in any calendar month. Additional services may be covered if they are determined to be medically necessary. You must receive prior authorization to receive more than two acupuncture services in a calendar month. There is no frequency limit if you are getting acupuncture services through the Early and Periodic Screening, Diagnosis, & Treatment program.",\n"confidence_score": 95,\n"answer_confidence_reasoning": "The document does not specify what constitutes \\"medically necessary\\" for acupuncture service

In [33]:
# Generate answeers from a chain usin a list of questions

def generate_answers_contexts(chain, questions: list):
    
    answers = []
    contexts = []

    # Loop over the list of questions and call the chain to get the answer and context
    for question in questions:
        print(question)

        # Call the chain to get answers and contexts
        response = chain.invoke({"question" : question})
        print(response)
        
        # Capture the answer and context 
        answers.append(response["response"].content)
        contexts.append([context.page_content for context in response["context"]])

    return answers, contexts

In [32]:
# Run a Ragas evaluation 

from datasets import Dataset
from pandas import DataFrame
from ragas import evaluate
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    answer_correctness,
    context_recall,
    context_precision,
)

def run_ragas_evaluation(chain, 
                         questions: list, 
                         groundtruths: list, 
                         eval_metrics: list = [answer_correctness, 
                                               answer_relevancy, 
                                               context_recall, 
                                               context_precision, 
                                               faithfulness]):

  answers, contexts = []
  answers, contexts = generate_answers_contexts(chain, questions)

  # Create the input dataset 
  input_dataset = Dataset.from_dict({
  "question" : questions,         # From the dataframe
  "answer" : answers,             # From the chain
  "contexts" : contexts,          # From the chain
  "ground_truth" : groundtruths   # From the dataframe
  })

  # Run the Ragas evaluation using the input dataset and eval metrics
  ragas_results = evaluate(input_dataset, eval_metrics)
  ragas_results_df = ragas_results.to_pandas()
  
  return ragas_results, ragas_results_df
  
  #####

def test_run_ragas_evaluation():
  print("test")    

test_run_ragas_evaluation()

test


In [30]:
# Query Vertex AI Search datastore using HTTP Post

import json
import requests
import google.auth
from google.auth.transport.requests import Request

credentials, project_id = google.auth.default()
credentials.refresh(Request())
access_token = credentials.token
print(access_token)

def query_chunks(query, n=5):
    
  if LOCATION_ID == 'us':
    api_endpoint = 'us-discoveryengine.googleapis.com'
  else:
    api_endpoint = 'discoveryengine.googleapis.com'

  url = f"https://{api_endpoint}/v1alpha/projects/{PROJECT_ID}/locations/{LOCATION_ID}/collections/default_collection/dataStores/{DATA_STORE_ID}/servingConfigs/default_search:search"
  print(url)
  
  headers = {
      "Authorization": f"Bearer {access_token}",
      "Content-Type": "application/json",
  }
  
  post_data = {
      "servingConfig": f"projects/{PROJECT_ID}/locations/{LOCATION_ID}/collections/default_collection/dataStores/{DATA_STORE_ID}/servingConfigs/default_search",
      "pageSize": n,
      "query": query,
      "contentSearchSpec": {"searchResultMode": "CHUNKS"},
  }
  
  response = requests.post(url, headers=headers, json=post_data)

  if response.status_code != 200:
    print(
        f"Error retrieving search results: {response.status_code} -"
        f" {response.text}"
    )

  return response.json()

#####

test = query_chunks('What is my benefit for acupuncture?')
print(test)

I0000 00:00:1728070414.346501 1513933 fork_posix.cc:77] Other threads are currently calling into gRPC, skipping fork() handlers


ya29.a0AcM612xQhk7xeEHmuVvA83EtnvSUXrP4mcDHVSzDpzlSEBEGpZ_CJNDbSbXtCX95YhXfMtAX5nMolrBMbTrYeTjIv2ZI6Yi53Hm0r_VRBXholcmmS7j-eHYz1ds0vbboGq2hnlv6Y8KLAkioJNM07vfZaFrxB8zpAYRklKzbFAaCgYKATYSARESFQHGX2Miqjq7c4jyUvOtqGqcinGFPQ0177
https://us-discoveryengine.googleapis.com/v1alpha/projects/virtual-assist-poc-434617/locations/us/collections/default_collection/dataStores/virtual-assist-poc-docs_1727390753315/servingConfigs/default_search:search
{'results': [{'chunk': {'name': 'projects/695172254544/locations/us/collections/default_collection/dataStores/virtual-assist-poc-docs_1727390753315/branches/0/documents/8fc366005ca1959f1d792465b111e426/chunks/c2', 'id': 'c2', 'content': 'Lost Prescription – 04\n\nTherapy Change – 05\n\n\n\nFor claims greater than a 30-day supply, the pharmacy must submit individual claims, each of 30-day supplies (up to 6 claims for a total of 180-day supply).\n\n\n\nIf the pharmacy submits the claim with the Clarification Code and the claim still rejects, check the Subm

# Create RAG chain using Vertex AI vector store

In [18]:
# Build RAG chain using Vertex AI Agent Builder datastore

retreiver = create_retriever_vertexai()
chat_prompt_template = create_chat_prompt_template()
chain = create_chain('gemini-1.5-flash', chat_prompt_template, retreiver)

In [19]:
# Test the chain 

questions = ["What is a DUR reject?",
"What is the difference between hard and soft rejects?",
"What is TrOOP",
"What is the clarification code for lost prescription"]

for question in questions:
    print(question)
    result = chain.invoke({"question" : question})
    print(result)
    print(result["response"].content)

# Generate answers from the golden Q&A

In [None]:
import pandas as pd

# Load the Golden Q&A
testset_df = pd.read_excel('golden_qa/KN Virtual Assist POC_08.09.24 1_mk.xlsx', 'Consolidated Golden QnA')

questions = testset_df["Question"].values.tolist()
questions = [str(question) for question in questions]

answers, contexts = generate_answers_contexts(chain, questions)

# Create customer testset and evaluate using Ragas

In [24]:
# Load the golden Q&A and get questions and ground truths

import pandas as pd

testset_df = pd.read_excel('golden_qa/KN Virtual Assist POC_08.09.24 1_mk.xlsx', 'Consolidated Golden QnA')

questions = testset_df["Question"].values.tolist()
questions = [str(question) for question in questions]

groundtruths = testset_df["Answer"].values.tolist()
groundtruths = [str(ground_truth) for ground_truth in groundtruths]

print("Writing customer_testset.csv")
testset_df.to_csv("testsets/customer_testset.csv")
testset_df

In [None]:
# Evaluate the customer testset using Ragas

import pandas as pd

# Get the questions and groundtruths from the dataframe
testset_df = pd.read_csv("testsets/customer_testset.csv")

questions = testset_df["Question"].values.tolist()
questions = [str(question) for question in questions]

groundtruths = testset_df["Answer"].values.tolist()
groundtruths = [str(ground_truth) for ground_truth in groundtruths]  

# Specify the eval metrics
eval_metrics = [answer_correctness, answer_relevancy, context_precision, context_recall, faithfulness]

# Run the Ragas evaluation and show the results
ragas_results, ragas_results_df = run_ragas_evaluation(chain, questions, groundtruths, eval_metrics)

# Write the results to disk
print("Writing customer_testset_ragas_results.csv")
ragas_results_df.to_csv("ragas/customer_testset_ragas_results.csv")

# Show the resutls
ragas_results

# Create synthetic testset and evaluate using Ragas

In [42]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

from ragas.testset.evolutions import simple, reasoning, multi_context
from ragas.testset.generator import TestsetGenerator

# Load the docs
documents = []

paths = ['docs/CHW_EOC_04-21-2017_ENG.pdf']

for path in paths:
    documents.extend(process_file(path=path))

# Chunk the docs 
# chunks = chunk_docs_nltk(documents, 1500, 150)
chunks = chunk_docs_semantic(documents)

# Set up the parameters for generating the testset
generator_llm = ChatOpenAI(model="gpt-3.5-turbo")
critic_llm = ChatOpenAI(model="gpt-4o-mini")
embeddings = OpenAIEmbeddings()
generator = TestsetGenerator.from_langchain(generator_llm, critic_llm, embeddings)
distributions = {simple: 0.5, multi_context: 0.4, reasoning: 0.1}

# Generate the testset and save to disk 
testset = generator.generate_with_langchain_docs(documents=chunks, test_size=50, distributions=distributions)
testset_df = testset.to_pandas()

print("Writing synthetic_testset.csv")
testset_df.to_csv("testsets/synthetic_testset.csv")
testset_df

ImportError: cannot import name 'MaxRetriesExceeded' from 'ragas.exceptions' (/opt/anaconda3/envs/aie4-demo/lib/python3.11/site-packages/ragas/exceptions.py)

In [None]:
# Evaluate the synthetic testset using Ragas

import pandas as pd

# Run the Ragas evaluation and show the results
# Get the questions and groundtruths from the dataframe
testset_df = pd.read_csv("testsets/synthetic_testset.csv")

questions = testset_df["question"].values.tolist()
questions = [str(question) for question in questions]

groundtruths = testset_df["ground_truth"].values.tolist()
groundtruths = [str(ground_truth) for ground_truth in groundtruths]  

# Specify the eval metrics
eval_metrics = [answer_correctness, answer_relevancy, context_precision, context_recall, faithfulness]

# Run the Ragas evaluation and show the results
ragas_results, ragas_results_df = run_ragas_evaluation(chain, questions, groundtruths, eval_metrics)

# Write the results to disk
print("Writing synthetic_testset_ragas_results.csv")
ragas_results_df.to_csv("ragas/synthetic_testset_ragas_results.csv")

# Show the resutls
ragas_results