In [1]:
import getpass
import json
import os
import re
from typing import List, Tuple, Optional

import pandas as pd
from pydantic import BaseModel, Field
from langchain.docstore.document import Document
from langchain.retrievers import EnsembleRetriever
from langchain_community.retrievers import BM25Retriever
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.tools import DuckDuckGoSearchResults

# Section 2 : Setting Up the Retrieval System

The retrieval system is based on two techniques working together:
- Reciprocal Rank Fusion (https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf):
    uses FAISS as the vector store to perform similarity search over vector representations, and BM25 to search for similar documents based on term statistics; the two rankings are then fused.

- CRAG (Corrective RAG):
    Extends the standard RAG approach by dynamically evaluating and correcting the retrieval process, combining the power of vector databases (in this case, RRF retrieval), web search, and language models to provide accurate and context-aware responses to user queries.

In [16]:
class DocumentRetriever:
    """Hybrid retriever that combines a BM25 retriever and a FAISS retriever.

    Both retrievers are wrapped in a LangChain ``EnsembleRetriever`` which merges
    their ranked results according to ``weights``.

    Args:
        bm25_docs: Texts indexed by the BM25 (keyword) retriever.
        faiss_docs: Texts embedded with ``OpenAIEmbeddings`` and indexed in FAISS.
        k: Number of documents each underlying retriever returns (default 5).
        weights: Ensemble weights for (BM25, FAISS); default is an equal split.
    """

    def __init__(
        self,
        bm25_docs: List[str],
        faiss_docs: List[str],
        k: int = 5,
        weights: Tuple[float, float] = (0.5, 0.5),
    ):
        # Build one metadata dict per document: `[{...}] * n` would hand the
        # *same* dict object to every document.
        self.bm25_retriever = BM25Retriever.from_texts(
            bm25_docs, metadatas=[{"source": 1} for _ in bm25_docs]
        )
        self.bm25_retriever.k = k
        embedding = OpenAIEmbeddings()
        faiss_vectorstore = FAISS.from_texts(
            faiss_docs, embedding, metadatas=[{"source": 2} for _ in faiss_docs]
        )
        self.faiss_retriever = faiss_vectorstore.as_retriever(search_kwargs={"k": k})

        self.ensemble_retriever = EnsembleRetriever(
            retrievers=[self.bm25_retriever, self.faiss_retriever], weights=list(weights)
        )

    def retrieve(self, query: str) -> List[Document]:
        """Return the ensemble-ranked documents for ``query``."""
        return self.ensemble_retriever.invoke(query)

In [5]:
# Structured-output schema used by Evaluator: the LLM's answer is parsed into
# this model and only `relevance_score` is read back.
class RetrievalEvaluatorInput(BaseModel):
    relevance_score: float = Field(
        ...,
        description="The relevance score of the document to the query. the score should be between 0 and 1.",
    )

In [6]:
class Evaluator:
    """Asks an LLM to grade how relevant a document is to a query."""

    def __init__(self, llm):
        self.llm = llm

    def evaluate_relevance(self, query: str, document: str) -> float:
        """Return the relevance score the LLM assigns to ``document`` for ``query``.

        The LLM output is parsed through ``RetrievalEvaluatorInput`` and its
        ``relevance_score`` field is returned as-is.
        """
        scoring_prompt = PromptTemplate(
            template="On a scale from 0 to 1, how relevant is the following document to the query? Query: {query}\nDocument: {document}\nRelevance score:",
            input_variables=["query", "document"],
        )
        structured_llm = self.llm.with_structured_output(RetrievalEvaluatorInput)
        scoring_chain = scoring_prompt | structured_llm
        verdict = scoring_chain.invoke({"query": query, "document": document})
        return verdict.relevance_score

In [7]:
# Structured-output schema used by KnowledgeRefiner. The field description is
# part of the schema handed to the LLM, so it must describe the *output*
# (the extracted key points), not the input document. KnowledgeRefiner.refine
# splits this string on newlines, hence "one per line".
class KnowledgeRefinementInput(BaseModel):
    key_points: str = Field(
        ...,
        description="The key points extracted from the document, as bullet points with one key point per line.",
    )

In [8]:

class KnowledgeRefiner:
    """Condenses a document into a list of key points using an LLM."""

    def __init__(self, llm):
        self.llm = llm

    def refine(self, document: str) -> List[str]:
        """Extract key points from ``document``.

        Returns:
            The non-empty, whitespace-stripped lines of the ``key_points``
            string produced by the LLM.
        """
        extraction_prompt = PromptTemplate(
            template="Extract the key information from the following document in bullet points:\n{document}\nKey points:",
            input_variables=["document"],
        )
        structured_llm = self.llm.with_structured_output(KnowledgeRefinementInput)
        extraction_chain = extraction_prompt | structured_llm
        extracted = extraction_chain.invoke({"document": document}).key_points

        bullet_lines = []
        for raw_line in extracted.split('\n'):
            cleaned = raw_line.strip()
            if cleaned:  # drop blank lines
                bullet_lines.append(cleaned)
        return bullet_lines

In [9]:
# Structured-output schema used by QueryRewriter. The description is sent to
# the LLM with the schema, so it must describe what the model should put in
# the field: the rewritten query, not the original one.
class QueryRewriterInput(BaseModel):
    query: str = Field(
        ...,
        description="The rewritten query, optimized for a web search.",
    )

In [10]:
class QueryRewriter:
    """Rephrases a user query into a web-search-friendly form using an LLM."""

    def __init__(self, llm):
        self.llm = llm

    def rewrite(self, query: str) -> str:
        """Return the LLM's rewritten version of ``query`` with surrounding whitespace removed."""
        rewrite_prompt = PromptTemplate(
            template="Rewrite the following query to make it more suitable for a web search:\n{query}\nRewritten query:",
            input_variables=["query"],
        )
        structured_llm = self.llm.with_structured_output(QueryRewriterInput)
        rewrite_chain = rewrite_prompt | structured_llm
        rewritten = rewrite_chain.invoke({"query": query})
        return rewritten.query.strip()

In [11]:
class WebSearcher:
    """Runs a DuckDuckGo search for a query and distils the results with an LLM."""

    # One `snippet: ..., title: ..., link: <url>` record of the search tool's
    # text output. `]` is excluded from the URL so that results whose records
    # are wrapped in square brackets (`[snippet: ..., link: ...], [...]`) do
    # not leak the closing bracket into the link.
    _RESULT_PATTERN = re.compile(r'snippet: (.*?), title: (.*?), link: (https?://[^\s,\]]+)')

    def __init__(self, llm):
        self.llm = llm
        self.search = DuckDuckGoSearchResults()

    def search_and_refine(self, query: str) -> Tuple[List[str], List[Tuple[str, str]]]:
        """Search the web for ``query`` and summarise what was found.

        The query is first rewritten by ``QueryRewriter``, the raw search output
        is condensed by ``KnowledgeRefiner``, and the same raw output is parsed
        for (title, link) pairs.

        Returns:
            A tuple ``(key_points, sources)``.
        """
        rewritten_query = QueryRewriter(self.llm).rewrite(query)
        web_results = self.search.run(rewritten_query)
        web_knowledge = KnowledgeRefiner(self.llm).refine(web_results)
        sources = self.parse_search_results(web_results)
        return web_knowledge, sources

    @staticmethod
    def parse_search_results(results_string: str) -> List[Tuple[str, str]]:
        """Extract ``(title, link)`` pairs from the search tool's text output.

        Args:
            results_string: Text containing ``snippet: ..., title: ..., link: ...``
                records. Empty or ``None`` input yields an empty list.

        Returns:
            One ``(title, link)`` tuple per record, in order of appearance. An
            empty title is replaced by ``'Untitled'``.
        """
        if not results_string:
            return []
        # The regex already yields plain strings, so no JSON round-trip is needed.
        return [
            (title or 'Untitled', link)
            for _snippet, title, link in WebSearcher._RESULT_PATTERN.findall(results_string)
        ]

In [12]:
class ResponseGenerator:
    """Produces the final answer from a query, supporting knowledge and sources."""

    def __init__(self, llm):
        self.llm = llm

    def generate(self, query: str, knowledge: str, sources: List[Tuple[str, str]]) -> str:
        """Ask the LLM to answer ``query`` from ``knowledge`` and cite ``sources``.

        Args:
            query: The user question.
            knowledge: Text the answer should be based on.
            sources: ``(title, link)`` pairs; a falsy link means only the title is listed.

        Returns:
            The ``content`` of the LLM response.
        """
        answer_prompt = PromptTemplate(
            template="Based on the following knowledge, answer the query. Include the sources with their links (if available) at the end of your answer:\nQuery: {query}\nKnowledge: {knowledge}\nSources: {sources}\nAnswer:",
            input_variables=["query", "knowledge", "sources"],
        )

        # One line per source: "title: link" when a link exists, otherwise just the title.
        source_lines = []
        for title, link in sources:
            if link:
                source_lines.append(f"{title}: {link}")
            else:
                source_lines.append(title)

        answer_chain = answer_prompt | self.llm
        llm_reply = answer_chain.invoke(
            {
                "query": query,
                "knowledge": knowledge,
                "sources": "\n".join(source_lines),
            }
        )
        return llm_reply.content

In [22]:
class QueryProcessor:
    """Corrective-RAG pipeline: retrieve, grade the hits, optionally add web search, then answer.

    Args:
        retriever: Provides ``retrieve(query)`` returning candidate documents.
        evaluator: Provides ``evaluate_relevance(query, text)`` returning a score.
        web_searcher: Provides ``search_and_refine(query)`` returning
            ``(key_points, sources)``.
        llm: Chat model used for knowledge refinement and answer generation.
        upper_threshold: Best score strictly above this uses the retrieved
            document alone (default 0.7).
        lower_threshold: Best score strictly below this uses web search alone
            (default 0.3). Scores in between combine both.
    """

    def __init__(
        self,
        retriever: DocumentRetriever,
        evaluator: Evaluator,
        web_searcher: WebSearcher,
        llm,
        upper_threshold: float = 0.7,
        lower_threshold: float = 0.3,
    ):
        self.retriever = retriever
        self.evaluator = evaluator
        self.web_searcher = web_searcher
        self.llm = llm
        self.upper_threshold = upper_threshold
        self.lower_threshold = lower_threshold

    def process(self, query: str, eval_documents: bool = True) -> str:
        """
        Process the query and generate a response.

        Args:
            query (str): The query string.
            eval_documents (bool): Whether to calculate evaluation scores or just use the retrieved documents.

        Returns:
            str: The generated response.
        """
        retrieved_docs = self.retriever.retrieve(query)
        local_source = [("Retrieved document", "")]

        if not retrieved_docs:
            # Nothing retrieved: there is no document to grade or quote, so
            # rely on web search instead of failing on an empty list.
            web_knowledge, sources = self.web_searcher.search_and_refine(query)
            final_knowledge = "\n".join(web_knowledge)
        elif not eval_documents:
            final_knowledge = retrieved_docs[0].page_content
            sources = local_source
        else:
            eval_scores = [
                self.evaluator.evaluate_relevance(query, doc.page_content)
                for doc in retrieved_docs
            ]
            # First index holding the maximum score (same tie-breaking as list.index(max)).
            best_index = max(range(len(eval_scores)), key=eval_scores.__getitem__)
            max_score = eval_scores[best_index]
            best_doc = retrieved_docs[best_index]

            if max_score > self.upper_threshold:
                final_knowledge = best_doc.page_content
                sources = local_source
            elif max_score < self.lower_threshold:
                web_knowledge, sources = self.web_searcher.search_and_refine(query)
                # search_and_refine returns a list of key points; join them so the
                # prompt receives text rather than a Python list repr.
                final_knowledge = "\n".join(web_knowledge)
            else:
                retrieved_knowledge = KnowledgeRefiner(self.llm).refine(best_doc.page_content)
                web_knowledge, web_sources = self.web_searcher.search_and_refine(query)
                final_knowledge = "\n".join(retrieved_knowledge + web_knowledge)
                sources = local_source + web_sources

        return ResponseGenerator(self.llm).generate(query, final_knowledge, sources)

In [26]:
# Dataset location: override with the DATASET_PATH environment variable; the
# default keeps the path this notebook was originally run with.
DATASET_PATH = os.environ.get(
    "DATASET_PATH",
    '/home/aragy/Huma.ai-assessment/Dataset/cleaned_dataset.csv',
)
df = pd.read_csv(DATASET_PATH)

# Fail early with a clear message rather than indexing NaN "documents".
assert df[['question', 'answer_solution']].notna().all().all(), \
    "question / answer_solution must not contain missing values"

# One retrievable text per row: "<question> <answer>" (vectorized, no row loop).
documents = (df['question'] + ' ' + df['answer_solution']).tolist()

# The same corpus feeds both the BM25 and the FAISS index.
retriever = DocumentRetriever(documents, documents)


In [27]:
# Sanity check: inspect what the hybrid retriever returns for a sample question.
retriever.retrieve(query="What are the Keytruda's side effects?")

[Document(metadata={'source': 1}, page_content='What are the common side effects of Keytruda? Common side effects include fatigue, nausea, and skin rash.'),
 Document(metadata={'source': 1}, page_content='Can Keytruda cause immune-related adverse effects? Yes, Keytruda can cause immune-related adverse effects such as colitis, hepatitis, and pneumonitis.'),
 Document(metadata={'source': 1}, page_content='What were the side effects noted in the KEYNOTE-006 trial? Common side effects included fatigue, itching, and diarrhea.'),
 Document(metadata={'source': 2}, page_content='Are there specific side effects of Keytruda that NSCLC patients should monitor? NSCLC patients should monitor for cough, shortness of breath, and chest pain, as these could indicate immune-related pneumonitis.'),
 Document(metadata={'source': 2}, page_content='What should patients report immediately while on Keytruda treatment? Patients should report any new or worsening symptoms such as cough, chest pain, or changes i

In [28]:
# Deterministic (temperature 0) GPT-4o-mini shared by the grader and the web searcher.
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0,
    max_tokens=1000,
)
evaluator, web_searcher = Evaluator(llm), WebSearcher(llm)

In [31]:
# `WebSearcher` has no `search()` method: `web_searcher.search` is the
# DuckDuckGo tool instance, so run it the same way `search_and_refine` does.
web_searcher.search.run("What are the Keytruda's side effects?")



In [30]:
# Full web path: rewrite the query, search, refine into key points, extract sources.
web_searcher.search_and_refine(query="What are the Keytruda's side effects?")

(['- Keytruda is a biologic drug that belongs to the PD-1 inhibitor class.',
  '- Side effects can vary based on whether Keytruda is used alone or with other cancer drugs.',
  '- Commonly reported side effects include:',
  '- Diarrhea',
  '- Fatigue',
  '- Loss of appetite',
  '- Pain (stomach, muscles, bones, joints)',
  '- Rash or itching',
  '- Fever',
  '- Cough',
  '- Serious side effects that require immediate medical attention include:',
  '- Black, tarry stools',
  '- Bladder pain',
  '- Bloating or swelling of the face, arms, hands, lower legs, or feet',
  '- Bloody or cloudy urine',
  '- Blurred vision',
  '- Body aches or pain',
  '- It is important to consult a doctor or nurse if serious side effects occur.'],
 [('Keytruda side effects: What they are and how to manage them',
   'https://www.medicalnewstoday.com/articles/drugs-keytruda-side-effects'),
  ('Keytruda Side Effects: Common to Serious, Explained | Healthline',
   'https://www.healthline.com/health/drugs/keytruda-s

# Section 3: Integrating the Generation Component

* GPT-4o-mini
    - Advantages:

    - Disadvantages:

* 



In [None]:
# SECURITY: never store API keys in the notebook. The key that used to be
# hard-coded here must be treated as leaked and revoked.
# Read the key from the environment, prompting (without echo) only if absent.
# NOTE(review): earlier cells already build OpenAI clients; on a fresh kernel
# this cell presumably has to run before them.
openai_api_key = os.environ.get("OPENAI_API_KEY") or getpass.getpass("OpenAI API key: ")
os.environ["OPENAI_API_KEY"] = openai_api_key

In [72]:
# Generation model under test: GPT-4o-mini, deterministic, answers capped at 1000 tokens.
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0,
    max_tokens=1000,
)

In [73]:

# Wire the CRAG pipeline around the current `llm`.
evaluator, web_searcher = Evaluator(llm), WebSearcher(llm)
processor = QueryProcessor(
    retriever=retriever,
    evaluator=evaluator,
    web_searcher=web_searcher,
    llm=llm,
)

In [19]:
# GPT-4o-mini, question 1: side effects.
response = processor.process(query="What are the Keytruda's side effects?")
print(response)

Processing query: What are the Keytruda's side effects?
Generating response...
Keytruda (pembrolizumab) is an immunotherapy drug used to treat various types of cancer. Common side effects associated with Keytruda include:

- Fatigue
- Nausea
- Skin rash

These side effects can vary in intensity and may not occur in every patient. It's important for patients to discuss any side effects they experience with their healthcare provider.

Sources: Retrieved document.


In [20]:
# GPT-4o-mini, question 2: indication.
response = processor.process(query="What is the indication for using Keytruda?")
print(response)

Processing query: What is the indication for using Keytruda?
Generating response...
Keytruda (pembrolizumab) is an immunotherapy medication primarily indicated for the treatment of various types of cancer, including:

1. **Non-small cell lung cancer (NSCLC)** - It is particularly effective in patients with PD-L1 expression, significantly improving survival rates.
2. **Melanoma**
3. **Head and neck squamous cell carcinoma (HNSCC)**
4. **Classical Hodgkin lymphoma**
5. **Endometrial carcinoma**
6. **Triple-negative breast cancer**

Keytruda functions as a checkpoint inhibitor, enhancing the immune system's ability to fight cancer. It is administered via intravenous (IV) infusion and is intended for long-term treatment. The recommended dosages for adults are either 200 mg once every 3 weeks or 400 mg once every 6 weeks.

For more detailed information, you can refer to the following sources:
- [Medical News Today - Keytruda (pembrolizumab): Side effects, uses, cost, and more](https://www.m

In [52]:
# GPT-4o-mini, question 3: performance in cancer remission.
response = processor.process(query="How Keytruda perform in cancer remission?")
print(response)

Keytruda (pembrolizumab) has shown significant efficacy in promoting cancer remission across various types of cancer, particularly in non-small cell lung cancer (NSCLC) and muscle-invasive urothelial carcinoma (MIUC). 

1. **Non-Small Cell Lung Cancer (NSCLC)**: Keytruda is effective in treating NSCLC, especially in patients with high PD-L1 expression, leading to improved survival rates. This highlights its role in enhancing the immune response against cancer cells.

2. **Muscle-Invasive Urothelial Carcinoma (MIUC)**: In patients with high-risk MIUC, adjuvant treatment with Keytruda has demonstrated improved disease-free survival (DFS) compared to observation alone after 45 months. This suggests that Keytruda can help prevent recurrence in this patient population.

3. **Kidney Cancer**: Keytruda has also been noted as the first adjuvant therapy to significantly improve overall survival in earlier-stage kidney cancer patients at high risk of recurrence, reducing the risk of death by nea

### Claude 3.5 sonnet

In [33]:
from langchain_anthropic import ChatAnthropic

In [44]:
# SECURITY: never store API keys in the notebook. The key that used to be
# hard-coded here must be treated as leaked and revoked.
# Read the key from the environment, prompting (without echo) only if absent.
ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY") or getpass.getpass("Anthropic API key: ")
os.environ["ANTHROPIC_API_KEY"] = ANTHROPIC_API_KEY

In [56]:
# Swap the generation model to Claude 3.5 Sonnet (deterministic).
llm = ChatAnthropic(
    model_name="claude-3-5-sonnet-20240620",
    temperature=0,
)

In [57]:
# Rebuild the pipeline so every LLM-backed component uses the Claude model.
evaluator, web_searcher = Evaluator(llm), WebSearcher(llm)
processor = QueryProcessor(
    retriever=retriever,
    evaluator=evaluator,
    web_searcher=web_searcher,
    llm=llm,
)

In [58]:
# Claude 3.5 Sonnet, question 1: side effects.
response = processor.process(query="What are the Keytruda's side effects?")
print(response)

Based on the provided knowledge, the common side effects of Keytruda include:

1. Fatigue
2. Nausea
3. Skin rash

It's important to note that these are just the common side effects, and individual experiences may vary. Patients should always consult their healthcare provider for a comprehensive list of potential side effects and to discuss any concerns they may have about their treatment.

Source:
Retrieved document (no link available)


In [59]:
# Claude 3.5 Sonnet, question 2: indication.
response = processor.process(query="What is the indication for using Keytruda?")
print(response)

Based on the provided knowledge, Keytruda (pembrolizumab) is indicated for the treatment of various types of cancer, including:

1. Non-small cell lung cancer, particularly in patients with PD-L1 expression
2. Melanoma (skin cancer)
3. Adenocarcinomas
4. Endometrial carcinoma
5. Triple-negative breast cancer

Keytruda is an immunotherapy treatment classified as a checkpoint inhibitor. It works by helping the immune system fight cancer cells. The treatment has shown significant improvement in survival rates, especially for non-small cell lung cancer patients.

It's important to note that Keytruda is intended for long-term treatment, and its use is continuously being evaluated in numerous clinical trials for various cancers. Since its initial approval in September 2014, at least 20 new indications have been approved, and ongoing investigational studies are exploring new uses for pembrolizumab.

Sources:
2. Keytruda (pembrolizumab): Side effects, uses, cost, and more: https://www.medicaln

In [60]:
# Claude 3.5 Sonnet, question 3: performance in cancer remission.
response = processor.process(query="How Keytruda perform in cancer remission?")
print(response)

Based on the provided knowledge, I cannot directly answer how Keytruda performs in cancer remission. The information given is specifically about Keytruda's effectiveness in treating non-small cell lung cancer, not about its performance in cancer remission.

However, I can share what is provided:

Keytruda has demonstrated significant improvement in survival rates for patients with non-small cell lung cancer who have PD-L1 expression. This suggests that Keytruda is effective in treating this specific type of cancer, but it does not provide information about cancer remission in general or for other types of cancer.

To accurately answer the query about Keytruda's performance in cancer remission, more specific and relevant information would be needed.

Source:
Retrieved document (no link available)


### Gemini

In [38]:
from langchain_google_genai import ChatGoogleGenerativeAI

# SECURITY: never store API keys in the notebook. The key that used to be
# hard-coded here must be treated as leaked and revoked.
# Read the key from the environment, prompting (without echo) only if absent.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Google API key: ")

# Swap the generation model to Gemini 1.5 Flash (deterministic).
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", temperature=0)

  from .autonotebook import tqdm as notebook_tqdm


In [39]:
# Rebuild the pipeline so every LLM-backed component uses the Gemini model.
evaluator, web_searcher = Evaluator(llm), WebSearcher(llm)
processor = QueryProcessor(
    retriever=retriever,
    evaluator=evaluator,
    web_searcher=web_searcher,
    llm=llm,
)

In [40]:
# Gemini 1.5 Flash, question 1: side effects.
response = processor.process(query="What are the Keytruda's side effects?")
print(response)

Keytruda's common side effects include fatigue, nausea, and skin rash. 

**Source:** Retrieved document 



In [42]:
# Gemini 1.5 Flash, question 2: indication.
response = processor.process(query="What is the indication for using Keytruda?")
print(response)

Keytruda is indicated for the treatment of relapsed or refractory classical Hodgkin lymphoma after two or more lines of therapy. 

**Source:** Retrieved document 



In [43]:
# Gemini 1.5 Flash, question 3: performance in cancer remission.
response = processor.process(query="How Keytruda perform in cancer remission?")
print(response)

Keytruda demonstrated superior efficacy compared to chemotherapy in NSCLC patients in the KEYNOTE-456 trial. This superiority was observed in terms of overall survival and progression-free survival. 

**Source:** Retrieved document 



# Section 4: Evaluation and Optimization

In [69]:
import json
from typing import List, Tuple

from deepeval import evaluate
from deepeval.metrics import GEval, FaithfulnessMetric, ContextualRelevancyMetric
from deepeval.test_case import LLMTestCase, LLMTestCaseParams
from langchain_openai import ChatOpenAI
import sys
import os

In [71]:
# Single evaluation example: the question and its ground-truth answer.
question = (
    "What is the response rate of melanoma patients "
    "treated with Keytruda?"
)
gt_answer = (
    "Approximately 40% of melanoma patients "
    "respond positively to Keytruda treatment."
)


In [74]:
# Answer produced by whichever pipeline `processor` currently points to.
pred_answer = processor.process(query=question)

### Test Correctness

In [70]:
# LLM-as-judge correctness check: compares the actual output against the expected output.
correctness_metric = GEval(
    name="Correctness",
    model="gpt-4o-mini",
    evaluation_steps=[
        "Determine whether the actual output is factually correct based on the expected output."
    ],
    evaluation_params=[
        LLMTestCaseParams.EXPECTED_OUTPUT,
        LLMTestCaseParams.ACTUAL_OUTPUT,
    ],
)

In [75]:
# Test case for the correctness metric: prediction vs. ground truth.
test_case_correctness = LLMTestCase(
    input=question,
    actual_output=pred_answer,
    expected_output=gt_answer,
)

In [76]:
# Run the judge on the test case, then show the score it stored on the metric.
correctness_metric.measure(test_case_correctness)
print(correctness_metric.score)

1.0


### Test Faithfulness

In [81]:
# Faithfulness: does the answer stay consistent with the supplied retrieval context?
faithfulness_metric = FaithfulnessMetric(
    model="gpt-4o-mini",
    threshold=0.7,
    include_reason=True,
)

In [82]:
# NOTE(review): the retrieval context here is the ground-truth answer, not the
# documents the pipeline actually retrieved — confirm this is intended.
test_case = LLMTestCase(
    input=question,
    actual_output=pred_answer,
    retrieval_context=[gt_answer],
)

In [83]:
# Score the test case, then print the score and the judge's explanation on separate lines.
faithfulness_metric.measure(test_case)
print(faithfulness_metric.score, faithfulness_metric.reason, sep="\n")

1.0
The score is 1.00 because there are no contradictions, indicating perfect alignment between the actual output and the retrieval context.


### Test contextual relevancy

In [84]:
# Hand-written context for the relevancy metric: three distractors followed by
# the one statement that actually answers the question.
retrieval_context = [
    "this is a test context",
    "there's other way to do this",
    "No detailed information aviable on the given topic.",
    "Approximately 40% of melanoma patients respond positively to Keytruda treatment.",
]

In [85]:
# Contextual relevancy with the strictest threshold (1).
relevance_metric = ContextualRelevancyMetric(
    model="gpt-4o-mini",
    threshold=1,
    include_reason=True,
)

In [86]:
# Test case for contextual relevancy, using the hand-written `retrieval_context`.
relevance_test_case = LLMTestCase(
    input=question,
    expected_output=gt_answer,
    actual_output=pred_answer,
    retrieval_context=retrieval_context,
)

In [88]:
# Score the test case, then print the score and the judge's explanation on separate lines.
relevance_metric.measure(relevance_test_case)
print(relevance_metric.score, relevance_metric.reason, sep="\n")

Output()