In [31]:
import json
from langchain_core.runnables import RunnablePassthrough, RunnableMap
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain.embeddings.cache import CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from langchain_core.prompts import ChatPromptTemplate
from langchain_mistralai import ChatMistralAI
from langchain_core.runnables import RunnableParallel


In [32]:
api_key="vOTciFdHcICaBscyJkwc0EOxD2GDABrp"


In [33]:
# Load the dataset for evaluation
with open("/Users/innovapathinc/Desktop/Gen Ai Topics /Retrieval_eval/ML_GenAI_Concepts/RAG/medical_dataset.json", "r") as f:
    ground_truth = json.load(f)

In [34]:
# Function to calculate evaluation metrics
def evaluate_retrieval(retriever, ground_truth, k=10):
    precision_list = []
    recall_list = []
    reciprocal_ranks = []

    for item in ground_truth:
        query = item["query"]
        ground_truth_doc = item["ground_truth_document"]

        # Retrieve top-k documents
        retrieved_docs = retriever.get_relevant_documents(query)[:k]

        # Extract content for comparison
        retrieved_content = [doc.page_content for doc in retrieved_docs]

        # Precision@k
        relevant_retrieved = sum(1 for doc in retrieved_content if ground_truth_doc in doc)
        precision = relevant_retrieved / k
        precision_list.append(precision)

        # Recall@k
        total_relevant = 1  # Assuming one ground truth document
        recall = relevant_retrieved / total_relevant
        recall_list.append(recall)


    # Calculate averages
    precision_avg = sum(precision_list) / len(precision_list)
    recall_avg = sum(recall_list) / len(recall_list)

    return {
        "Precision@k": precision_avg,
        "Recall@k": recall_avg,
    }

In [13]:
# Load and preprocess documents
loader = PyPDFLoader("/Users/innovapathinc/Desktop/Gen Ai Topics /Retrieval_eval/ML_GenAI_Concepts/RAG/Data/Medical_book.pdf")
docs = loader.load()
text_splitter = CharacterTextSplitter(
    separator="\n\n", chunk_size=500, chunk_overlap=150, is_separator_regex=False
)
chunks = text_splitter.split_documents(docs)

In [14]:
# Setup embeddings and retriever
store = LocalFileStore("./cache/")
core_embeddings_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
embedder = CacheBackedEmbeddings.from_bytes_store(core_embeddings_model, store)
vectorstore = Chroma.from_documents(chunks, embedder, persist_directory="./chroma_store")
retriever = vectorstore.as_retriever()

  core_embeddings_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
  from tqdm.autonotebook import tqdm, trange


In [35]:
# Define prompt and LLM
prompt = """You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer the question.
If you don't know the answer, just say that you don't know.
Use three sentences maximum and keep the answer concise.

Question: {question}

Context: {context}

Answer:
"""
prompt_template = ChatPromptTemplate.from_template(prompt)
llm = ChatMistralAI(model="mistral-large-latest", streaming=True, api_key=api_key)

In [36]:
# RAG chain
rag_chain_from_docs = (
    RunnablePassthrough.assign(context=(lambda x: "\n\n".join(doc.page_content for doc in x["context"])))
    | prompt_template
    | llm
)
rag_chain_with_source = RunnableParallel(
    {"context": retriever, "question": RunnablePassthrough()}
).assign(answer=rag_chain_from_docs)

In [47]:
# Run a query
response = rag_chain_with_source.invoke("explain about brain tumour")
print("Answer:", response["answer"].content)
print("Retrieved Context:", response["context"])

Answer: A brain tumor is an abnormal growth of cells in the brain, which can be benign (non-cancerous) or malignant (cancerous). Benign brain tumors have clear borders and grow slowly, while malignant ones grow rapidly and invade nearby tissues. Brain tumors can cause various symptoms, including pain, brain damage, and even death, depending on their location and type.
Retrieved Context: [Document(metadata={'page': 582, 'source': './Medical_book.pdf'}, page_content='extension and rarely metastasize (spread) outside the\nbrain. A benign brain tumor is composed of non-cancer-\nous cells and does not metastasize beyond the part of the\nbrain where it originates. A brain tumor is considered\nmalignant if it contains cancer cells, or if it is composed\nof harmless cells located in an area where it suppresses\none or more vital functions.\nDescription\nEach year, more than 17,000 brain tumors are diag-\nnosed in the United States. About half of all primary\nbrain tumors are benign, but in lif

In [41]:
# Evaluate the retriever
evaluation_metrics = evaluate_retrieval(retriever, ground_truth, k=5)
print("Retrieval Evaluation Metrics:", evaluation_metrics)

Retrieval Evaluation Metrics: {'Precision@k': 0.092226148409894, 'Recall@k': 0.46113074204946997}


In [51]:
import json

# Define your RAG chain or model
# Assuming `rag_chain_with_source` is already initialized
# Example: response = rag_chain_with_source.invoke("query")

# Queries to generate responses for
queries = [
    "explain about brain tumour",
    "what are the symptoms of diabetes",
    "treatment options for hypertension"
]

# Path to save the generated responses
output_file = "/Users/innovapathinc/Desktop/Gen Ai Topics /Retrieval_eval/ML_GenAI_Concepts/RAG/generated_responses.json"

# List to store generated responses
generated_responses = []

# Generate and store responses
for query in queries:
    response = rag_chain_with_source.invoke(query)  # Call your RAG pipeline
    # Ensure retrieved context and answer are strings
    generated_responses.append({
        "query": query,
        "generated_answer": response["answer"].content if hasattr(response["answer"], "content") else str(response["answer"]),
        "retrieved_context": str(response["context"])  # Convert the context to string
    })

# Save the generated responses to a JSON file
with open(output_file, "w") as file:
    json.dump(generated_responses, file, indent=4)

print(f"Generated responses saved to: {output_file}")


Generated responses saved to: /Users/innovapathinc/Desktop/Gen Ai Topics /Retrieval_eval/ML_GenAI_Concepts/RAG/generated_responses.json


In [None]:
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from rouge import Rouge

def evaluate_bleu_rouge(responses, ground_truths):
    """
    Evaluate LLM-generated responses using BLEU and ROUGE scores.

    Args:
        responses (list of str): List of responses generated by the LLM.
        ground_truths (list of str): List of ground truth answers.

    Returns:
        dict: A dictionary containing BLEU and ROUGE scores.
    """
    assert len(responses) == len(ground_truths), "Responses and ground truths must have the same length."

    # Initialize scores
    total_bleu_score = 0
    rouge = Rouge()
    rouge_scores = []

    # Evaluate each response
    for response, ground_truth in zip(responses, ground_truths):
        # Compute BLEU score
        bleu_score = sentence_bleu(
            [ground_truth.split()], response.split(),
            smoothing_function=SmoothingFunction().method1
        )
        total_bleu_score += bleu_score

        # Compute ROUGE scores
        rouge_score = rouge.get_scores(response, ground_truth, avg=True)
        rouge_scores.append(rouge_score)

    # Average BLEU score 
    avg_bleu_score = total_bleu_score / len(responses)

    # Average ROUGE scores
    avg_rouge_score = {
        "rouge-1": {
            "f": sum(score["rouge-1"]["f"] for score in rouge_scores) / len(rouge_scores),
            "p": sum(score["rouge-1"]["p"] for score in rouge_scores) / len(rouge_scores),
            "r": sum(score["rouge-1"]["r"] for score in rouge_scores) / len(rouge_scores),
        },
        "rouge-2": {
            "f": sum(score["rouge-2"]["f"] for score in rouge_scores) / len(rouge_scores),
            "p": sum(score["rouge-2"]["p"] for score in rouge_scores) / len(rouge_scores),
            "r": sum(score["rouge-2"]["r"] for score in rouge_scores) / len(rouge_scores),
        },
        "rouge-l": {
            "f": sum(score["rouge-l"]["f"] for score in rouge_scores) / len(rouge_scores),
            "p": sum(score["rouge-l"]["p"] for score in rouge_scores) / len(rouge_scores),
            "r": sum(score["rouge-l"]["r"] for score in rouge_scores) / len(rouge_scores),
        },
    }

    return {
        "Average BLEU Score": avg_bleu_score,
        "Average ROUGE Score": avg_rouge_score,
    }

responses = [
    "This is a generated response.",
    "Another generated response."
]
ground_truths = [
    "This is the expected response.",
    "Another correct response."
]

scores = evaluate_bleu_rouge(responses, ground_truths)
print(scores)
print("Answer:", response["answer"].content)

{'Average BLEU Score': 0.1304316792244985, 'Average ROUGE Score': {'rouge-1': {'f': 0.6333333283333334, 'p': 0.6333333333333333, 'r': 0.6333333333333333}, 'rouge-2': {'f': 0.12499999750000004, 'p': 0.125, 'r': 0.125}, 'rouge-l': {'f': 0.6333333283333334, 'p': 0.6333333333333333, 'r': 0.6333333333333333}}}
Answer: Treatment options for hypertension include ACE inhibitors, which are prescription medications that help control high blood pressure and may need to be taken for life. Additionally, lifestyle changes such as avoiding salty foods and maintaining a healthy weight are important. Regular physician check-ups are necessary to monitor blood pressure and adjust treatment as needed.
