In [None]:
!pip install llama-index-vector-stores-faiss
!pip install llama_index llama-index-llms-openai
!pip install PyPDF2
!pip install faiss-cpu
!pip install -U langchain-community
!pip install rouge-score
!pip install python-dotenv

In [None]:
import os
from llama_index.llms.openai import OpenAI
from dotenv import load_dotenv

# Load environment variables (picks up a local .env file if one exists)
load_dotenv()

# Set OpenAI API Key
# The key is never written into the notebook: it is read from the environment
# and, if it is not there, requested interactively so it is not saved with the file.
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    from getpass import getpass

    openai_api_key = getpass("Enter your OpenAI API key: ").strip()
if not openai_api_key:
    raise ValueError(
        "OPENAI_API_KEY is not set; add it to a .env file or export it before running this cell."
    )
os.environ["OPENAI_API_KEY"] = openai_api_key

def test_llama_index_openai_llm(prompt, model=LLM_MODEL, temperature=TEMPERATURE):
    """
    Query OpenAI LLM for direct question answering with concise responses.

    Args:
        prompt (str): Input prompt to query.
        model (str): OpenAI model to use.
        temperature (float): Temperature parameter for response generation.

    Returns:
        str: Model's response.
    """
    try:
        # Modify the prompt to ensure concise, one-word responses
        concise_prompt = f"{prompt}\nAnswer in one word or less:"

        llm = OpenAI(model=model, temperature=temperature)
        response = llm.complete(concise_prompt)
        return response
    except Exception as e:
        return f"Error: {e}"


if __name__ == "__main__":
    # Smoke test for the direct (non-RAG) LLM call. In a notebook kernel
    # __name__ is "__main__", so this runs every time the cell is executed.
    # Example prompt
    example_prompt = "What is the capital of  France?"

    # Test the LLM
    print("Testing LlamaIndex OpenAI LLM...")
    # The helper catches its own exceptions, so `response` is either the
    # completion object or an "Error: ..." string.
    response = test_llama_index_openai_llm(example_prompt)
    print("Response:")
    print(response)


In [None]:
# prompt: from "/content/drive/MyDrive/sample_pdf_rag" copy the folder and contents to current directory

import shutil
import os

# Copy the sample PDF folder into the current working directory.
# NOTE(review): the source path looks like a Colab Google Drive mount — confirm
# the drive is mounted before running this cell.
source_path = "/content/drive/MyDrive/sample_pdf_rag"
destination_path = os.getcwd()

if not os.path.exists(source_path):
    # Nothing to copy from
    print(f"Source folder '{source_path}' not found.")
else:
    try:
        shutil.copytree(source_path, os.path.join(destination_path, "sample_pdf_rag"))
        print(f"Folder '{source_path}' copied to '{destination_path}' successfully.")
    except FileExistsError:
        # copytree refuses to overwrite an existing target folder
        print("Folder already exists in the destination directory")
    except OSError as copy_error:
        print(f"Error copying folder: {copy_error}")


In [None]:
import os
import faiss
import json
import torch
from dotenv import load_dotenv
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext, load_index_from_storage
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.core.schema import TextNode
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.node_parser import SentenceSplitter
from langchain.vectorstores import FAISS

###############################################
# Step 1: Configuration
###############################################
# Load environment variables (picks up a local .env file if one exists)
load_dotenv()

# The key is never written into the notebook: it is read from the environment
# and, if it is not there, requested interactively so it is not saved with the file.
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    from getpass import getpass

    openai_api_key = getpass("Enter your OpenAI API key: ").strip()
if not openai_api_key:
    raise ValueError(
        "OPENAI_API_KEY is not set; add it to a .env file or export it before running this cell."
    )
os.environ["OPENAI_API_KEY"] = openai_api_key

# General configurations
PDF_FOLDER = "./sample_pdf_rag"  # Consistent folder naming
PERSIST_DIR = "./storage"
DOC_META_PATH = "doc_metadata.json"  # JSON dump of the per-chunk metadata
INDEX_SAVE_PATH = "faiss_index.bin"  # Serialized FAISS index
EMBEDDING_MODEL = "text-embedding-ada-002"
LLM_MODEL = "gpt-4"

# FAISS configuration
dimension = 1536  # Embedding dimension for 'text-embedding-ada-002'
CHUNK_SIZE = 500  # passed to SentenceSplitter as chunk_size
OVERLAP = 50  # passed to SentenceSplitter as chunk_overlap

###############################################
# Step 2: Load Documents and Create Nodes
###############################################
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF as a single string.

    Pages for which ``extract_text()`` returns nothing (``None`` or an empty
    string) are skipped.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        str: The page texts in page order, each followed by a newline.
    """
    import PyPDF2

    with open(pdf_path, 'rb') as pdf_file:
        # Extract lazily, page by page, while the file is still open
        page_texts = (page.extract_text() for page in PyPDF2.PdfReader(pdf_file).pages)
        return "".join(page_text + "\n" for page_text in page_texts if page_text)

# Load documents
if not os.path.exists(PDF_FOLDER):
    raise FileNotFoundError(f"Folder '{PDF_FOLDER}' not found.")

# Read and chunk documents
documents = []     # one TextNode per chunk
doc_metadata = []  # doc_metadata[i] describes documents[i]
doc_id = 0

# The splitter only holds configuration, so build it once rather than once per file
splitter = SentenceSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=OVERLAP)

# os.listdir() returns entries in arbitrary order; sorting keeps doc ids and
# chunk positions identical from one run to the next.
for filename in sorted(os.listdir(PDF_FOLDER)):
    if not filename.lower().endswith(".pdf"):
        continue

    pdf_path = os.path.join(PDF_FOLDER, filename)
    text = extract_text_from_pdf(pdf_path)

    # Chunking text
    chunks = splitter.split_text(text)

    for chunk_id, chunk in enumerate(chunks):
        node = TextNode(text=chunk, id_=f"{doc_id}_{chunk_id}")
        doc_metadata.append({
            "doc_id": doc_id,
            "filename": filename,
            "chunk_id": chunk_id,
            "text": chunk
        })
        documents.append(node)
    # A PDF consumes a doc id even when it produced no chunks
    doc_id += 1

if not documents:
    # Surface an empty corpus here; otherwise it only shows up later as empty retrievals
    print(f"Warning: no text chunks were extracted from the PDFs in '{PDF_FOLDER}'.")

###############################################
# Step 3: Setup FAISS Vector Store and Index
###############################################
# Initialize FAISS index (exact L2 search over `dimension`-sized vectors)
faiss_index = faiss.IndexFlatL2(dimension)
# NOTE: storage_context is not used by any code visible in this notebook
storage_context = StorageContext.from_defaults()

# Use OpenAI embeddings
embedding_model = OpenAIEmbedding(model=EMBEDDING_MODEL)
# vectorstore = FAISS.from_documents(doc_chunks, embeddings)

# The index and its metadata are persisted as two files, so reuse them only
# when both exist. Delete both files to force a rebuild, e.g. after the PDFs change.
if not (os.path.exists(INDEX_SAVE_PATH) and os.path.exists(DOC_META_PATH)):
    print("Building FAISS index...")

    # One embedding request per chunk, in the same order as `doc_metadata`
    embeddings = [embedding_model.get_text_embedding(node.text) for node in documents]

    if embeddings:
        # FAISS expects a float32 array of shape (n_vectors, dimension)
        faiss_index.add(torch.tensor(embeddings, dtype=torch.float32).numpy())

    # Save metadata and FAISS index
    with open(DOC_META_PATH, 'w', encoding='utf-8') as f:
        json.dump(doc_metadata, f)
    faiss.write_index(faiss_index, INDEX_SAVE_PATH)
else:
    print("Loading existing FAISS index...")
    faiss_index = faiss.read_index(INDEX_SAVE_PATH)

    # Use the metadata saved with the index so that row i of the index
    # still maps to doc_metadata[i].
    with open(DOC_META_PATH, encoding='utf-8') as f:
        doc_metadata = json.load(f)

    if faiss_index.ntotal != len(doc_metadata):
        raise ValueError(
            f"'{INDEX_SAVE_PATH}' holds {faiss_index.ntotal} vectors but '{DOC_META_PATH}' "
            f"describes {len(doc_metadata)} chunks; delete both files and rerun to rebuild."
        )

###############################################
# Step 4: Query Pipeline
###############################################
def retrieve_documents(query, top_k=2):
    """
    Embed ``query`` and look up its ``top_k`` nearest chunks in the FAISS index.

    Args:
        query (str): Text to search for.
        top_k (int): Number of neighbours requested from the index.

    Returns:
        list[dict]: One entry per hit, in the order the index returned them,
        with keys "score" (the value the index reported for the hit),
        "content" (the chunk text) and "metadata" (the chunk's full
        ``doc_metadata`` record).
    """
    # The index is searched with a single-row batch built from the query embedding
    query_vector = torch.tensor(embedding_model.get_text_embedding(query)).unsqueeze(0).numpy()
    scores, indices = faiss_index.search(query_vector, top_k)

    # Only one query was sent, so row 0 holds all hits; positions equal to -1 are skipped
    return [
        {
            "score": hit_score,
            "content": doc_metadata[position]["text"],
            "metadata": doc_metadata[position],
        }
        for position, hit_score in zip(indices[0], scores[0])
        if position != -1
    ]

def query_with_llm(query, retrieved_docs):
    """
    Ask the LLM to answer ``query`` using only the retrieved chunks as context.

    Args:
        query (str): The user's question.
        retrieved_docs (list[dict]): Retrieval hits; each must have a "content" key.

    Returns:
        The object returned by ``llm.complete`` for the assembled prompt.
    """
    llm_client = OpenAI(model=LLM_MODEL, temperature=0)
    context = "\n".join(doc["content"] for doc in retrieved_docs)

    # The four-space indentation and the leading newline are part of the prompt text
    prompt = (
        "\n"
        "    You are a knowledgeable assistant. Use the context below to answer the question concisely in as few words as possible.\n"
        "    Context:\n"
        f"    {context}\n"
        "\n"
        f"    Question: {query}\n"
        "    Answer (in minimal words):"
    )
    return llm_client.complete(prompt)

###############################################
# Example Usage
###############################################
# if __name__ == "__main__":
#     user_query = "What metabolites are associated with breast cancer?"

#     # Retrieve documents
#     top_docs = retrieve_documents(user_query, top_k=5)
#     print("Retrieved Documents:")
#     for doc in top_docs:
#         print(doc["content"][:200], "...")

#     # Generate answer with LLM
#     final_answer = query_with_llm(user_query, top_docs)
#     print("\nFinal Answer:")
#     print(final_answer)


In [None]:
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from rouge_score import rouge_scorer

###############################################
# Step 5: Evaluation with ROUGE and BLEU
###############################################
# Sample ground truth data (Replace this with your actual ground truth dataset)
# `queries[i]` is answered by `ground_truths[i]`; the two lists are paired by position.
queries = [
    "Which biomarker is significantly elevated in the plasma of Gaucher Disease Type 1 patients?",
    "What therapy reduces urinary GlcSph levels in Gaucher Disease patients?",
    "Which biomarker is considered the gold standard for monitoring Gaucher Disease Type 1?",
    "What technique is used to quantify lyso-Gb1 and its analogs in plasma?",
    "Which urinary biomarker is highlighted for monitoring Gaucher Disease progression?",
    "What urinary biomarker class is elevated in untreated Gaucher Disease patients?",
    "What type of molecule is lyso-Gb1, which is associated with Gaucher Disease Type 1?",
    "Which class of biomarkers does lyso-Gb1 belong to in the context of Gaucher Disease Type 1?",
    "Which amino acids are elevated in patients with NASH compared to NAFLD?",
    "What metabolic pathway is closely associated with NASH progression?",
    "What urinary biomarker distinguishes between NAFLD and NASH?",
    "What metabolic pathway is altered in the progression from NAFLD to NASH?",
    "Which sulfated steroid increases with the progression of fibrosis in NAFLD?",
    "Which metabolite ratio is associated with fibrosis severity in NAFLD?",
    "Which biomarker is commonly used for early detection of Type 2 Diabetes (T2D)?",
    "What metabolic pathway is associated with 3-hydroxybutyrate in T2D?",
    "Which amino acids are identified as predictors of future diabetes in metabolomic studies?",
    "What diagnostic method is used for metabolic profiling in diabetes research?",
    "Which metabolite is associated with the progression of diabetic kidney disease?",
    "What technology is used for identifying lipid metabolism-related biomarkers in diabetes?"
]

ground_truths = [
    "Glucosylsphingosine (GlcSph).",
    "Enzyme Replacement Therapy (ERT).",
    "Lyso-Gb1.",
    "UPLC-MS/MS.",
    "Lyso-Gb1 analogs.",
    "Polycyclic Lyso-Gb1 analogs.",
    "A glucosylsphingosine derivative.",
    "Lipid biomarkers.",
    "Glutamate and phenylalanine.",
    "Amino acid metabolism.",
    "Pyroglutamic acid.",
    "Pentose phosphate pathway.",
    "16-OH-DHEA-S.",
    "16-OH-DHEA-S/DHEA-S.",
    "HbA1c.",
    "Ketogenesis.",
    "Branched-chain amino acids (BCAAs) like isoleucine, leucine, and valine.",
    "Nuclear Magnetic Resonance (NMR) spectroscopy.",
    "Phenylalanine.",
    "Liquid Chromatography-Mass Spectrometry (LC-MS)."
]

# The evaluator pairs the lists with zip(), which silently drops unmatched
# entries, so catch a length mismatch here where it is easy to spot.
assert len(queries) == len(ground_truths), (
    f"{len(queries)} queries but {len(ground_truths)} ground truths"
)


# def evaluate_results_with_llm(queries, ground_truths):
#     """
#     Evaluate LLM-generated responses with ROUGE and BLEU metrics.

#     Parameters:
#     - queries: List of queries.
#     - ground_truths: List of ground truth answers.

#     Returns:
#     - results: List of evaluation metrics for each query.
#     """
#     # If you want to be absolutely sure they match in length, you can re-enable or remove this check:
#     # assert len(queries) == len(ground_truths), "Queries and ground truths must have the same length."

#     # Initialize scorers
#     rouge_scorer_instance = rouge_scorer.RougeScorer(
#         ['rouge1', 'rouge2', 'rougeL'],
#         use_stemmer=True
#     )
#     smoothing_function = SmoothingFunction().method4  # For BLEU smoothing

#     results = []
#     # Safely zip up to the shortest list; or just zip if you're sure they're the same length
#     for query, ground_truth in zip(queries, ground_truths):
#         # 1) Retrieve documents from your RAG pipeline
#         retrieved_docs = retrieve_documents(query, top_k=3)

#         # 2) Generate an LLM response based on the retrieved docs
#         response = query_with_llm(query, retrieved_docs)

#         # 3) Convert the response to a plain string
#         #    If response is already a string, no change needed. Otherwise convert.
#         #    For example, if your LLM returns a dictionary or an object, adapt accordingly.
#         response_text = str(response).strip()

#         # 4) Calculate ROUGE
#         rouge_scores = rouge_scorer_instance.score(ground_truth, response_text)

#         # 5) Calculate BLEU
#         bleu_score = sentence_bleu(
#             [ground_truth.split()],   # Reference
#             response_text.split(),    # Candidate
#             smoothing_function=smoothing_function
#         )

#         # 6) Store the results
#         results.append({
#             "query": query,
#             "ground_truth": ground_truth,
#             "response": response_text,
#             "rouge1": rouge_scores['rouge1'].fmeasure,
#             "rouge2": rouge_scores['rouge2'].fmeasure,
#             "rougeL": rouge_scores['rougeL'].fmeasure,
#             "bleu": bleu_score
#         })

#     return results

def evaluate_results_with_llm(queries, ground_truths, top_k=2):
    """
    Answer every query through the RAG pipeline and score it against its reference.

    Queries and ground truths are paired by position with ``zip``, so any
    surplus entries in the longer list are ignored.

    Args:
        queries: Questions to evaluate.
        ground_truths: Reference answers, aligned with ``queries``.
        top_k (int): Number of chunks to retrieve per query.

    Returns:
        list[dict]: One record per evaluated pair with keys "query",
        "ground_truth", "response", "retrieved_context", "rouge1", "rouge2",
        "rougeL" (ROUGE F-measures) and "bleu".
    """
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    smoothing = SmoothingFunction().method4

    def _evaluate_pair(query, ground_truth):
        """Retrieve, answer and score a single query/reference pair."""
        retrieved_docs = retrieve_documents(query, top_k=top_k)

        # Retrieved chunks joined into one string, kept in the record for inspection
        context_text = "\n".join(doc["content"] for doc in retrieved_docs)

        response_text = str(query_with_llm(query, retrieved_docs)).strip()

        rouge = scorer.score(ground_truth, response_text)

        # BLEU over whitespace tokens, unigrams and bigrams weighted equally
        bleu = sentence_bleu(
            [ground_truth.split()],
            response_text.split(),
            smoothing_function=smoothing,
            weights=(0.5, 0.5, 0, 0),
        )

        return {
            "query": query,
            "ground_truth": ground_truth,
            "response": response_text,  # full response, not truncated
            "retrieved_context": context_text,
            "rouge1": rouge['rouge1'].fmeasure,
            "rouge2": rouge['rouge2'].fmeasure,
            "rougeL": rouge['rougeL'].fmeasure,
            "bleu": bleu,
        }

    return [
        _evaluate_pair(query, ground_truth)
        for query, ground_truth in zip(queries, ground_truths)
    ]


###############################################
# Run Evaluation
###############################################
# Runs retrieval + answer generation for every query/ground-truth pair
# (top_k left at its default of 2); later cells reuse `evaluation_results`.
evaluation_results = evaluate_results_with_llm(queries, ground_truths)

import csv

# # Define the CSV file path
# output_csv_path = "evaluation_metrics.csv"

# # Collect evaluation results into a CSV
# with open(output_csv_path, mode='w', newline='', encoding='utf-8') as csvfile:
#     writer = csv.writer(csvfile)
#     # Write header
#     writer.writerow(["Query", "Ground Truth", "Response", "ROUGE-1", "ROUGE-2", "ROUGE-L", "BLEU"])

#     # Write each evaluation result row
#     for result in evaluation_results:
#         writer.writerow([
#             result["query"],
#             result["ground_truth"],
#             result["response"],  # You may want to truncate this if responses are too long
#             f"{result['rouge1']:.4f}",
#             f"{result['rouge2']:.4f}",
#             f"{result['rougeL']:.4f}",
#             f"{result['bleu']:.4f}"
#         ])

# print(f"Evaluation metrics saved to {output_csv_path}")

# Destination for the per-query metrics, including the retrieved context
output_csv_path = "evaluation_metrics_with_context.csv"

with open(output_csv_path, mode='w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    # Header row
    writer.writerow(["Query", "Ground Truth", "Response", "Retrieved Context", "ROUGE-1", "ROUGE-2", "ROUGE-L", "BLEU"])

    # One row per evaluated query; metric columns are formatted to four decimals
    writer.writerows(
        [
            result["query"],
            result["ground_truth"],
            result["response"],
            result["retrieved_context"],
            *(f"{result[metric]:.4f}" for metric in ("rouge1", "rouge2", "rougeL", "bleu")),
        ]
        for result in evaluation_results
    )

print(f"Evaluation metrics saved to {output_csv_path}")


# # Print evaluation results
# for result in evaluation_results:
#     print("Query:", result["query"])
#     print("Ground Truth:", result["ground_truth"])
#     # Truncate response for readability
#     print("Response:", result["response"][:200], "...")
#     print(f"ROUGE-1: {result['rouge1']:.4f}, "
#           f"ROUGE-2: {result['rouge2']:.4f}, "
#           f"ROUGE-L: {result['rougeL']:.4f}, "
#           f"BLEU: {result['bleu']:.4f}")
#     print("------")

In [None]:
import csv

# Destination for the quantitative (ROUGE/BLEU) metrics of this run
output_csv_path = "quantitative_evaluation_metrics_with_context_4openai.csv"

with open(output_csv_path, mode='w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile, quoting=csv.QUOTE_MINIMAL, escapechar='\\')
    # Header row
    writer.writerow(["Query", "Ground Truth", "Response", "Retrieved Context", "ROUGE-1", "ROUGE-2", "ROUGE-L", "BLEU"])

    # One row per evaluated query; a missing "retrieved_context" becomes an empty cell
    writer.writerows(
        [
            result["query"],
            result["ground_truth"],
            result["response"],
            result.get("retrieved_context", ""),
            *(f"{result[metric]:.4f}" for metric in ("rouge1", "rouge2", "rougeL", "bleu")),
        ]
        for result in evaluation_results
    )

print(f"Evaluation metrics saved to {output_csv_path}")


In [None]:
!pip install datasets
!pip install ragas

In [None]:
from datasets import Dataset
from ragas import evaluate
from ragas.metrics import faithfulness, answer_correctness, context_recall, context_precision, answer_relevancy
import pandas as pd

# Step 1: Convert `evaluation_results` to `ragas` format
# NOTE(review): the column names below are what this notebook hands to ragas —
# confirm they match the schema expected by the installed ragas version.
data_samples = {
    'question': [result["query"] for result in evaluation_results],
    'answer': [result["response"] for result in evaluation_results],  # Generated LLM responses
    # Each row is a one-element list: the retrieved chunks were already joined
    # into a single string when `evaluation_results` was built.
    'contexts': [[result["retrieved_context"]] for result in evaluation_results],  # List of retrieved contexts
    'ground_truth': [result["ground_truth"] for result in evaluation_results]  # Ground truth answers
}

# Step 2: Convert to `datasets.Dataset` object
dataset = Dataset.from_dict(data_samples)

# Step 3: Evaluate using RAGAS metrics
# NOTE(review): these metrics presumably call an LLM/embedding backend using the
# OPENAI_API_KEY set earlier — confirm cost and configuration before rerunning.
score = evaluate(dataset, metrics=[faithfulness, answer_correctness, context_recall, context_precision,answer_relevancy])

# Step 4: Convert the scores to a Pandas DataFrame and save as CSV
df = score.to_pandas()
output_score_csv_path = "qualitative_ragas_evaluation_scores_4openai.csv"
df.to_csv(output_score_csv_path, index=False)

print(f"Evaluation scores saved to {output_score_csv_path}")


In [None]:
from ragas import SingleTurnSample
from ragas.metrics import LLMContextPrecisionWithReference

# Standalone check of one ragas metric on a hand-written sample.
# NOTE: this rebinds `context_precision`, shadowing the metric of the same name
# that an earlier cell imports from `ragas.metrics`.
# NOTE(review): no LLM is passed to the metric here — confirm the installed
# ragas version supplies a default evaluator, otherwise scoring may fail.
context_precision = LLMContextPrecisionWithReference()

# Question, reference answer and a single retrieved context
sample = SingleTurnSample(
    user_input="Where is the Eiffel Tower located?",
    reference="The Eiffel Tower is located in Paris.",
    retrieved_contexts=["The Eiffel Tower is located in Paris."],
)

# Top-level `await`: valid in a notebook/IPython cell, not in a plain Python script
await context_precision.single_turn_ascore(sample)