In [None]:
# Install necessary libraries (if required)
!pip install llama-index-vector-stores-faiss
!pip install llama_index llama-index-llms-openai
!pip install PyPDF2 faiss-cpu -U langchain-community rouge-score python-dotenv


In [None]:
# Import necessary libraries
import os
from llama_index.llms.openai import OpenAI
from dotenv import load_dotenv
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from rouge_score import rouge_scorer
import csv

In [None]:
###############################################
# Step 1: Configuration
###############################################
# Load environment variables (python-dotenv reads them from a .env file)
load_dotenv()

# Read the OpenAI API key from the environment (for example an
# OPENAI_API_KEY entry in .env) instead of pasting the secret into the
# notebook, and fail early with a clear message when it is missing.
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to a .env file or export it "
        "before running this notebook."
    )
os.environ["OPENAI_API_KEY"] = openai_api_key

# OpenAI model configuration
LLM_MODEL = "gpt-4"  # Default model
TEMPERATURE = 0  # Control randomness

###############################################
# Step 2: Query Function
###############################################
def test_llama_index_openai_llm(prompt, model=LLM_MODEL, temperature=TEMPERATURE):
    """
    Query an OpenAI LLM directly (no retrieval) and return its answer text.

    The prompt is suffixed with an instruction asking for a one-word answer
    before it is sent to the model.

    Args:
        prompt (str): Input prompt to query.
        model (str): OpenAI model to use.
        temperature (float): Temperature parameter for response generation.

    Returns:
        str: The model's answer text. If anything raises while querying the
        model, a string of the form "Error: <message>" is returned instead,
        so a batch of queries keeps running after a single failure.
    """
    try:
        # Append an instruction that asks the model for a one-word answer
        concise_prompt = f"{prompt}\nAnswer in one word or less:"

        llm = OpenAI(model=model, temperature=temperature)
        response = llm.complete(concise_prompt)

        # Return plain text so the success path and the error path both
        # yield `str`, as documented; fall back to str() when the response
        # object does not expose a string `.text` attribute.
        text = getattr(response, "text", None)
        return text if isinstance(text, str) else str(response)
    except Exception as e:
        # Best-effort: report the failure in-band rather than raising
        return f"Error: {e}"


In [None]:

###############################################
# Step 3: Queries and Ground Truths
###############################################
# Evaluation questions sent to the LLM. Each question is paired by position
# with the answer at the same index in `ground_truths` (the evaluation loop
# zips the two lists), so both lists must keep the same order and length.
queries = [
    # Gaucher Disease (8 questions)
    "Which biomarker is significantly elevated in the plasma of Gaucher Disease Type 1 patients?",
    "What therapy reduces urinary GlcSph levels in Gaucher Disease patients?",
    "Which biomarker is considered the gold standard for monitoring Gaucher Disease Type 1?",
    "What technique is used to quantify lyso-Gb1 and its analogs in plasma?",
    "Which urinary biomarker is highlighted for monitoring Gaucher Disease progression?",
    "What urinary biomarker class is elevated in untreated Gaucher Disease patients?",
    "What type of molecule is lyso-Gb1, which is associated with Gaucher Disease Type 1?",
    "Which class of biomarkers does lyso-Gb1 belong to in the context of Gaucher Disease Type 1?",
    # NAFLD / NASH (6 questions)
    "Which amino acids are elevated in patients with NASH compared to NAFLD?",
    "What metabolic pathway is closely associated with NASH progression?",
    "What urinary biomarker distinguishes between NAFLD and NASH?",
    "What metabolic pathway is altered in the progression from NAFLD to NASH?",
    "Which sulfated steroid increases with the progression of fibrosis in NAFLD?",
    "Which metabolite ratio is associated with fibrosis severity in NAFLD?",
    # Diabetes (6 questions)
    "Which biomarker is commonly used for early detection of Type 2 Diabetes (T2D)?",
    "What metabolic pathway is associated with 3-hydroxybutyrate in T2D?",
    "Which amino acids are identified as predictors of future diabetes in metabolomic studies?",
    "What diagnostic method is used for metabolic profiling in diabetes research?",
    "Which metabolite is associated with the progression of diabetic kidney disease?",
    "What technology is used for identifying lipid metabolism-related biomarkers in diabetes?"
]

# Reference answers, one per entry in `queries` and in the same order.
# Every answer ends with a period; the BLEU computation in this notebook
# tokenizes on whitespace, so that period stays attached to the last token.
ground_truths = [
    # Gaucher Disease (answers to questions 1-8)
    "Glucosylsphingosine (GlcSph).",
    "Enzyme Replacement Therapy (ERT).",
    "Lyso-Gb1.",
    "UPLC-MS/MS.",
    "Lyso-Gb1 analogs.",
    "Polycyclic Lyso-Gb1 analogs.",
    "A glucosylsphingosine derivative.",
    "Lipid biomarkers.",
    # NAFLD / NASH (answers to questions 9-14)
    "Glutamate and phenylalanine.",
    "Amino acid metabolism.",
    "Pyroglutamic acid.",
    "Pentose phosphate pathway.",
    "16-OH-DHEA-S.",
    "16-OH-DHEA-S/DHEA-S.",
    # Diabetes (answers to questions 15-20)
    "HbA1c.",
    "Ketogenesis.",
    "Branched-chain amino acids (BCAAs) like isoleucine, leucine, and valine.",
    "Nuclear Magnetic Resonance (NMR) spectroscopy.",
    "Phenylalanine.",
    "Liquid Chromatography-Mass Spectrometry (LC-MS)."
]

In [None]:
###############################################
# Step 4: Evaluation Function
###############################################
def evaluate_responses(queries, ground_truths):
    """
    Evaluate OpenAI LLM responses against ground truths using ROUGE and BLEU.

    Each query is sent to the model through `test_llama_index_openai_llm`,
    and the returned text is scored against the ground truth at the same
    position.

    Args:
        queries (list): List of query strings.
        ground_truths (list): Corresponding ground truth answers, in the
            same order as `queries`.

    Returns:
        list: One dict per query with the keys "query", "ground_truth",
        "response", "rouge1", "rouge2", "rougeL" (ROUGE F-measures) and
        "bleu".

    Raises:
        ValueError: If `queries` and `ground_truths` differ in length.
    """
    # Materialize the inputs so any iterable is accepted and len() is safe
    queries = list(queries)
    ground_truths = list(ground_truths)

    # zip() would silently drop the unmatched tail, leaving some pairs
    # unevaluated without any notice, so reject mismatched inputs up front.
    if len(queries) != len(ground_truths):
        raise ValueError(
            "queries and ground_truths must have the same length "
            f"(got {len(queries)} and {len(ground_truths)})"
        )

    rouge_scorer_instance = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    smoothing_function = SmoothingFunction().method4

    results = []

    for query, ground_truth in zip(queries, ground_truths):
        response = test_llama_index_openai_llm(query)

        # The query helper may hand back either an object exposing `.text`
        # or a plain string; normalize both to stripped text.
        response_text = str(getattr(response, "text", response)).strip()

        # ROUGE: ground truth is the target, the response is the prediction
        rouge_scores = rouge_scorer_instance.score(ground_truth, response_text)

        # BLEU over whitespace tokens, with a single reference
        bleu_score = sentence_bleu(
            [ground_truth.split()],
            response_text.split(),
            smoothing_function=smoothing_function
        )

        results.append({
            "query": query,
            "ground_truth": ground_truth,
            "response": response_text,
            "rouge1": rouge_scores['rouge1'].fmeasure,
            "rouge2": rouge_scores['rouge2'].fmeasure,
            "rougeL": rouge_scores['rougeL'].fmeasure,
            "bleu": bleu_score
        })

    return results

###############################################
# Step 5: Save Results to CSV
###############################################
def save_results_to_csv(results, output_file="evaluation_metrics.csv"):
    """
    Write the evaluation results to a CSV file and print a confirmation.

    The file starts with a header row; each result then becomes one row with
    the three text fields followed by the four metrics formatted to four
    decimal places.

    Args:
        results (list): List of evaluation result dicts.
        output_file (str): Filepath for the output CSV.
    """
    header = ["Query", "Ground Truth", "Response", "ROUGE-1", "ROUGE-2", "ROUGE-L", "BLEU"]
    text_fields = ("query", "ground_truth", "response")
    metric_fields = ("rouge1", "rouge2", "rougeL", "bleu")

    with open(output_file, mode="w", newline="", encoding="utf-8") as handle:
        csv_out = csv.writer(handle)
        csv_out.writerow(header)
        # Rows are generated lazily and written one at a time
        csv_out.writerows(
            [entry[name] for name in text_fields]
            + [format(entry[name], ".4f") for name in metric_fields]
            for entry in results
        )

    print(f"Results saved to {output_file}")

###############################################
# Step 6: Run Evaluation
###############################################
if __name__ == "__main__":
    print("Starting evaluation of LLM responses...")

    # Query the model for every question and score each answer; the result
    # list stays at module level so later cells can read it.
    evaluation_results = evaluate_responses(queries=queries, ground_truths=ground_truths)

    # Persist the per-query ROUGE/BLEU metrics
    save_results_to_csv(
        results=evaluation_results,
        output_file="query_based_evaluation_metrics.csv",
    )

    print("Evaluation complete.")

In [None]:
!pip install ragas datasets

In [None]:
from datasets import Dataset
from ragas import evaluate
from ragas.metrics import faithfulness, answer_correctness, context_recall, context_precision, answer_relevancy
import pandas as pd

# Step 1: Reshape `evaluation_results` into the column layout passed to RAGAS.
# No retrieval is performed in this notebook, so every sample gets an empty
# context list.
data_samples = {
    'question': [row["query"] for row in evaluation_results],
    'answer': [row["response"] for row in evaluation_results],  # Generated LLM responses
    'contexts': [[] for _ in range(len(evaluation_results))],
    'ground_truth': [row["ground_truth"] for row in evaluation_results],  # Ground truth answers
}

# Step 2: Wrap the columns in a `datasets.Dataset` object
dataset = Dataset.from_dict(data_samples)

# Step 3: Score the dataset with the selected RAGAS metrics
# NOTE(review): faithfulness, context_recall and context_precision are
# context-based metrics; confirm they yield meaningful values when every
# context list is empty.
score = evaluate(
    dataset,
    metrics=[
        faithfulness,
        answer_correctness,
        context_recall,
        context_precision,
        answer_relevancy,
    ],
)

# Step 4: Export the scores as a Pandas DataFrame and write them to CSV
output_score_csv_path = "ragas_evaluation_metrics_no_context.csv"
df = score.to_pandas()
df.to_csv(output_score_csv_path, index=False)

print(f"RAGAS evaluation scores saved to {output_score_csv_path}")
