# Evaluating the Model with RAGAs

In [None]:
import json

# Load the evaluation records from disk. The encoding is pinned to UTF-8 so
# the file is decoded the same way regardless of the platform's locale default.
with open("dataset.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Later cells iterate over `data` record by record, so a single top-level
# JSON object is wrapped in a list to keep that iteration uniform.
if isinstance(data, dict):
    data = [data]

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Restore the saved checkpoint and its matching tokenizer from the local directory
model = AutoModelForCausalLM.from_pretrained("./saved_model")
tokenizer = AutoTokenizer.from_pretrained("./saved_model")

# Padding needs a pad token; when the tokenizer defines none, reuse the EOS token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Pick the compute device: CUDA when torch reports it as available, CPU otherwise
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Place the model on the selected device
model = model.to(device)

Test interaction with the model

In [None]:
# Function to interact with the model
def interact_with_model(prompt):
    """Generate one completion for ``prompt`` with the loaded model.

    Relies on the notebook-level ``tokenizer`` and ``model`` objects.

    Args:
        prompt: Text to send to the model. It is truncated to at most 512
            tokens during tokenization.

    Returns:
        The decoded text of the first returned sequence, with special tokens
        removed.
    """
    # Tokenize, then move the encoded batch to the model's device. The model
    # may have been moved to the GPU, and generation fails when the inputs
    # are left on a different device.
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,
    ).to(model.device)

    # Pass the attention mask explicitly and bound the number of *generated*
    # tokens. `max_new_tokens` is used instead of `max_length` because the
    # latter also counts prompt tokens, which would leave no generation
    # budget for prompts of 50 tokens or more.
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=50,
        num_return_sequences=1,
        pad_token_id=tokenizer.pad_token_id,
    )

    # Only one sequence is requested, so decode the first (and only) result
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Prompts used for a quick sanity check of the loaded model
test_prompts = [
    "Hello, how are you?",
    "What is the weather today?",
    "Can you tell me a joke?",
    "What's your favorite color?",
]

# Run every prompt through the model and show each prompt/response pair,
# followed by a 30-dash divider line
for prompt in test_prompts:
    response = interact_with_model(prompt)
    print(f"Prompt: {prompt}", f"Response: {response}", "-" * 30, sep="\n")

Embeddings

In [None]:
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Weaviate
import weaviate
from weaviate.embedded import EmbeddedOptions
from dotenv import load_dotenv,find_dotenv

# Load environment variables (such as the OpenAI API key used by
# OpenAIEmbeddings) from a .env file when one can be found. Variables that
# are already set in the environment are left untouched, and nothing happens
# when no .env file exists.
load_dotenv(find_dotenv())

# One retrievable chunk per dataset record: the record's context passage
chunks = [item["context"] for item in data]

# Set up the vector database as an embedded Weaviate instance
client = weaviate.Client(
    embedded_options=EmbeddedOptions()
)

# Populate the vector database. `chunks` holds plain strings, so they are
# indexed with `from_texts`; `from_documents` expects Document objects and
# fails on raw strings.
vectorstore = Weaviate.from_texts(
    texts=chunks,
    embedding=OpenAIEmbeddings(),
    client=client,
    by_text=False,
)

# Expose the vector store as a retriever to enable semantic search
retriever = vectorstore.as_retriever()

RAGAs

In [None]:
from datasets import Dataset
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser

# Prompt template for the chain. It mirrors the wording used for the
# evaluation prompt further down in this notebook.
rag_prompt = ChatPromptTemplate.from_template(
    "Context: {context}\nQuestion: {question}\nAnswer the Question based on context:"
)


def _format_docs(docs):
    """Join the text of the retrieved documents into a single context string."""
    return "\n\n".join(doc.page_content for doc in docs)


def _generate_from_prompt(prompt_value):
    """Run the local Hugging Face model on a rendered prompt.

    The loaded ``model`` is a raw transformers module rather than a LangChain
    runnable, so this adapter renders the prompt to text, tokenizes it,
    generates, and decodes only the newly generated tokens.
    """
    encoded = tokenizer(prompt_value.to_string(), return_tensors="pt").to(model.device)
    output_ids = model.generate(
        **encoded,
        max_new_tokens=256,  # answer budget; NOTE(review): tune for the dataset
        pad_token_id=tokenizer.pad_token_id,
    )
    # The returned sequence starts with the prompt tokens; drop them so only
    # the completion is returned.
    prompt_length = encoded["input_ids"].shape[1]
    return tokenizer.decode(output_ids[0][prompt_length:], skip_special_tokens=True)


# Retrieval-augmented chain: fetch and format context for the question, fill
# the prompt template, generate with the local model, and return plain text.
# Plain callables placed in the pipeline are wrapped as runnables by LangChain.
rag_chain = (
    {"context": retriever | _format_docs, "question": RunnablePassthrough()}
    | rag_prompt
    | _generate_from_prompt
    | StrOutputParser()
)

Define the RAGAs evaluation pipeline: RAGAs evaluates the correctness of the generated responses based on the context, the question, and the ground-truth answers. Here's how to build the evaluation:

In [None]:
from ragas import evaluate
from ragas.metrics import answer_relevancy, context_precision, context_recall, faithfulness


def qwen_generate(prompt, max_length=512, temperature=0, top_p=0.9):
    """Generate a completion for ``prompt`` with the loaded model.

    Relies on the notebook-level ``tokenizer`` and ``model`` objects.

    Args:
        prompt: Full prompt text sent to the model.
        max_length: Maximum number of tokens to generate. It is passed as
            ``max_new_tokens`` so that a long prompt does not consume the
            generation budget.
        temperature: Sampling temperature. A value of 0 (the default) selects
            deterministic greedy decoding.
        top_p: Nucleus-sampling threshold; only used when ``temperature`` is
            greater than 0.

    Returns:
        The generated continuation only (prompt tokens are stripped), decoded
        with special tokens removed.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    generation_kwargs = {
        "max_new_tokens": max_length,
        "pad_token_id": tokenizer.pad_token_id,
    }
    if temperature and temperature > 0:
        # Sampling parameters only take effect when sampling is enabled
        generation_kwargs.update(do_sample=True, temperature=temperature, top_p=top_p)
    else:
        # A temperature of 0 is not a valid sampling temperature; use greedy
        # decoding instead of forwarding it.
        generation_kwargs["do_sample"] = False

    outputs = model.generate(**inputs, **generation_kwargs)

    # The returned sequence starts with the prompt tokens. Decode only what
    # follows them so the prediction does not echo the context and question.
    prompt_length = inputs["input_ids"].shape[1]
    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)


# Prepare one evaluation record per dataset entry
dataset = [
    {
        "context": entry["context"],
        "question": entry["question"],
        "ground_truth": entry["answer"],
    }
    for entry in data
]


# Answer generator backed by the Qwen model. NOTE: despite its name this does
# not retrieve anything, and it rebinds the name `retriever` that an earlier
# cell assigned to the vector-store retriever. The name is kept so existing
# calls keep working.
def retriever(context, question):
    """Build the QA prompt from ``context`` and ``question`` and return the model's answer."""
    prompt = f"Context: {context}\nQuestion: {question}\nAnswer the Question based on context:"
    return qwen_generate(prompt)


# Generate predictions using the model
for entry in dataset:
    entry["predicted_answer"] = retriever(entry["context"], entry["question"])

# Convert the records into the column layout that ragas evaluates:
# question / answer / contexts (a list of strings per row) / ground_truth.
# `Dataset` is the `datasets.Dataset` class imported in an earlier cell.
eval_dataset = Dataset.from_dict(
    {
        "question": [entry["question"] for entry in dataset],
        "answer": [entry["predicted_answer"] for entry in dataset],
        "contexts": [[entry["context"]] for entry in dataset],
        "ground_truth": [entry["ground_truth"] for entry in dataset],
    }
)

# Evaluate the results using RAGAs metrics.
# NOTE(review): these metrics are LLM-judged and presumably need the OpenAI
# credentials configured earlier in the notebook - confirm before running.
metrics = [faithfulness, answer_relevancy, context_precision, context_recall]
evaluation_results = evaluate(eval_dataset, metrics=metrics)

# Print the evaluation
print(evaluation_results)