# Implementing Evaluation

### prepare data

In [32]:
import pandas as pd
import ast  # Import ast for safe evaluation

def csv_to_dict(file_path):
    df = pd.read_csv(file_path)
    df = df[:10]
    
    data_samples = {
        'question': df['question'].tolist(),
        'answer': df['answer'].tolist(),
        'contexts': df['contexts'].apply(lambda x: x.split('\n\n') if isinstance(x, str) else []).tolist(),
        'ground_truth': df['ground_truth'].tolist()
    }
    
    return data_samples

# Example usage
rag_questions_responses_file = "rag_questions_responses.csv"
data_samples = csv_to_dict(rag_questions_responses_file)
print(data_samples)

{'question': ['What are the daily responsibilities of a Public Health Administrator?', 'What are the daily responsibilities of a Certified Nursing Assistant (CNA)?', 'What does a Makeup Artist do?', 'What does a Inventory Coordinator do?', 'What does a Clinical Data Manager do?', 'What are the daily responsibilities of a Health Informatics Specialist?', 'What are the daily responsibilities of a Nurse Educator?', 'What does a Inventory Manager do?', 'What does a Loss Prevention Specialist do?', 'What are the daily responsibilities of a Supply Chain Manager?'], 'answer': ["Alright, let's dive into what a Public Health Administrator does on a daily basis. Based on the job profiles I have, a Public Health Administrator's responsibilities align closely with those of a Health Services Manager or a Medical and Health Services Manager. Here’s a breakdown:\n\n**Job Role Overview**\nA Public Health Administrator, similar to a Health Services Manager, is responsible for planning, directing, and c

In [67]:
data_samples.keys()

dict_keys(['question', 'answer', 'contexts', 'ground_truth'])

### set api keys

In [77]:
import os


# Set environment variable
# os.environ["OPENAI_API_KEY"] = ""




### Import requirments 

In [45]:
from datasets import Dataset 
import os
from ragas import evaluate
from ragas.metrics import faithfulness, answer_correctness

dataset = Dataset.from_dict(data_samples)

# score = evaluate(dataset, metrics=[faithfulness, answer_correctness])
# df = score.to_pandas()
# df.to_csv('score.csv', index=False)

In [68]:
dataset

Dataset({
    features: ['question', 'answer', 'contexts', 'ground_truth'],
    num_rows: 10
})

In [69]:
from ragas.metrics import (
    context_precision,
    answer_relevancy,
    faithfulness,
    context_recall,
answer_correctness
)

# list of metrics 

metrics = [
    faithfulness,
    answer_correctness,
    answer_relevancy,
    context_recall,
    context_precision,
]

In [70]:
from langchain_openai.chat_models import AzureChatOpenAI
from langchain_openai.embeddings import AzureOpenAIEmbeddings
from ragas import evaluate
import os

# Azure OpenAI API Configurations
azure_endpoint = ""  
api_version = ""  

# Same deployment name for both LLM and Embeddings
deployment_name = ""  



# Initialize AzureChatOpenAI (LLM)
azure_model = AzureChatOpenAI(
    openai_api_type="azure",
    openai_api_version=api_version,
    azure_endpoint=azure_endpoint,
    azure_deployment=deployment_name,
)



In [71]:

from langchain_huggingface import HuggingFaceEmbeddings

# Define embedding model correctly
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

In [72]:

# result = evaluate(
#     dataset, metrics=metrics, llm=azure_model, embeddings=embedding_model
# )

# result


Evaluating:   0%|          | 0/50 [00:00<?, ?it/s]

{'faithfulness': 0.6489, 'answer_correctness': 0.3033, 'answer_relevancy': 0.4196, 'context_recall': 0.2200, 'context_precision': 0.1833}

In [73]:
result_df = result.to_pandas()
result_df

Unnamed: 0,user_input,retrieved_contexts,response,reference,faithfulness,answer_correctness,answer_relevancy,context_recall,context_precision
0,What are the daily responsibilities of a Publi...,[Role: Health Services Manager\nDescription: A...,"Alright, let's dive into what a Public Health ...",A typical day for a Public Health Administrato...,0.958333,0.301241,1.0,0.8,1.0
1,What are the daily responsibilities of a Certi...,[Role: Clinical Nurse Specialist (CNS)\nDescri...,"I am sorry, but I cannot provide information a...",A CNA starts the day by checking on patients a...,0.75,0.176463,0.0,0.0,0.0
2,What does a Makeup Artist do?,[Role: Display Designer\nDescription: A Displa...,"I am sorry, but I cannot provide information a...",A Makeup Artist applies makeup to clients to e...,1.0,0.191464,0.0,0.0,0.0
3,What does a Inventory Coordinator do?,[Role: Merchandise Planner\nDescription: A Mer...,"I am sorry, but I do not have any information ...",An Inventory Coordinator in the e-commerce ret...,0.333333,0.166912,0.0,0.0,0.0
4,What does a Clinical Data Manager do?,[Role: Health Data Analyst\nDescription: A Hea...,A Clinical Trials Manager oversees clinical tr...,A Clinical Data Manager collects and manages d...,1.0,0.143052,0.613728,0.0,0.0
5,What are the daily responsibilities of a Healt...,[Role: Health Information Technician\nDescript...,A Health Informatics Specialist plays a crucia...,A typical day for a Health Informatics Special...,0.909091,0.650018,1.0,0.8,0.0
6,What are the daily responsibilities of a Nurse...,[Role: Registered Nurse (RN)\nDescription: A R...,"Based on the job profiles I have access to, he...",A Nurse Educator starts the day by preparing l...,0.0,0.62861,0.0,0.0,0.0
7,What does a Inventory Manager do?,[Role: Operations Manager\nDescription: An Ope...,An Inventory Manager ensures that a retail sto...,An Inventory Manager in retail store operation...,0.0,0.47116,0.82097,0.6,0.833333
8,What does a Loss Prevention Specialist do?,[Role: Regulatory Affairs Specialist\nDescript...,"I am sorry, but I cannot provide information a...",A Loss Prevention Specialist works in retail s...,1.0,0.139305,0.0,0.0,0.0
9,What are the daily responsibilities of a Suppl...,[Role: Retail Operations Manager\nDescription:...,"Based on the provided job profiles, here's wha...",A typical day for a Supply Chain Manager start...,0.538462,0.164515,0.761792,0.0,0.0


In [76]:
result_file = "result_file.csv"
result_df.to_csv(result_file, index=False)


In [65]:
df = result_1.to_pandas()
df.head()

Unnamed: 0,user_input,retrieved_contexts,response,reference,faithfulness,answer_relevancy,context_recall,context_precision
0,What are the daily responsibilities of a Publi...,[Role: Health Services Manager\nDescription: A...,"Alright, let's dive into what a Public Health ...",A typical day for a Public Health Administrato...,0.958333,1.0,0.8,1.0
1,What are the daily responsibilities of a Certi...,[Role: Clinical Nurse Specialist (CNS)\nDescri...,"I am sorry, but I cannot provide information a...",A CNA starts the day by checking on patients a...,0.75,0.0,0.0,0.0
2,What does a Makeup Artist do?,[Role: Display Designer\nDescription: A Displa...,"I am sorry, but I cannot provide information a...",A Makeup Artist applies makeup to clients to e...,1.0,0.0,0.0,0.0
3,What does a Inventory Coordinator do?,[Role: Merchandise Planner\nDescription: A Mer...,"I am sorry, but I do not have any information ...",An Inventory Coordinator in the e-commerce ret...,0.333333,0.0,0.0,0.0
4,What does a Clinical Data Manager do?,[Role: Health Data Analyst\nDescription: A Hea...,A Clinical Trials Manager oversees clinical tr...,A Clinical Data Manager collects and manages d...,1.0,0.613728,0.0,0.5


In [66]:
df

Unnamed: 0,user_input,retrieved_contexts,response,reference,faithfulness,answer_relevancy,context_recall,context_precision
0,What are the daily responsibilities of a Publi...,[Role: Health Services Manager\nDescription: A...,"Alright, let's dive into what a Public Health ...",A typical day for a Public Health Administrato...,0.958333,1.0,0.8,1.0
1,What are the daily responsibilities of a Certi...,[Role: Clinical Nurse Specialist (CNS)\nDescri...,"I am sorry, but I cannot provide information a...",A CNA starts the day by checking on patients a...,0.75,0.0,0.0,0.0
2,What does a Makeup Artist do?,[Role: Display Designer\nDescription: A Displa...,"I am sorry, but I cannot provide information a...",A Makeup Artist applies makeup to clients to e...,1.0,0.0,0.0,0.0
3,What does a Inventory Coordinator do?,[Role: Merchandise Planner\nDescription: A Mer...,"I am sorry, but I do not have any information ...",An Inventory Coordinator in the e-commerce ret...,0.333333,0.0,0.0,0.0
4,What does a Clinical Data Manager do?,[Role: Health Data Analyst\nDescription: A Hea...,A Clinical Trials Manager oversees clinical tr...,A Clinical Data Manager collects and manages d...,1.0,0.613728,0.0,0.5
5,What are the daily responsibilities of a Healt...,[Role: Health Information Technician\nDescript...,A Health Informatics Specialist plays a crucia...,A typical day for a Health Informatics Special...,0.909091,1.0,0.2,0.0
6,What are the daily responsibilities of a Nurse...,[Role: Registered Nurse (RN)\nDescription: A R...,"Based on the job profiles I have access to, he...",A Nurse Educator starts the day by preparing l...,0.0,0.0,0.0,0.0
7,What does a Inventory Manager do?,[Role: Operations Manager\nDescription: An Ope...,An Inventory Manager ensures that a retail sto...,An Inventory Manager in retail store operation...,0.0,0.82097,0.6,0.833333
8,What does a Loss Prevention Specialist do?,[Role: Regulatory Affairs Specialist\nDescript...,"I am sorry, but I cannot provide information a...",A Loss Prevention Specialist works in retail s...,1.0,0.0,0.0,0.0
9,What are the daily responsibilities of a Suppl...,[Role: Retail Operations Manager\nDescription:...,"Based on the provided job profiles, here's wha...",A typical day for a Supply Chain Manager start...,0.538462,0.761792,0.0,0.0


In [78]:
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
HF_TOKEN = ""

In [40]:
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from langchain import HuggingFacePipeline
from langchain_huggingface import HuggingFaceEmbeddings
from ragas.metrics import (
    answer_relevancy,
    faithfulness,
    context_recall,
    context_precision,
)
from ragas import evaluate

# Set your Hugging Face token
HF_TOKEN = "your_huggingface_token_here"  # Replace with your actual token

# Define embedding model correctly
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

# Use a valid Hugging Face model
model_id = "mistralai/Mistral-7B-Instruct"

# Load tokenizer and model with authentication
tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(model_id, token=HF_TOKEN)

pipe = pipeline(
    model=model,
    tokenizer=tokenizer,
    return_full_text=True,  
    task="text-generation",
    temperature=0.1, 
    repetition_penalty=1.1  
)

evaluator = HuggingFacePipeline(pipeline=pipe)

# Ragas evaluation
result = evaluate(
    dataset=dataset,
    llm=evaluator,
    embeddings=embedding_model,
    metrics=[
        context_precision,
        faithfulness,
        answer_relevancy,
        context_recall,
    ],
)

print(result)


OSError: mistralai/Mistral-7B-Instruct is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`

In [4]:
import requests
import pandas as pd


RAG_API_URL = "http://127.0.0.1:8000/query/"

# call the RAG API
def get_rag_response(question):
    response = requests.get(RAG_API_URL, params={"query": question})
    if response.status_code == 200:
        data = response.json()
        return data.get("response", "no response"), data.get("retrieved_context", "no context")
    else:
        return "error getting response", []


# response, retrieved_context = get_rag_response("data")

In [10]:
questions_ground_truth_file = "random_sample_questions.csv"  
questions_ground_truth_df = pd.read_csv(questions_ground_truth_file)

In [11]:
questions_ground_truth_df.sample()

Unnamed: 0,questions,ground_truth
13,What does a Biomedical Engineer do?,A Biomedical Engineer designs and creates medi...


In [13]:
questions = questions_ground_truth_df["questions"]
questions

0     What are the daily responsibilities of a Publi...
1     What are the daily responsibilities of a Certi...
2                         What does a Makeup Artist do?
3                 What does a Inventory Coordinator do?
4                 What does a Clinical Data Manager do?
5     What are the daily responsibilities of a Healt...
6     What are the daily responsibilities of a Nurse...
7                     What does a Inventory Manager do?
8            What does a Loss Prevention Specialist do?
9     What are the daily responsibilities of a Suppl...
10      What does a Clinical Nurse Specialist (CNS) do?
11         What does a Visual Merchandising Manager do?
12    What are the daily responsibilities of a Opera...
13                  What does a Biomedical Engineer do?
14         What does a Visual Merchandising Planner do?
15    What are the daily responsibilities of a Stock...
16    What does a Healthcare Billing and Coding Spec...
17                 What does a Supply Chain Anal

In [14]:
responses = []
contexts = []
for question in questions:
    response, retrieved_context = get_rag_response(question)
    responses.append(response)
    contexts.append(retrieved_context)
    
    

In [15]:
questions_ground_truth_df["responses"]=responses
questions_ground_truth_df["contexts"]=contexts
questions_ground_truth_df

Unnamed: 0,questions,ground_truth,responses,contexts
0,What are the daily responsibilities of a Publi...,A typical day for a Public Health Administrato...,"Alright, let's dive into what a Public Health ...",Role: Health Services Manager\nDescription: A ...
1,What are the daily responsibilities of a Certi...,A CNA starts the day by checking on patients a...,"I am sorry, but I cannot provide information a...",Role: Clinical Nurse Specialist (CNS)\nDescrip...
2,What does a Makeup Artist do?,A Makeup Artist applies makeup to clients to e...,"I am sorry, but I cannot provide information a...",Role: Display Designer\nDescription: A Display...
3,What does a Inventory Coordinator do?,An Inventory Coordinator in the e-commerce ret...,"I am sorry, but I do not have any information ...",Role: Merchandise Planner\nDescription: A Merc...
4,What does a Clinical Data Manager do?,A Clinical Data Manager collects and manages d...,A Clinical Trials Manager oversees clinical tr...,Role: Health Data Analyst\nDescription: A Heal...
5,What are the daily responsibilities of a Healt...,A typical day for a Health Informatics Special...,A Health Informatics Specialist plays a crucia...,Role: Health Information Technician\nDescripti...
6,What are the daily responsibilities of a Nurse...,A Nurse Educator starts the day by preparing l...,"Based on the job profiles I have access to, he...",Role: Registered Nurse (RN)\nDescription: A Re...
7,What does a Inventory Manager do?,An Inventory Manager in retail store operation...,An Inventory Manager ensures that a retail sto...,Role: Operations Manager\nDescription: An Oper...
8,What does a Loss Prevention Specialist do?,A Loss Prevention Specialist works in retail s...,"I am sorry, but I cannot provide information a...",Role: Regulatory Affairs Specialist\nDescripti...
9,What are the daily responsibilities of a Suppl...,A typical day for a Supply Chain Manager start...,"Based on the provided job profiles, here's wha...",Role: Retail Operations Manager\nDescription: ...


In [16]:
rag_questions_responses_file = "rag_questions_responses.csv"
questions_ground_truth_df.to_csv(rag_questions_responses_file, index=False)

In [21]:
rag_questions_responses_file = "rag_questions_responses.csv"
rag_questions_responses_df = pd.read_csv(rag_questions_responses_file)

In [22]:
rag_questions_responses_df

Unnamed: 0,question,ground_truth,answer,contexts
0,What are the daily responsibilities of a Publi...,A typical day for a Public Health Administrato...,"Alright, let's dive into what a Public Health ...",Role: Health Services Manager\nDescription: A ...
1,What are the daily responsibilities of a Certi...,A CNA starts the day by checking on patients a...,"I am sorry, but I cannot provide information a...",Role: Clinical Nurse Specialist (CNS)\nDescrip...
2,What does a Makeup Artist do?,A Makeup Artist applies makeup to clients to e...,"I am sorry, but I cannot provide information a...",Role: Display Designer\nDescription: A Display...
3,What does a Inventory Coordinator do?,An Inventory Coordinator in the e-commerce ret...,"I am sorry, but I do not have any information ...",Role: Merchandise Planner\nDescription: A Merc...
4,What does a Clinical Data Manager do?,A Clinical Data Manager collects and manages d...,A Clinical Trials Manager oversees clinical tr...,Role: Health Data Analyst\nDescription: A Heal...
5,What are the daily responsibilities of a Healt...,A typical day for a Health Informatics Special...,A Health Informatics Specialist plays a crucia...,Role: Health Information Technician\nDescripti...
6,What are the daily responsibilities of a Nurse...,A Nurse Educator starts the day by preparing l...,"Based on the job profiles I have access to, he...",Role: Registered Nurse (RN)\nDescription: A Re...
7,What does a Inventory Manager do?,An Inventory Manager in retail store operation...,An Inventory Manager ensures that a retail sto...,Role: Operations Manager\nDescription: An Oper...
8,What does a Loss Prevention Specialist do?,A Loss Prevention Specialist works in retail s...,"I am sorry, but I cannot provide information a...",Role: Regulatory Affairs Specialist\nDescripti...
9,What are the daily responsibilities of a Suppl...,A typical day for a Supply Chain Manager start...,"Based on the provided job profiles, here's wha...",Role: Retail Operations Manager\nDescription: ...


In [25]:
rag_questions_responses_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   question      20 non-null     object
 1   ground_truth  20 non-null     object
 2   answer        20 non-null     object
 3   contexts      20 non-null     object
dtypes: object(4)
memory usage: 772.0+ bytes


In [23]:
rag_questions_responses_df["contexts"][0]

"Role: Health Services Manager\nDescription: A Health Services Manager plans, directs, and coordinates medical and health services. They might manage an entire facility, a specific department, or a medical practice for a group of doctors. They ensure that the facility runs smoothly and efficiently. They also make sure that the facility complies with laws and regulations. They work to improve the quality and efficiency of healthcare services.\nDay in Life: A typical day for a Health Services Manager starts with reviewing reports and emails. They attend meetings with doctors, nurses, and other staff to discuss operations and any issues. They might also meet with patients or their families to address concerns. Throughout the day, they monitor budgets, update policies, and ensure compliance with healthcare laws. They also work on improving services and may need to handle emergencies or unexpected problems.\nSkills: Logical Reasoning and Analytical Skills, Verbal Ability and Communication S

In [None]:
for questin in questins:
    print(q)


In [3]:
response

"Alright, let's dive into the world of data-related careers! Based on your query, here's a breakdown of some interesting options, focusing on roles in health and general data management:\n\n**1. Data Entry Clerk**\n\n*   **Job Role Overview:** Data Entry Clerks are the backbone of data management, inputting information into computer systems with accuracy.\n*   **Required Skills and Qualifications:** A 12th pass is generally sufficient. Basic computer skills and typing speed are essential.\n*   **Day-to-Day Responsibilities:** Entering data from various sources, verifying accuracy, updating records, and generating reports.\n*   **Employers Hiring:** Tata Consultancy Services (TCS), Infosys, Wipro, HCL Technologies, Tech Mahindra.\n\n**2. Public Health Data Collector**\n\n*   **Job Role Overview:** Public Health Data Collectors gather health information from communities through surveys and interviews, contributing to public health insights.\n*   **Required Skills and Qualifications:** A 

In [4]:
retrieved_context

"Role: Clinical Data Manager\nDescription: A Clinical Data Manager collects and manages data from clinical trials. They ensure the data is accurate and complete. They use software to organize and analyze the data. They also create reports based on the data. Their work helps in the development of new medicines and treatments.\nDay in Life: A typical day for a Clinical Data Manager starts with checking emails for updates on ongoing clinical trials. They spend time entering new data into the system and verifying its accuracy. They might have meetings with other team members to discuss the progress of the trials. They also generate reports and analyze data to find trends. At the end of the day, they ensure all data is securely stored.\nSkills: Logical Reasoning and Analytical Skills, Verbal Ability and Communication Skills, Numerical Aptitude, Creative Thinking and Innovation, Spatial Awareness, Interpersonal Skills, Technical Proficiency, Organizational Skills, Entrepreneurial Skills, Phy