In [2]:
from rag import RAG
from datasets import Dataset
from ragas.metrics import context_recall, answer_correctness, answer_relevancy
from ragas import evaluate

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Evaluation set for measuring retrieval accuracy and answer relevance.
#
# Each entry pairs a user question with a hand-written ground-truth answer.
# NOTE: the "answer" key holds the *reference* answer, not model output; the
# evaluation loop reads it back as the reference when it assembles the records
# that are scored.
test_data = [
    {
        "question": "What are the core courses in the MS in Applied Data Science program?",
        "answer": "The core courses in the MS in Applied Data Science program include Leadership and Consulting for Data Science, Data Science Capstone Project, Time Series Analysis and Forecasting, Statistical Models for Data Science, Machine Learning I, Machine Learning II, and either Data Engineering Platforms for Analytics or Big Data and Cloud Computing."
    },
    {
        "question": "What are the admission requirements for the MS in Applied Data Science program?",
        "answer": "Applicants are required to submit a completed application form, have a bachelor’s degree from an accredited institution, provide transcripts from all post-secondary education, submit two letters of recommendation (preferably from a supervisor or academic instructor), a current resume or CV, a personal statement, and proof of English language proficiency if applicable. Submission of GRE or GMAT scores is optional."
    },
    {
        "question": "Can you provide information about the capstone project?",
        "answer": "The capstone project is a required component of the program and is completed over two quarters. Students design and implement a research project, working on real-world data science problems in collaboration with industry partners. Full-time students typically begin the capstone in their third quarter, while part-time students generally start in their fifth quarter."
    },
    {
        "question": "What career outcomes are available for graduates?",
        "answer": "Graduates of the MS in Applied Data Science program go on to work in a variety of roles including data scientist, machine learning engineer, data analyst, business intelligence analyst, and consultant. They are employed across industries such as technology, finance, healthcare, and consulting. The program provides tailored career services and access to a strong alumni network to support students’ career goals."
    },
    {
        "question": "Is the program available online?",
        "answer": "Yes, the MS in Applied Data Science program is available in both in-person and online formats. The online program is overseen by the same faculty as the in-person program, ensuring that students receive the same curriculum and academic standards."
    },
    {
        "question": "How long does it take to complete the program?",
        "answer": "Full-time students can typically complete the MS in Applied Data Science program in 12 to 18 months, while part-time students usually finish in 18 to 24 months depending on their course load and schedule."
    }
]

# The ragas Dataset is deliberately not built in this cell: a dataset made from
# `test_data` alone would carry the reference answers in the "answer" column and
# no retrieved contexts. `test_dataset` is created once, from the evaluation
# records, after the RAG outputs have been collected.

In [4]:
# Build the project's RAG wrapper once at module level; `rag_pipeline` below
# reads this object (its `qa_chain.retriever` and `model` attributes).
# NOTE(review): the cell output suggests construction loads a persisted Chroma
# vector store — confirm against the `rag` module.
rag = RAG()
def rag_pipeline(question):
    """Run one question through retrieval and generation.

    Retrieves documents for ``question`` from the module-level
    ``rag.qa_chain.retriever``, joins their ``page_content`` with newlines,
    and sends that context plus the question to ``rag.model``.

    Args:
        question: The user question; it is passed to the retriever and
            interpolated into the prompt (presumably a ``str``).

    Returns:
        dict with three keys:
            "question": the input question, unchanged;
            "answer": the ``.content`` of the model's reply;
            "retrieved_contexts": list of ``page_content`` values, one per
                retrieved document, in retrieval order.
    """
    # `invoke` is the Runnable entry point for retrievers; it replaces the
    # deprecated `get_relevant_documents`, which emits a deprecation warning.
    docs = rag.qa_chain.retriever.invoke(question)
    contexts = [doc.page_content for doc in docs]
    context_str = "\n".join(contexts)

    # Context first, then the question, ending on "Answer:" as the cue for the model.
    prompt = f"{context_str}\n\nQuestion: {question}\nAnswer:"
    answer = rag.model.invoke(prompt).content

    return {
        "question": question,
        "answer": answer,
        "retrieved_contexts": contexts
    }

Loading existing vector store...


  self.vector_store = Chroma(persist_directory=persist_directory, embedding_function=self.embeddings)


In [5]:
# Stage 1: run every test question through the RAG pipeline, in order.
rag_outputs = [rag_pipeline(sample["question"]) for sample in test_data]

# Stage 2: pair each pipeline output with its hand-written reference answer.
# The generated text goes under "answer"; the ground truth from `test_data`
# (stored there under "answer") is carried over as "reference".
eval_data = [
    {
        "question": sample["question"],
        "answer": output["answer"],
        "retrieved_contexts": output["retrieved_contexts"],
        "reference": sample["answer"],
    }
    for sample, output in zip(test_data, rag_outputs)
]

# Dataset consumed by the ragas `evaluate` call.
test_dataset = Dataset.from_list(eval_data)

  docs = rag.qa_chain.retriever.get_relevant_documents(question)


In [6]:
# Metrics to score: one retrieval-side (context_recall) and two answer-side
# (answer_correctness, answer_relevancy).
selected_metrics = [context_recall, answer_correctness, answer_relevancy]

# Score the assembled evaluation dataset and show the aggregate results.
eval_results = evaluate(dataset=test_dataset, metrics=selected_metrics)
print(eval_results)

Evaluating: 100%|███████████████████████████████| 18/18 [00:24<00:00,  1.35s/it]

{'context_recall': 0.7500, 'answer_correctness': 0.6535, 'answer_relevancy': 0.7641}



