In [2]:
import os
import getpass

# Get API keys

In [5]:
# Prompt only when the key is not already present in the environment, so that
# re-running this cell (or launching Jupyter with the key exported in the shell)
# does not ask for it again. getpass reads the value without echoing it, which
# keeps the key out of the notebook source and its saved output.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")

# Load Data

In [7]:
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.document_loaders import TextLoader


# Load every .txt file found directly under the data directory, one TextLoader per file.
path = "./../data/"
loader = DirectoryLoader(
    path,
    glob="*.txt",
    loader_cls=TextLoader,
)
docs = loader.load()

# Unrolled version of SDG for more flexibility with testset distribution

In [None]:
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper

from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
# Wrap the LangChain chat model and embeddings in the ragas adapter classes.
generator_llm = LangchainLLMWrapper(
    ChatOpenAI(model="gpt-4.1-nano")
)
generator_embeddings = LangchainEmbeddingsWrapper(
    OpenAIEmbeddings()
)

In [11]:
from ragas.testset.graph import KnowledgeGraph

# Start from an empty knowledge graph; the trailing expression displays it
# so the node/relationship counts are visible in the cell output.
kg = KnowledgeGraph()
kg

KnowledgeGraph(nodes: 0, relationships: 0)

In [12]:
from ragas.testset.graph import Node, NodeType

# NOTE: only the first 15 documents are added to the graph, to keep cost and
# runtime down for this example.
for source_doc in docs[:15]:
    document_node = Node(
        type=NodeType.DOCUMENT,
        properties={
            "page_content": source_doc.page_content,
            "document_metadata": source_doc.metadata,
        },
    )
    kg.nodes.append(document_node)
kg

KnowledgeGraph(nodes: 15, relationships: 0)

In [None]:
from ragas.testset.transforms import default_transforms, apply_transforms

# Reuse the generator's LLM and embeddings for the knowledge-graph transforms.
transformer_llm = generator_llm
embedding_model = generator_embeddings

# Store the transform pipeline under its own name. Assigning it back to
# `default_transforms` would replace the imported factory function with its
# return value, leaving the name pointing at a non-callable for any other cell.
# NOTE(review): `documents` receives every loaded doc, while the graph was
# populated from docs[:15] only — confirm whether the same subset should be
# passed here.
kg_transforms = default_transforms(
    documents=docs,
    llm=transformer_llm,
    embedding_model=embedding_model,
)

# apply_transforms updates `kg` in place (its return value is not used); the
# trailing expression displays the enriched graph.
apply_transforms(kg, kg_transforms)
kg

Applying HeadlinesExtractor:   0%|          | 0/15 [00:00<?, ?it/s]

Applying HeadlineSplitter:   0%|          | 0/15 [00:00<?, ?it/s]

Applying SummaryExtractor:   0%|          | 0/27 [00:00<?, ?it/s]

Property 'summary' already exists in node 'a4f675'. Skipping!
Property 'summary' already exists in node 'dc0de7'. Skipping!
Property 'summary' already exists in node '45a815'. Skipping!
Property 'summary' already exists in node '77ee46'. Skipping!
Property 'summary' already exists in node '7c7970'. Skipping!
Property 'summary' already exists in node 'ffa2d6'. Skipping!
Property 'summary' already exists in node '1fb052'. Skipping!
Property 'summary' already exists in node 'ed1fc0'. Skipping!
Property 'summary' already exists in node '430063'. Skipping!
Property 'summary' already exists in node '727179'. Skipping!
Property 'summary' already exists in node 'aaef5c'. Skipping!
Property 'summary' already exists in node '2e1bca'. Skipping!


Applying CustomNodeFilter:   0%|          | 0/6 [00:00<?, ?it/s]

Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:   0%|          | 0/39 [00:00<?, ?it/s]

Property 'summary_embedding' already exists in node '77ee46'. Skipping!
Property 'summary_embedding' already exists in node '430063'. Skipping!
Property 'summary_embedding' already exists in node '45a815'. Skipping!
Property 'summary_embedding' already exists in node '727179'. Skipping!
Property 'summary_embedding' already exists in node '7c7970'. Skipping!
Property 'summary_embedding' already exists in node 'a4f675'. Skipping!
Property 'summary_embedding' already exists in node 'aaef5c'. Skipping!
Property 'summary_embedding' already exists in node 'ffa2d6'. Skipping!
Property 'summary_embedding' already exists in node 'dc0de7'. Skipping!
Property 'summary_embedding' already exists in node 'ed1fc0'. Skipping!
Property 'summary_embedding' already exists in node '2e1bca'. Skipping!
Property 'summary_embedding' already exists in node '1fb052'. Skipping!


Applying [CosineSimilarityBuilder, OverlapScoreBuilder]:   0%|          | 0/2 [00:00<?, ?it/s]

KnowledgeGraph(nodes: 33, relationships: 360)

In [15]:
# Write the transformed graph to product_data_kg.json, then load it back from
# that file under a new name; the trailing expression displays the reloaded graph.
kg.save("product_data_kg.json")
product_data_kg = KnowledgeGraph.load("product_data_kg.json")
product_data_kg

KnowledgeGraph(nodes: 33, relationships: 360)

In [16]:
from ragas.testset import TestsetGenerator

# Build the testset generator on top of the reloaded knowledge graph.
generator = TestsetGenerator(
    llm=generator_llm,
    embedding_model=embedding_model,
    knowledge_graph=product_data_kg,
)

In [None]:
from ragas.testset.synthesizers import default_query_distribution, SingleHopSpecificQuerySynthesizer, MultiHopAbstractQuerySynthesizer, MultiHopSpecificQuerySynthesizer

# Each entry is a (synthesizer, weight) pair. Only the single-hop synthesizer
# is used because there are not enough clusters for multi-hop.
query_distribution = [
    (
        SingleHopSpecificQuerySynthesizer(llm=generator_llm),
        1,
    ),
]

In [21]:
# Generate 25 synthetic samples using the distribution defined above, then
# show them as a DataFrame.
testset = generator.generate(
    testset_size=25,
    query_distribution=query_distribution,
)
testset.to_pandas()

Generating Scenarios:   0%|          | 0/1 [00:00<?, ?it/s]

Generating Samples:   0%|          | 0/25 [00:00<?, ?it/s]

Unnamed: 0,user_input,reference_contexts,reference,synthesizer_name
0,Is this product available in France?,[Product Information Summary UPC: 028400070560...,"Yes, the product is available in France, as in...",single_hop_specifc_query_synthesizer
1,Frito Lay what is?,[Product Information Summary UPC: 028400070560...,Frito Lay is the manufacturer and distributor ...,single_hop_specifc_query_synthesizer
2,Is the Nacho Cheese Flavored Tortilla Chips pr...,[Product Information Summary UPC: 028400070560...,"Yes, the Nacho Cheese Flavored Tortilla Chips ...",single_hop_specifc_query_synthesizer
3,Is this product available in France?,[Product Information Summary UPC: 028400070560...,"Yes, the product is available in France, as in...",single_hop_specifc_query_synthesizer
4,In which countries is the Nacho Cheese Flavore...,[Product Information Summary UPC: 028400070560...,The Nacho Cheese Flavored Tortilla Chips produ...,single_hop_specifc_query_synthesizer
5,What is 028400070560?,[CONSUMER GUIDANCE This product summary is int...,This product reference 028400070560 is from th...,single_hop_specifc_query_synthesizer
6,How can I find detailed nutritional informatio...,[CONSUMER GUIDANCE This product summary is int...,For more detailed information about this produ...,single_hop_specifc_query_synthesizer
7,What is OpenFoodFacts and how can it help me m...,[CONSUMER GUIDANCE This product summary is int...,OpenFoodFacts is a collaborative database that...,single_hop_specifc_query_synthesizer
8,What should I do to get the latest info about ...,[CONSUMER GUIDANCE This product summary is int...,"For the most current product information, incl...",single_hop_specifc_query_synthesizer
9,What is OpenFoodFacts?,[CONSUMER GUIDANCE This product summary is int...,OpenFoodFacts is a collaborative database that...,single_hop_specifc_query_synthesizer


# Build Naive Graph

In [None]:
import sys

# Make the sibling `src` directory importable so the `utils` package resolves.
# The membership check keeps re-runs of this cell from stacking duplicate
# entries at the front of sys.path.
src_path = os.path.abspath(os.path.join(os.getcwd(), '..', 'src'))
if src_path not in sys.path:
    sys.path.insert(0, src_path)

# This import relies on the path entry added above, so it must stay below it.
from utils.rag_graph import build_rag_graph

graph = build_rag_graph()

In [26]:
# Rebuilds the naive graph. NOTE(review): the preceding cell already assigns
# `graph` with the same call, so this cell looks redundant — confirm before removing.
graph = build_rag_graph()

# Evaluate Naive Graph

In [27]:
# Ask the naive graph every synthetic question and store its answer and the
# text of each retrieved context back on the sample (mutates `testset` in place).
for test_row in testset:
    sample = test_row.eval_sample
    graph_output = graph.invoke({"question": sample.user_input})
    sample.response = graph_output["response"]
    sample.retrieved_contexts = [
        retrieved_doc.page_content for retrieved_doc in graph_output["context"]
    ]

In [29]:
from ragas import EvaluationDataset

# Convert the populated testset into a ragas evaluation dataset via a DataFrame.
evaluation_dataset = EvaluationDataset.from_pandas(
    testset.to_pandas()
)

In [None]:
from ragas.llms import LangchainLLMWrapper

# Judge model for the metrics; a different model from the one used for generation.
evaluator_llm = LangchainLLMWrapper(
    ChatOpenAI(model="gpt-4.1-mini")
)

In [41]:
from ragas.metrics import LLMContextRecall, Faithfulness, FactualCorrectness, ResponseRelevancy, ContextEntityRecall, NoiseSensitivity
from ragas import evaluate, RunConfig

# Run configuration with an explicit timeout, shared by the evaluation runs.
custom_run_config = RunConfig(timeout=360)

# Score the naive graph's responses with six metrics, using the evaluator LLM as judge.
naive_result = evaluate(
    dataset=evaluation_dataset,
    metrics=[
        LLMContextRecall(),
        Faithfulness(),
        FactualCorrectness(),
        ResponseRelevancy(),
        ContextEntityRecall(),
        NoiseSensitivity(),
    ],
    llm=evaluator_llm,
    run_config=custom_run_config,
)

Evaluating:   0%|          | 0/150 [00:00<?, ?it/s]

In [42]:
# Display the per-metric scores from the naive graph's evaluation.
naive_result

{'context_recall': 0.7467, 'faithfulness': 0.9104, 'factual_correctness': 0.5212, 'answer_relevancy': 0.6594, 'context_entity_recall': 0.5806, 'noise_sensitivity_relevant': 0.1942}

# Build Advanced Graph

In [34]:
# Import under an alias. The naive builder imported earlier in the notebook is
# also named `build_rag_graph`; importing the same name again would silently
# rebind it, so re-running the naive "graph = build_rag_graph()" cell afterwards
# would construct the advanced graph instead.
from utils.advanced_rag_graph import build_rag_graph as build_advanced_rag_graph

advancedgraph = build_advanced_rag_graph()

# Evaluate Advanced Graph

In [36]:
import copy

# Deep-copy the testset so the advanced run gets its own samples to fill in,
# leaving the objects held by `testset` untouched.
advanced_graph_testset = copy.deepcopy(testset)

In [37]:
# Ask the advanced graph every synthetic question and store its answer and the
# text of each retrieved context on the copied sample.
for test_row in advanced_graph_testset:
    sample = test_row.eval_sample
    graph_output = advancedgraph.invoke({"question": sample.user_input})
    sample.response = graph_output["response"]
    sample.retrieved_contexts = [
        retrieved_doc.page_content for retrieved_doc in graph_output["context"]
    ]

In [38]:

# Convert the advanced run's testset into a ragas evaluation dataset via a DataFrame.
advanced_evaluation_dataset = EvaluationDataset.from_pandas(
    advanced_graph_testset.to_pandas()
)

In [39]:
# Score the advanced graph's responses with the same six metrics, judge LLM and
# run configuration as the naive evaluation; `result` holds the advanced scores.
result = evaluate(
    dataset=advanced_evaluation_dataset,
    metrics=[
        LLMContextRecall(),
        Faithfulness(),
        FactualCorrectness(),
        ResponseRelevancy(),
        ContextEntityRecall(),
        NoiseSensitivity(),
    ],
    llm=evaluator_llm,
    run_config=custom_run_config,
)

Evaluating:   0%|          | 0/150 [00:00<?, ?it/s]

Exception raised in Job[131]: TimeoutError()


In [40]:
# Display the per-metric scores from the advanced graph's evaluation.
result

{'context_recall': 0.9160, 'faithfulness': 0.9374, 'factual_correctness': 0.4656, 'answer_relevancy': 0.8017, 'context_entity_recall': 0.4537, 'noise_sensitivity_relevant': 0.3176}

# RAG Graph Comparison

In [44]:
# Aggregate scores copied by hand from the two evaluation outputs shown earlier
# in the notebook; they are not recomputed here, so update them after any re-run.
naive_result_dict = {
    'context_recall': 0.7467,
    'faithfulness': 0.9104,
    'factual_correctness': 0.5212,
    'answer_relevancy': 0.6594,
    'context_entity_recall': 0.5806,
    'noise_sensitivity_relevant': 0.1942,
}
advanced_result_dict = {
    'context_recall': 0.9160,
    'faithfulness': 0.9374,
    'factual_correctness': 0.4656,
    'answer_relevancy': 0.8017,
    'context_entity_recall': 0.4537,
    'noise_sensitivity_relevant': 0.3176,
}

In [48]:
import pandas as pd

# One row per pipeline, one column per metric, for a side-by-side view.
comparison = pd.DataFrame(
    data=[naive_result_dict, advanced_result_dict],
    index=["Naive", "Advanced"],
)
comparison


Unnamed: 0,context_recall,faithfulness,factual_correctness,answer_relevancy,context_entity_recall,noise_sensitivity_relevant
Naive,0.7467,0.9104,0.5212,0.6594,0.5806,0.1942
Advanced,0.916,0.9374,0.4656,0.8017,0.4537,0.3176
