# Synthetic Data Generation Using RAGAS - RAG Evaluation with LangSmith

In [1]:
# Standard Library Imports
import copy
import getpass
import os
from operator import itemgetter
from pathlib import Path
from uuid import uuid4

# Third-Party Imports
# LangChain Core
from langchain.prompts import ChatPromptTemplate
from langchain.schema import StrOutputParser
from langchain.text_splitter import RecursiveCharacterTextSplitter
# from langchain_core.runnables import RunnableParallel, RunnablePassthrough

# LangChain Community
from langchain_community.document_loaders import DirectoryLoader, PyMuPDFLoader
from langchain_community.vectorstores import Qdrant

# LangChain OpenAI
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# RAGAS
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.llms import LangchainLLMWrapper
from ragas.testset import TestsetGenerator
from ragas.testset.graph import KnowledgeGraph, Node, NodeType
from ragas.testset.synthesizers import (
    MultiHopAbstractQuerySynthesizer,
    MultiHopSpecificQuerySynthesizer,
    SingleHopSpecificQuerySynthesizer,
)
from ragas.testset.transforms import apply_transforms, default_transforms

# LangSmith
from langsmith import Client
from langsmith.evaluation import LangChainStringEvaluator, evaluate

# Local Application Imports
# (none yet)


  for match in re.finditer('{0}\s*'.format(re.escape(sent)), self.original_text):
  txt = re.sub('(?<={0})\.'.format(am), '∯', txt)
  txt = re.sub('(?<={0})\.'.format(am), '∯', txt)


In [2]:
# Turn on LangSmith tracing and give this notebook run its own project name
# (random 8-hex-character suffix) so traces from separate runs stay apart.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = f"AIM - SDG - {uuid4().hex[0:8]}"

# Prompt only for keys that are not already present in the environment.
# This keeps the cell idempotent on re-run and lets the notebook execute
# non-interactively when the keys are exported beforehand. To replace a key
# that was entered incorrectly, delete it from os.environ and re-run the cell.
for env_var, prompt_text in (
    ("LANGCHAIN_API_KEY", "LangChain API Key:"),
    ("OPENAI_API_KEY", "OpenAI API Key:"),
):
    if not os.environ.get(env_var):
        os.environ[env_var] = getpass.getpass(prompt_text)

In [3]:
# Prompt templates for the two RAG chains. Both expect the template variables
# {context} and {question}; DOPE_PROMPT differs from BASELINE_PROMPT only by
# the extra tone instruction paragraph.
BASELINE_PROMPT = (
    "Given a provided context and question, you must answer the question based only on context.\n"
    "\n"
    "If you cannot answer the question based on the context - you must say \"I don't know\".\n"
    "\n"
    "Context: {context}\n"
    "Question: {question}\n"
)

DOPE_PROMPT = (
    "Given a provided context and question, you must answer the question based only on context.\n"
    "\n"
    "If you cannot answer the question based on the context - you must say \"I don't know\".\n"
    "\n"
    "Make your answer rad, ensure high levels of dopeness. Do not be generic, or give generic responses.\n"
    "\n"
    "Context: {context}\n"
    "Question: {question}\n"
)

In [6]:
# Locate the data directory relative to the current working directory.
# NOTE: this assumes the kernel was started from a sub-directory one level
# below the project root (e.g. notebooks/).
project_root = Path.cwd().parent  # Go up one level from notebooks/ to project root
data_path = project_root / "data"

print(f"Project root: {project_root}")
print(f"Data path: {data_path}")
print(f"Data path exists: {data_path.exists()}")

# Load documents: every *.pdf directly inside data_path, parsed with PyMuPDFLoader.
loader = DirectoryLoader(str(data_path), glob="*.pdf", loader_cls=PyMuPDFLoader)
docs = loader.load()
print(f"Loaded {len(docs)} documents")

# Fail fast: an empty corpus would otherwise only surface as an obscure error
# much later, during knowledge-graph construction or vector-store creation.
if not docs:
    raise ValueError(f"No PDF documents were loaded from {data_path}")

# Independent copy for the LangChain RAG pipelines. A plain `docs_copy = docs`
# only creates a second name for the same list, so any mutation of `docs`
# (or of its Document objects) would also show up in `docs_copy`.
docs_copy = copy.deepcopy(docs)

Project root: /home/donbr/don-aie-cohort8/aie8-s09-adv-retrieval
Data path: /home/donbr/don-aie-cohort8/aie8-s09-adv-retrieval/data
Data path exists: True
Loaded 64 documents


In [8]:
# RAGAS models: LangChain chat/embedding models wrapped for use by RAGAS.
# NOTE (original author): LangchainEmbeddingsWrapper is deprecated —
# TODO confirm against the pinned ragas version before replacing it.
generator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4.1-nano"))
generator_embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings())
# The knowledge-graph transforms reuse the generator's LLM and embeddings.
transformer_llm = generator_llm
embedding_model = generator_embeddings

# LangChain models
llm = ChatOpenAI(model="gpt-4.1-mini")  # answers questions inside the RAG chains
eval_llm = ChatOpenAI(model="gpt-4.1")  # judge model for the LangSmith evaluators
# `embeddings` used to be assigned twice in this cell (text-embedding-3-small,
# then text-embedding-3-large). Only the last assignment ever took effect, so
# the dead first assignment is removed and the effective model is kept.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

## RAGAS - knowledge graph approach

In [9]:
# Start from an empty knowledge graph and add one DOCUMENT node per loaded
# document, carrying the document text and its metadata as node properties.
kg = KnowledgeGraph()
for doc in docs:
    document_node = Node(
        type=NodeType.DOCUMENT,
        properties={
            "page_content": doc.page_content,
            "document_metadata": doc.metadata,
        },
    )
    kg.nodes.append(document_node)

In [11]:
# Build RAGAS's default transform pipeline for the loaded documents, using the
# wrapped generator LLM and embeddings configured earlier.
kg_transform = default_transforms(
    documents=docs,
    llm=transformer_llm,
    embedding_model=embedding_model,
)

In [12]:
# Run the transform pipeline held in `kg_transform` against `kg`.
# No return value is captured, so this presumably enriches `kg` in place
# (TODO confirm against the RAGAS docs). This step calls the LLM and
# embedding APIs and is the slow part of the notebook.
apply_transforms(kg, kg_transform)

Applying HeadlinesExtractor:   0%|          | 0/21 [00:00<?, ?it/s]

Applying HeadlineSplitter:   0%|          | 0/64 [00:00<?, ?it/s]

unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to ap

Applying SummaryExtractor:   0%|          | 0/38 [00:00<?, ?it/s]

Property 'summary' already exists in node '493448'. Skipping!
Property 'summary' already exists in node 'da452a'. Skipping!
Property 'summary' already exists in node 'f5cafb'. Skipping!
Property 'summary' already exists in node '3256cd'. Skipping!
Property 'summary' already exists in node '18238d'. Skipping!
Property 'summary' already exists in node 'fdd054'. Skipping!
Property 'summary' already exists in node 'b535f0'. Skipping!
Property 'summary' already exists in node '0a5fb4'. Skipping!
Property 'summary' already exists in node '19a95f'. Skipping!
Property 'summary' already exists in node 'c41583'. Skipping!
Property 'summary' already exists in node 'fb68d3'. Skipping!
Property 'summary' already exists in node 'c53350'. Skipping!
Property 'summary' already exists in node '07048d'. Skipping!
Property 'summary' already exists in node '6e86dd'. Skipping!
Property 'summary' already exists in node 'ce70d5'. Skipping!
Property 'summary' already exists in node '9d8e6c'. Skipping!
Property

Applying CustomNodeFilter:   0%|          | 0/8 [00:00<?, ?it/s]

Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:   0%|          | 0/48 [00:00<?, ?it/s]

Property 'summary_embedding' already exists in node '0a5fb4'. Skipping!
Property 'summary_embedding' already exists in node 'f5cafb'. Skipping!
Property 'summary_embedding' already exists in node '3256cd'. Skipping!
Property 'summary_embedding' already exists in node 'fdd054'. Skipping!
Property 'summary_embedding' already exists in node 'b535f0'. Skipping!
Property 'summary_embedding' already exists in node '493448'. Skipping!
Property 'summary_embedding' already exists in node 'fb68d3'. Skipping!
Property 'summary_embedding' already exists in node 'c41583'. Skipping!
Property 'summary_embedding' already exists in node '18238d'. Skipping!
Property 'summary_embedding' already exists in node '19a95f'. Skipping!
Property 'summary_embedding' already exists in node 'da452a'. Skipping!
Property 'summary_embedding' already exists in node 'c53350'. Skipping!
Property 'summary_embedding' already exists in node '6e86dd'. Skipping!
Property 'summary_embedding' already exists in node '07048d'. Sk

Applying [CosineSimilarityBuilder, OverlapScoreBuilder]:   0%|          | 0/2 [00:00<?, ?it/s]

In [13]:
# Persist the transformed knowledge graph. The path is relative, so the file
# is written to the kernel's current working directory.
kg.save("usecase_data_kg.json")

In [14]:
# Reload the graph from disk so test-set generation works from the saved artifact.
usecase_data_kg = KnowledgeGraph.load("usecase_data_kg.json")

generator = TestsetGenerator(
    llm=generator_llm,
    embedding_model=embedding_model,
    knowledge_graph=usecase_data_kg,
)

# (synthesizer, weight) pairs; the weights 0.5 / 0.25 / 0.25 sum to 1.0.
query_distribution = [
    (SingleHopSpecificQuerySynthesizer(llm=generator_llm), 0.5),
    (MultiHopAbstractQuerySynthesizer(llm=generator_llm), 0.25),
    (MultiHopSpecificQuerySynthesizer(llm=generator_llm), 0.25),
]

In [15]:
# Generate the synthetic "golden" test set: 10 samples requested, drawn
# according to `query_distribution`. Generation is LLM-driven, so the
# questions will presumably differ from run to run — no seed is set here.
golden_testset = generator.generate(testset_size=10, query_distribution=query_distribution)
# Display the generated samples as a DataFrame (cell output).
golden_testset.to_pandas()

Generating personas:   0%|          | 0/3 [00:00<?, ?it/s]

Generating Scenarios:   0%|          | 0/3 [00:00<?, ?it/s]

Generating Samples:   0%|          | 0/11 [00:00<?, ?it/s]

Unnamed: 0,user_input,reference_contexts,reference,synthesizer_name
0,What is ChatGPT?,[Introduction ChatGPT launched in November 202...,ChatGPT is a mass-market chatbot based on a La...,single_hop_specifc_query_synthesizer
1,OpenAI what is it doin with ChatGPT messages?,[Table 1: ChatGPT daily message counts (millio...,The context explains that OpenAI develops Chat...,single_hop_specifc_query_synthesizer
2,What are nonprofessional occupations?,[Variation by Occupation Figure 23 presents va...,Variation by Occupation Figure 23 presents var...,single_hop_specifc_query_synthesizer
3,"What does the term '29,000 messages per second...",[Conclusion This paper studies the rapid growt...,"The term '29,000 messages per second' refers t...",single_hop_specifc_query_synthesizer
4,How do user demographics and privacy-preservin...,[<1-hop>\n\nConclusion This paper studies the ...,The context indicates that ChatGPT's usage has...,multi_hop_abstract_query_synthesizer
5,Based on the data showing message volume growt...,[<1-hop>\n\nMonth Non-Work (M) (%) Work (M) (%...,The data indicates that non-work messages have...,multi_hop_abstract_query_synthesizer
6,wht occupation catgories use chatgpt for work ...,[<1-hop>\n\nVariation by Occupation Figure 23 ...,Variation by Occupation Figure 23 shows that u...,multi_hop_abstract_query_synthesizer
7,"how Handa et al., 2025 shows ChatGPT usage lik...",[<1-hop>\n\nIntroduction ChatGPT launched in N...,"Handa et al., 2025 reports that ChatGPT usage ...",multi_hop_specific_query_synthesizer
8,"Handa et al., 2025, say that most ChatGPT use ...",[<1-hop>\n\nIntroduction ChatGPT launched in N...,"Handa et al., 2025, indicate that nearly 80% o...",multi_hop_specific_query_synthesizer
9,How does the growth of ChatGPT usage by July 2...,[<1-hop>\n\nConclusion This paper studies the ...,"By July 2025, ChatGPT had experienced rapid gr...",multi_hop_specific_query_synthesizer


## LangChain RAG

### First Chain - baseline_chain

In [16]:
# Baseline chunking configuration: chunk_size=500 with chunk_overlap=50.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)

# Chunk the loaded PDF documents for the baseline vector store.
baseline_documents = text_splitter.split_documents(docs_copy)

In [17]:
# Embed the baseline chunks with `embeddings` and index them in an in-memory
# Qdrant collection (nothing is persisted to disk).
baseline_vectorstore = Qdrant.from_documents(
    baseline_documents,
    embeddings,
    location=":memory:",
    collection_name="Use Case RAG",
)

In [18]:
baseline_prompt = ChatPromptTemplate.from_template(BASELINE_PROMPT)

# Retriever over the baseline collection, configured with k=10.
baseline_retriever = baseline_vectorstore.as_retriever(search_kwargs={"k": 10})

# Chain input is a dict with a "question" key. The question is sent to the
# retriever to fill "context" and is also passed through unchanged; the filled
# prompt goes to `llm` and the reply is parsed down to a plain string.
baseline_chain = (
    {
        "context": itemgetter("question") | baseline_retriever,
        "question": itemgetter("question"),
    }
    | baseline_prompt
    | llm
    | StrOutputParser()
)

In [19]:
# Smoke-test the baseline chain with one ad-hoc question; the answer string
# is shown as the cell output.
baseline_chain.invoke({"question" : "What are people doing with AI these days?"})

'Based on the context from the document "How People Use ChatGPT," people are using AI primarily for workplace tasks, either augmenting or automating human labor. Generative AI is highly flexible and is used in many different ways both at work and outside of work. Specifically, people use AI for obtaining information and advice, producing long-form outputs such as writing and software code, data analysis, and digital product creation like spreadsheets. Additionally, some usage also involves self-expression areas such as relationships, personal reflection, games, and role play, though these are less common. AI supports decision-making and various economic tasks, reflecting its broad occupational and societal impact.'

### Second Chain - dope_chain

In [20]:
# Second chunking configuration: chunk_size=1000 (baseline uses 500), same
# chunk_overlap=50. NOTE: this rebinds `text_splitter`, replacing the baseline
# splitter object defined earlier in the notebook.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)

# Chunk the same loaded PDF documents for the second ("dope") vector store.
dope_documents = text_splitter.split_documents(docs_copy)

In [21]:
# reuse of vectorstore name for different collection
# will this reuse the existing in-memory instance or overwrite?
dope_vectorstore = Qdrant.from_documents(
    documents=dope_documents,
    embedding=embeddings,
    location=":memory:",
    collection_name="Use Case RAG Docs"
)

In [22]:
# Retriever with default search settings (the baseline retriever passes
# search_kwargs={"k": 10}; none are passed here).
dope_retriever = dope_vectorstore.as_retriever()
dope_prompt = ChatPromptTemplate.from_template(DOPE_PROMPT)

# Same chain shape as the baseline: retrieve context for the question, fill
# the prompt, call `llm`, and parse the reply to a plain string.
dope_chain = (
    {
        "context": itemgetter("question") | dope_retriever,
        "question": itemgetter("question"),
    }
    | dope_prompt
    | llm
    | StrOutputParser()
)

In [23]:
# Smoke-test the second chain with one ad-hoc question; the answer string is
# shown as the cell output.
dope_chain.invoke({"question" : "How are people using AI to make money?"})

'Ah, you wanna know how folks are turning AI magic into cold, hard cash? Let me drop the knowledge bomb straight from the docs.\n\nPeople aren’t just getting AI to do grunt work—they’re harnessing ChatGPT and generative AI as *advisors* and *research assistants*. This isn’t just automation, it’s *decision support on steroids*. In knowledge-heavy gigs, where smart choices mean big bucks, AI cranks up productivity by leveling up decision-making quality. Imagine having a supercharged brain partner that boosts your workflow without breaking a sweat.\n\nCollis and Brynjolfsson (2025) nailed it: American users value generative AI so much that ditching it for a month would cost them *$98 each*—adding up to a mind-blowing $97 billion+ yearly surplus. This money-making mojo isn’t limited to task automation; it’s about making smarter, faster, and more informed moves in the workplace.\n\nSo basically, AI is the secret sauce for crushing it in knowledge work—boosting output, refining decisions, an

## LangSmith

### Create Dataset - from RAGAS Golden Testset

In [24]:
# LangSmith client; no arguments are passed, so it presumably picks up the
# API key set in the environment earlier in the notebook.
client = Client()

# A random 8-hex-character suffix gives each run its own dataset name.
langsmith_dataset_name = f"Use Case Synthetic Data - AIE8 - {uuid4().hex[0:8]}"

langsmith_dataset = client.create_dataset(
    description="Synthetic Data for Use Cases",
    dataset_name=langsmith_dataset_name,
)

In [25]:
# Upload every generated sample to the LangSmith dataset:
#   user_input         -> inputs["question"]
#   reference          -> outputs["answer"]
#   reference_contexts -> metadata["context"]
for row_index, testset_row in golden_testset.to_pandas().iterrows():
    client.create_example(
        inputs={"question": testset_row["user_input"]},
        outputs={"answer": testset_row["reference"]},
        metadata={"context": testset_row["reference_contexts"]},
        dataset_id=langsmith_dataset.id,
    )

### Set Up Evaluation Criteria - using the legacy `LangChainStringEvaluator` approach (not OpenEvals)

In [26]:
# QA evaluator judged by `eval_llm` (reported as `feedback.correctness` in
# the evaluation results).
qa_evaluator = LangChainStringEvaluator("qa", config={"llm": eval_llm})

# Helpfulness, judged against the reference answer.
labeled_helpfulness_evaluator = LangChainStringEvaluator(
    "labeled_criteria",
    config={
        "criteria": {
            "helpfulness": "Is this submission helpful to the user, taking into account the correct reference answer?"
        },
        "llm": eval_llm,
    },
    # Map the run output and dataset example onto the prediction / reference /
    # input fields handed to the evaluator.
    prepare_data=lambda run, example: dict(
        prediction=run.outputs["output"],
        reference=example.outputs["answer"],
        input=example.inputs["question"],
    ),
)

# Style criterion; uses "criteria" rather than "labeled_criteria", and no
# prepare_data mapping is supplied.
dopeness_evaluator = LangChainStringEvaluator(
    "criteria",
    config={
        "criteria": {
            "dopeness": "Is this response dope, lit, cool, or is it just a generic response?"
        },
        "llm": eval_llm,
    },
)

## Run Evaluations

### First Evaluation - baseline_chain

In [27]:
# Run the baseline chain over every example in the LangSmith dataset and score
# each answer with all three evaluators. The returned results object is the
# cell output.
evaluate(
    baseline_chain.invoke,
    data=langsmith_dataset_name,
    evaluators=[qa_evaluator, labeled_helpfulness_evaluator, dopeness_evaluator],
    metadata={"revision_id": "default_chain_init"},
)

View the evaluation results for experiment: 'mealy-protest-87' at:
https://smith.langchain.com/o/29b9636b-ddfa-4496-93ee-b2875ed2ee09/datasets/29692958-3c25-495f-987f-d5d0cdd33206/compare?selectedSessions=8a6460e6-1b28-4434-8bc2-9c88afdf6442




0it [00:00, ?it/s]

Unnamed: 0,inputs.question,outputs.output,error,reference.answer,feedback.correctness,feedback.helpfulness,feedback.dopeness,execution_time,example_id,id
0,"Based on the data from July 2025, how does the...","Based on the data from July 2025, the rapid gr...",,"By July 2025, ChatGPT had experienced rapid gr...",1,1,0,6.641255,01d079d2-06b7-4601-a841-6d9d88931149,722bbfd9-13f9-4959-abd8-8541aa73e2ec
1,How does the growth of ChatGPT usage by July 2...,"By July 2025, ChatGPT had experienced rapid gr...",,"By July 2025, ChatGPT had experienced rapid gr...",1,1,0,2.816137,537ba08d-b429-4164-9624-52eecb71df69,15b8600e-a185-49ac-a7cb-4e001e8bbd4b
2,"Handa et al., 2025, say that most ChatGPT use ...","Based on the provided context, Handa et al., 2...",,"Handa et al., 2025, indicate that nearly 80% o...",1,1,0,4.389732,e60e5d75-0439-47d1-96c0-245ffe593faa,a84ce206-a42f-4eb9-9415-f3a10f0a52c6
3,"how Handa et al., 2025 shows ChatGPT usage lik...","The provided context mentions Handa et al., 20...",,"Handa et al., 2025 reports that ChatGPT usage ...",0,0,0,3.061774,0265ce5a-88c0-4d10-9409-ccb970ff5b9a,8dfd8f2c-a223-4a0e-a8f4-cde78f23483d
4,wht occupation catgories use chatgpt for work ...,"Based on the provided context, the occupation ...",,Variation by Occupation Figure 23 shows that u...,1,1,0,3.874545,5974c600-e166-423e-b400-940f8da2ddb9,92ab83d1-7209-40ec-bfcc-85cd17750478
5,Based on the data showing message volume growt...,"Based on the context, the increasing share of ...",,The data indicates that non-work messages have...,1,1,0,4.848903,8ee55c2e-c845-4d16-8dcc-2237bc80f171,28cc1792-2b8b-443b-b388-3514acad2424
6,How do user demographics and privacy-preservin...,"Based on the provided context, user demographi...",,The context indicates that ChatGPT's usage has...,1,1,0,5.621086,294a3dd9-d910-4ca3-80a9-eb46224b5941,94071071-43c4-47e2-9a6d-8803f46ad6ff
7,"What does the term '29,000 messages per second...","The term ""29,000 messages per second"" refers t...",,"The term '29,000 messages per second' refers t...",1,1,0,2.2926,4be055b5-8558-44bf-914e-ad449b39ec04,0fb41f9d-87d3-4aaf-a9e0-3a006340f155
8,What are nonprofessional occupations?,Nonprofessional occupations include administra...,,Variation by Occupation Figure 23 presents var...,1,1,0,0.85688,95955f93-b8af-4589-9c03-4d7318cc9f5f,f3582d5b-f116-4220-9535-011b2c4c694f
9,OpenAI what is it doin with ChatGPT messages?,OpenAI analyzes ChatGPT messages by using a cl...,,The context explains that OpenAI develops Chat...,1,0,0,2.268285,4117b9f7-99f2-4cb1-ac6f-3f8fd11fda8d,88dc2f87-8dff-4bf2-b5af-159463a04a10


### Second Evaluation - dope_chain

In [28]:
# Run the second ("dope") chain over the same LangSmith dataset with the same
# three evaluators, so its scores can be compared with the baseline experiment.
# The returned results object is the cell output.
evaluate(
    dope_chain.invoke,
    data=langsmith_dataset_name,
    evaluators=[qa_evaluator, labeled_helpfulness_evaluator, dopeness_evaluator],
    metadata={"revision_id": "dope_chain"},
)

View the evaluation results for experiment: 'yellow-income-12' at:
https://smith.langchain.com/o/29b9636b-ddfa-4496-93ee-b2875ed2ee09/datasets/29692958-3c25-495f-987f-d5d0cdd33206/compare?selectedSessions=d8bdf9e7-9862-4339-931c-018f8c699e87




0it [00:00, ?it/s]

Unnamed: 0,inputs.question,outputs.output,error,reference.answer,feedback.correctness,feedback.helpfulness,feedback.dopeness,execution_time,example_id,id
0,"Based on the data from July 2025, how does the...","Oh, brace yourself for this turbo-charged insi...",,"By July 2025, ChatGPT had experienced rapid gr...",1,1,1,7.517711,01d079d2-06b7-4601-a841-6d9d88931149,66aaa27c-9aa4-40e4-bc4f-b6fe0eb70fe6
1,How does the growth of ChatGPT usage by July 2...,"Alright, strap in for some serious dopeness on...",,"By July 2025, ChatGPT had experienced rapid gr...",1,1,1,5.924114,537ba08d-b429-4164-9624-52eecb71df69,eb9ecdb4-1ba9-443b-bb8b-67cc8fc96095
2,"Handa et al., 2025, say that most ChatGPT use ...","Alright, buckle up—here’s the juicy lowdown ri...",,"Handa et al., 2025, indicate that nearly 80% o...",1,1,1,5.08349,e60e5d75-0439-47d1-96c0-245ffe593faa,1a7fc9f1-0533-4d4b-9870-6487fa4b3698
3,"how Handa et al., 2025 shows ChatGPT usage lik...","Yo, let's break this down with ultra-fresh pre...",,"Handa et al., 2025 reports that ChatGPT usage ...",1,1,1,6.809444,0265ce5a-88c0-4d10-9409-ccb970ff5b9a,634b3e4b-e11b-4065-af51-e40c19a1228b
4,wht occupation catgories use chatgpt for work ...,"Alright, let’s drop some knowledge bombs about...",,Variation by Occupation Figure 23 shows that u...,1,0,1,6.499883,5974c600-e166-423e-b400-940f8da2ddb9,b48229fa-2c26-438c-a502-e4a1ead56c8b
5,Based on the data showing message volume growt...,"Yo, buckle up — here’s the skinny straight fro...",,The data indicates that non-work messages have...,1,1,1,5.913716,8ee55c2e-c845-4d16-8dcc-2237bc80f171,c191ba1f-a7af-48b1-b07d-4c8bbbdc6eb6
6,How do user demographics and privacy-preservin...,"Alright, strap in for some next-level insight ...",,The context indicates that ChatGPT's usage has...,1,1,1,5.752986,294a3dd9-d910-4ca3-80a9-eb46224b5941,f1af5e41-7905-4baa-b046-5f92f4aaaa6f
7,"What does the term '29,000 messages per second...","Ah, the legendary ""29,000 messages per second""...",,"The term '29,000 messages per second' refers t...",0,0,1,4.331795,4be055b5-8558-44bf-914e-ad449b39ec04,67bf00eb-7f96-4720-87f4-7be0d3948ec5
8,What are nonprofessional occupations?,"Yo, nonprofessional occupations are the hustle...",,Variation by Occupation Figure 23 presents var...,1,1,1,2.139521,95955f93-b8af-4589-9c03-4d7318cc9f5f,43632ec5-efc5-45bd-bb90-8fe372f9e45f
9,OpenAI what is it doin with ChatGPT messages?,"Alright, here's the rad lowdown straight from ...",,The context explains that OpenAI develops Chat...,1,0,1,3.540938,4117b9f7-99f2-4cb1-ac6f-3f8fd11fda8d,8ebd27f1-eee2-4d06-a273-441c44839dfc


## Python Dependency Specifiers - from the `[package.metadata]` section of uv.lock


```
[package.metadata]
requires-dist = [
    { name = "jupyter", specifier = ">=1.1.1" },
    { name = "langchain-community", specifier = ">=0.3.16" },
    { name = "langchain-openai", specifier = ">=0.3.3" },
    { name = "langchain-qdrant", specifier = ">=0.2.0" },
    { name = "langgraph", specifier = ">=0.2.69" },
    { name = "nltk", specifier = "==3.8.1" },
    { name = "numpy", specifier = ">=2.2.2" },
    { name = "pymupdf", specifier = ">=1.26.3" },
    { name = "ragas", specifier = "==0.2.10" },
    { name = "unstructured", specifier = ">=0.14.8" },
]
```