# Synthetic Data Generation Using RAGAS - RAG Evaluation with LangSmith

In [1]:
# Standard Library Imports
import copy
import getpass
import os
from operator import itemgetter
from uuid import uuid4

# Third-Party Imports
# LangChain Core
from langchain.prompts import ChatPromptTemplate
from langchain.schema import StrOutputParser
from langchain.text_splitter import RecursiveCharacterTextSplitter
# from langchain_core.runnables import RunnableParallel, RunnablePassthrough

# LangChain Community
from langchain_community.document_loaders import DirectoryLoader, PyMuPDFLoader
from langchain_community.vectorstores import Qdrant

# LangChain OpenAI
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# RAGAS
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.llms import LangchainLLMWrapper
from ragas.testset import TestsetGenerator
from ragas.testset.graph import KnowledgeGraph, Node, NodeType
from ragas.testset.synthesizers import (
    MultiHopAbstractQuerySynthesizer,
    MultiHopSpecificQuerySynthesizer,
    SingleHopSpecificQuerySynthesizer,
)
from ragas.testset.transforms import apply_transforms, default_transforms

# LangSmith
from langsmith import Client
from langsmith.evaluation import LangChainStringEvaluator, evaluate

# Local Application Imports
# (none yet)


In [2]:
# Turn on LangSmith tracing and route this run's traces to a project whose
# name carries a random 8-hex-character suffix.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = f"AIM - SDG - {uuid4().hex[0:8]}"

# Prompt for a secret only when it is not already present in the environment,
# so keys exported before launching the kernel are not clobbered and
# "Restart Kernel -> Run All" can proceed unattended. getpass reads the value
# without echoing it, which keeps it out of the saved notebook output.
for _env_var, _prompt in (
    ("LANGCHAIN_API_KEY", "LangChain API Key:"),
    ("OPENAI_API_KEY", "OpenAI API Key:"),
):
    if not os.environ.get(_env_var):
        os.environ[_env_var] = getpass.getpass(_prompt)

In [3]:
# Prompt templates for the two RAG chains. Both expect `{context}` and
# `{question}` placeholders; each string ends with a single trailing newline.

# Plain grounded-answer prompt.
BASELINE_PROMPT = (
    "Given a provided context and question, you must answer the question based only on context.\n"
    "\n"
    'If you cannot answer the question based on the context - you must say "I don\'t know".\n'
    "\n"
    "Context: {context}\n"
    "Question: {question}\n"
)

# Same instructions as the baseline plus one extra paragraph asking for a
# non-generic, stylised answer.
DOPE_PROMPT = (
    "Given a provided context and question, you must answer the question based only on context.\n"
    "\n"
    'If you cannot answer the question based on the context - you must say "I don\'t know".\n'
    "\n"
    "Make your answer rad, ensure high levels of dopeness. Do not be generic, or give generic responses.\n"
    "\n"
    "Context: {context}\n"
    "Question: {question}\n"
)

In [4]:
# Load every file matching *.pdf under the data directory, parsing each one
# with PyMuPDFLoader.
path = "data/"
loader = DirectoryLoader(path, glob="*.pdf", loader_cls=PyMuPDFLoader)
docs = loader.load()

# Keep an independent copy for the RAG chains. A plain `docs_copy = docs` would
# only bind a second name to the same list (and the same Document objects), so
# any later mutation of `docs` would silently show up in `docs_copy` as well.
docs_copy = copy.deepcopy(docs)

In [None]:
# RAGAS models: LangChain chat/embedding models wrapped for the RAGAS tooling.
# NOTE(original author): LangchainEmbeddingsWrapper is reported as deprecated;
# TODO confirm against the pinned ragas release before migrating.
generator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4.1-nano"))
generator_embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings())
# The knowledge-graph transforms reuse the generator's models under the names
# the transform/generator calls in later cells refer to.
transformer_llm = generator_llm
embedding_model = generator_embeddings

# LangChain models
# `llm` answers questions in the RAG chains; `eval_llm` judges them in LangSmith.
llm = ChatOpenAI(model="gpt-4.1-mini")
eval_llm = ChatOpenAI(model="gpt-4.1")
# Single binding for the vector-store embeddings. An earlier assignment of
# text-embedding-3-small to this same name was overwritten before any use, so
# only the effective model (text-embedding-3-large) is kept.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

## RAGAS - knowledge graph approach

In [6]:
# Start from an empty knowledge graph and add one DOCUMENT node per loaded
# document, carrying its text and its loader metadata as node properties.
kg = KnowledgeGraph()
kg.nodes.extend(
    Node(
        type=NodeType.DOCUMENT,
        properties={
            "page_content": source_doc.page_content,
            "document_metadata": source_doc.metadata,
        },
    )
    for source_doc in docs
)

In [7]:
# Build the default RAGAS transform pipeline for these documents, using the
# wrapped generator LLM and embeddings.
kg_transform = default_transforms(
    documents=docs,
    llm=transformer_llm,
    embedding_model=embedding_model,
)

In [8]:
# Run the transform pipeline over the knowledge graph. The return value is not
# captured and the next cell saves `kg` itself, so this presumably updates `kg`
# in place; re-running the cell on an already-transformed graph may therefore
# skip existing properties. TODO confirm.
apply_transforms(kg, kg_transform)

Applying HeadlinesExtractor:   0%|          | 0/21 [00:00<?, ?it/s]

Applying HeadlineSplitter:   0%|          | 0/64 [00:00<?, ?it/s]

unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to ap

Applying SummaryExtractor:   0%|          | 0/38 [00:00<?, ?it/s]

Property 'summary' already exists in node '3697c1'. Skipping!
Property 'summary' already exists in node '840221'. Skipping!
Property 'summary' already exists in node '840221'. Skipping!
Property 'summary' already exists in node '52ad50'. Skipping!
Property 'summary' already exists in node '52ad50'. Skipping!
Property 'summary' already exists in node 'e4319c'. Skipping!
Property 'summary' already exists in node 'e4319c'. Skipping!
Property 'summary' already exists in node '9a0a7f'. Skipping!
Property 'summary' already exists in node '9a0a7f'. Skipping!
Property 'summary' already exists in node '2ca25f'. Skipping!
Property 'summary' already exists in node '9225ef'. Skipping!
Property 'summary' already exists in node '7d27da'. Skipping!
Property 'summary' already exists in node '2ca25f'. Skipping!
Property 'summary' already exists in node '9225ef'. Skipping!
Property 'summary' already exists in node '7d27da'. Skipping!
Property 'summary' already exists in node 'e8173b'. Skipping!
Property

Applying CustomNodeFilter:   0%|          | 0/8 [00:00<?, ?it/s]

Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:   0%|          | 0/50 [00:00<?, ?it/s]

Property 'summary_embedding' already exists in node 'a8d7f5'. Skipping!
Property 'summary_embedding' already exists in node 'e8173b'. Skipping!
Property 'summary_embedding' already exists in node 'e8173b'. Skipping!
Property 'summary_embedding' already exists in node '52ad50'. Skipping!
Property 'summary_embedding' already exists in node '52ad50'. Skipping!
Property 'summary_embedding' already exists in node '840221'. Skipping!
Property 'summary_embedding' already exists in node '840221'. Skipping!
Property 'summary_embedding' already exists in node '3697c1'. Skipping!
Property 'summary_embedding' already exists in node '3697c1'. Skipping!
Property 'summary_embedding' already exists in node '9225ef'. Skipping!
Property 'summary_embedding' already exists in node '9225ef'. Skipping!
Property 'summary_embedding' already exists in node '2b2269'. Skipping!
Property 'summary_embedding' already exists in node '2b2269'. Skipping!
Property 'summary_embedding' already exists in node 'e4319c'. Sk

Applying [CosineSimilarityBuilder, OverlapScoreBuilder]:   0%|          | 0/2 [00:00<?, ?it/s]

In [9]:
# Persist the transformed knowledge graph to a JSON file in the working
# directory so it can be reloaded by name in the next cell.
kg.save("usecase_data_kg.json")

In [10]:
# Reload the graph from the file written by the save step and hand it to the
# testset generator together with the wrapped generator models.
usecase_data_kg = KnowledgeGraph.load("usecase_data_kg.json")

generator = TestsetGenerator(
    llm=generator_llm,
    embedding_model=embedding_model,
    knowledge_graph=usecase_data_kg,
)

# (synthesizer, weight) pairs: half single-hop questions, and a quarter each of
# the two multi-hop styles.
query_distribution = [
    (SingleHopSpecificQuerySynthesizer(llm=generator_llm), 0.5),
    (MultiHopAbstractQuerySynthesizer(llm=generator_llm), 0.25),
    (MultiHopSpecificQuerySynthesizer(llm=generator_llm), 0.25),
]

In [11]:
# Generate the synthetic "golden" testset (requested size: 10) with the query
# mix defined above, then display it as a DataFrame.
golden_testset = generator.generate(
    testset_size=10,
    query_distribution=query_distribution,
)
golden_testset.to_pandas()

Generating personas:   0%|          | 0/3 [00:00<?, ?it/s]

Generating Scenarios:   0%|          | 0/3 [00:00<?, ?it/s]

Generating Samples:   0%|          | 0/11 [00:00<?, ?it/s]

Unnamed: 0,user_input,reference_contexts,reference,synthesizer_name
0,What did Tomlinson et al. study regarding Chat...,[Introduction ChatGPT launched in November 202...,"Two recent papers, including Tomlinson et al.,...",single_hop_specifc_query_synthesizer
1,How does Claude compare to ChatGPT in terms of...,[Table 1: ChatGPT daily message counts (millio...,The context indicates that ChatGPT's usage is ...,single_hop_specifc_query_synthesizer
2,"What are blue-collar occupations, and how do t...",[Figure 23 presents variation in ChatGPT usage...,Blue-collar occupations are included in the br...,single_hop_specifc_query_synthesizer
3,How does the concept of management influence t...,[Conclusion This paper studies the rapid growt...,The context indicates that users who are highl...,single_hop_specifc_query_synthesizer
4,How does the variation in ChatGPT usage by occ...,"[<1-hop>\n\n6.5 Variation by Occupation, <2-ho...","The variation in ChatGPT usage by occupation, ...",multi_hop_abstract_query_synthesizer
5,How has the rapid global diffusion of AI techn...,[<1-hop>\n\nIntroduction ChatGPT launched in N...,"The rapid global diffusion of AI technology, h...",multi_hop_abstract_query_synthesizer
6,chatgpt use by occupation categories how diffe...,"[<1-hop>\n\n6.5 Variation by Occupation, <2-ho...",Figure 23 shows variation in ChatGPT usage by ...,multi_hop_abstract_query_synthesizer
7,what happen in july 2025 with chatgpt usage an...,[<1-hop>\n\nConclusion This paper studies the ...,"In july 2025, chatgpt was used very a lot, wit...",multi_hop_specific_query_synthesizer
8,Whats the date in July 2025 that ChatGPT was u...,[<1-hop>\n\nConclusion This paper studies the ...,"In July 2025, ChatGPT had been used weekly by ...",multi_hop_specific_query_synthesizer
9,Based on the rapid growth of ChatGPT usage in ...,[<1-hop>\n\nConclusion This paper studies the ...,"The first segment indicates that by July 2025,...",multi_hop_specific_query_synthesizer


## LangChain RAG

### First Chain

In [12]:
# Baseline chunking: chunk_size 500 with an overlap of 50 between neighbours.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)

# Chunk the loaded PDFs for the baseline vector store.
baseline_documents = text_splitter.split_documents(docs_copy)

In [13]:
# Embed the baseline chunks into an in-memory Qdrant collection.
baseline_vectorstore = Qdrant.from_documents(
    collection_name="Use Case RAG",
    location=":memory:",
    embedding=embeddings,
    documents=baseline_documents,
)

In [14]:
# Retrieve the 10 nearest chunks per question for the baseline chain.
baseline_retriever = baseline_vectorstore.as_retriever(search_kwargs={"k": 10})

baseline_prompt = ChatPromptTemplate.from_template(BASELINE_PROMPT)

# Input is {"question": ...}. The first stage fans the question out into the
# retrieved context and the untouched question, which then fill the prompt;
# the chat model's reply is reduced to a plain string.
baseline_chain = (
    {
        "context": itemgetter("question") | baseline_retriever,
        "question": itemgetter("question"),
    }
    | baseline_prompt
    | llm
    | StrOutputParser()
)

In [15]:
# Smoke-test the baseline chain with one ad-hoc question; the returned string
# is displayed as the cell output.
baseline_chain.invoke({"question" : "What are people doing with AI these days?"})

'Based on the provided context, people are using AI, particularly generative AI like ChatGPT, to perform workplace tasks by either augmenting or automating human labor. They use AI in many flexible ways including producing long-form outputs such as writing and software code, spreadsheets, and other digital products. About 81% of work-related AI uses are associated with activities like obtaining information and conducting data analysis. Additionally, users seek information and advice from AI, fulfilling intents classified as Asking, Doing, or Expressing. AI is also used for self-expression activities such as relationships, personal reflection, games, and role play, though these are less common. Overall, AI is applied both at work and outside work in diverse ways including augmenting decisions, creating content, and supporting various economic and social activities.'

### Second Chain - dope_chain

In [16]:
# Second chunking pass: chunk_size doubled to 1000, overlap kept at 50.
# This rebinds `text_splitter`, replacing the baseline splitter object.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)

# Chunk the same loaded PDFs again for the second ("dope") vector store.
dope_documents = text_splitter.split_documents(docs_copy)

In [17]:
# Second vector store, built from the larger chunks.
# NOTE(review): the original author asked whether this reuses or overwrites the
# baseline in-memory store. It is bound to a different variable and uses a
# different collection name; each from_documents(location=":memory:") call
# presumably creates its own in-memory Qdrant client, leaving the baseline
# store untouched. TODO confirm.
dope_vectorstore = Qdrant.from_documents(
    collection_name="Use Case RAG Docs",
    location=":memory:",
    embedding=embeddings,
    documents=dope_documents,
)

In [18]:
dope_prompt = ChatPromptTemplate.from_template(DOPE_PROMPT)
# No search_kwargs are passed here, so the retriever's default result count
# applies (the baseline retriever explicitly asks for k=10).
dope_retriever = dope_vectorstore.as_retriever()

# Same pipeline shape as the baseline chain, with the second retriever and the
# stylised prompt swapped in.
dope_chain = (
    {
        "context": itemgetter("question") | dope_retriever,
        "question": itemgetter("question"),
    }
    | dope_prompt
    | llm
    | StrOutputParser()
)

In [19]:
# Smoke-test the second chain with one ad-hoc question; the returned string is
# displayed as the cell output.
dope_chain.invoke({"question" : "How are people using AI to make money?"})

'Alright, buckle up—here’s the sick lowdown on how folks are cashing in with AI, straight from the context:\n\nPeople ain’t just grinding AI to crank out tasks solo—they’re deploying ChatGPT like a supercharged advisor and research sidekick. Imagine having a wizard in your corner boosting your decision-making mojo, especially in brainy, knowledge-rich gigs. Instead of just handing off jobs for the AI to spit out, users leverage it for **decision support**, dialing up the quality of their work and productivity like a boss.\n\nPut simply: AI is remixing how work gets done by amplifying human smarts, not replacing them. This turbocharged synergy leads to better calls, smarter moves, and thus more $$$ flowing in. The context even plugs a mind-blowing estimate—US users value generative AI so much it’s like they’d pay a cool $98 just not to use it for a month, implying a whopping $97 billion-plus annual economic surplus. That’s AI turning brainwaves into bankwaves.\n\nIn sum? People are legi

## LangSmith

### Create Dataset from RAGAS Golden Testset

In [None]:
# LangSmith client; no arguments are passed, so it relies on its own default
# configuration (presumably the LANGCHAIN_* environment variables set earlier).
client = Client()

# A random 8-hex-character suffix gives each run a fresh dataset name.
langsmith_dataset_name = f"Use Case Synthetic Data - AIE8 - {uuid4().hex[0:8]}"

langsmith_dataset = client.create_dataset(
    description="Synthetic Data for Use Cases",
    dataset_name=langsmith_dataset_name,
)

In [21]:
# Upload each golden-testset row as one LangSmith example: the synthetic
# question is the input, the reference answer is the expected output, and the
# reference contexts are attached as metadata.
for _row_index, testset_row in golden_testset.to_pandas().iterrows():
    client.create_example(
        inputs={"question": testset_row["user_input"]},
        outputs={"answer": testset_row["reference"]},
        metadata={"context": testset_row["reference_contexts"]},
        dataset_id=langsmith_dataset.id,
    )

### Set Up Evaluation Criteria - this uses the legacy `LangChainStringEvaluator` approach, not OpenEvals

In [22]:
# "qa" evaluator judged by the evaluation LLM; it shows up as
# `feedback.correctness` in the experiment tables.
qa_evaluator = LangChainStringEvaluator("qa", config={"llm": eval_llm})


def _labeled_helpfulness_inputs(run, example):
    """Map a run and its dataset example onto the evaluator's input fields.

    The chain output becomes the prediction, the example's stored answer the
    reference, and the example's question the input.
    """
    return {
        "prediction": run.outputs["output"],
        "reference": example.outputs["answer"],
        "input": example.inputs["question"],
    }


# Reference-aware helpfulness check: the judge sees the reference answer too.
labeled_helpfulness_evaluator = LangChainStringEvaluator(
    "labeled_criteria",
    config={
        "criteria": {
            "helpfulness": (
                "Is this submission helpful to the user,"
                " taking into account the correct reference answer?"
            )
        },
        "llm": eval_llm,
    },
    prepare_data=_labeled_helpfulness_inputs,
)

# Reference-free style check on the response alone.
dopeness_evaluator = LangChainStringEvaluator(
    "criteria",
    config={
        "criteria": {
            "dopeness": "Is this response dope, lit, cool, or is it just a generic response?",
        },
        "llm": eval_llm,
    },
)

## Run Evaluations

### First Evaluation - baseline_chain

In [None]:
# Run the baseline chain over every example in the LangSmith dataset and score
# each answer with the three evaluators; the experiment is tagged with a
# revision id so it can be told apart from the second run.
evaluate(
    baseline_chain.invoke,
    data=langsmith_dataset_name,
    evaluators=[qa_evaluator, labeled_helpfulness_evaluator, dopeness_evaluator],
    metadata={"revision_id": "default_chain_init"},
)

View the evaluation results for experiment: 'glossy-wash-30' at:
https://smith.langchain.com/o/29b9636b-ddfa-4496-93ee-b2875ed2ee09/datasets/703e7706-7f02-4663-86dd-8695bf183fb8/compare?selectedSessions=d29da37a-752f-4cb5-9049-754d1d776830




0it [00:00, ?it/s]

Unnamed: 0,inputs.question,outputs.output,error,reference.answer,feedback.correctness,feedback.helpfulness,feedback.dopeness,execution_time,example_id,id
0,Based on the data showing that over 70% of Cha...,"Based on the context, the fact that over 70% o...",,"The context indicates that by July 2025, more ...",1,1,0,4.720682,13321055-5845-4a80-af72-e5603a108d46,fb75a526-d408-4d6c-93c7-d6d76a603834
1,Based on the rapid growth of ChatGPT usage in ...,"Based on the provided context, the increasing ...",,"The first segment indicates that by July 2025,...",1,1,0,9.10673,cddbe112-9433-4574-8cc5-96b3d38e5607,33951d77-6e60-4c42-87b1-6fb1a9f05c77
2,Whats the date in July 2025 that ChatGPT was u...,The context does not specify an exact date in ...,,"In July 2025, ChatGPT had been used weekly by ...",1,1,0,2.640109,8af7ba16-a455-4c7c-bc07-2ffb0dfb65c1,af7df2e2-44d4-4c75-a189-3770b66d7df9
3,what happen in july 2025 with chatgpt usage an...,Based on the provided context:\n\n- **July 202...,,"In july 2025, chatgpt was used very a lot, wit...",1,0,0,12.182166,ef27544d-e81c-4310-9fb0-f84f9eaddd49,deb23bbb-6a5d-4479-8841-8e8666d4772a
4,chatgpt use by occupation categories how diffe...,"Based on the context, ChatGPT usage varies sig...",,Figure 23 shows variation in ChatGPT usage by ...,1,1,0,3.52881,a031271b-d711-491d-8a69-6e8b34f2ebaa,98699fe9-8e16-4134-b593-430438b209d6
5,How has the rapid global diffusion of AI techn...,"Based on the provided context, the rapid globa...",,"The rapid global diffusion of AI technology, h...",1,1,0,5.301738,363fc6ff-af56-4758-a00d-736f1aeb7c96,4cf1f259-3ea4-44b0-8a41-82d5752522ad
6,How does the variation in ChatGPT usage by occ...,The variation in ChatGPT usage by occupation i...,,"The variation in ChatGPT usage by occupation, ...",1,1,0,3.337843,99fc3ca7-4288-4896-806c-ddef3e55092b,992b54f7-0786-44b2-95ce-405af7dc7e14
7,How does the concept of management influence t...,"Based on the provided context, management as a...",,The context indicates that users who are highl...,1,1,0,5.62904,3f7c8864-d904-4612-a141-2b7ac916b2f4,746eadab-cb84-49dd-a084-2578e4747c31
8,"What are blue-collar occupations, and how do t...",Blue-collar occupations are included within th...,,Blue-collar occupations are included in the br...,1,0,0,2.692904,8cc5e447-4c56-4b87-8a2e-71925a962dc0,81fd88ec-a2e9-4deb-97c4-d07dc9a448cf
9,How does Claude compare to ChatGPT in terms of...,"Based on the provided context, Claude conversa...",,The context indicates that ChatGPT's usage is ...,1,0,0,2.216251,94549375-8e92-437b-b842-788dd75df411,ab98f839-4427-4477-95b8-af2af5368325


### Second Evaluation - dope_chain

In [24]:
# Run the second ("dope") chain over the same dataset with the same three
# evaluators, tagged with its own revision id for side-by-side comparison.
evaluate(
    dope_chain.invoke,
    data=langsmith_dataset_name,
    evaluators=[qa_evaluator, labeled_helpfulness_evaluator, dopeness_evaluator],
    metadata={"revision_id": "dope_chain"},
)

View the evaluation results for experiment: 'back-night-46' at:
https://smith.langchain.com/o/29b9636b-ddfa-4496-93ee-b2875ed2ee09/datasets/703e7706-7f02-4663-86dd-8695bf183fb8/compare?selectedSessions=82114b75-8c0b-47f1-b849-70976b9af849




0it [00:00, ?it/s]

Unnamed: 0,inputs.question,outputs.output,error,reference.answer,feedback.correctness,feedback.helpfulness,feedback.dopeness,execution_time,example_id,id
0,Based on the data showing that over 70% of Cha...,"Yo, here’s the scoop dripping straight from th...",,"The context indicates that by July 2025, more ...",1,1,1,6.000123,13321055-5845-4a80-af72-e5603a108d46,aecec017-5812-414d-b7e7-fc261a5ac186
1,Based on the rapid growth of ChatGPT usage in ...,"Ah, buckle up—this is where AI usage gets stra...",,"The first segment indicates that by July 2025,...",1,1,1,6.941672,cddbe112-9433-4574-8cc5-96b3d38e5607,765660e7-ff39-4d73-a4a4-fbe462f25642
2,Whats the date in July 2025 that ChatGPT was u...,"Oh heck yes, let's drop some knowledge bombs! ...",,"In July 2025, ChatGPT had been used weekly by ...",1,1,1,2.957055,8af7ba16-a455-4c7c-bc07-2ffb0dfb65c1,b7eeef03-9e4b-455a-8068-2e0e5fe59a73
3,what happen in july 2025 with chatgpt usage an...,"Yo, let’s break down the ChatGPT scene as of *...",,"In july 2025, chatgpt was used very a lot, wit...",1,1,1,14.611715,ef27544d-e81c-4310-9fb0-f84f9eaddd49,4b7226d5-92d3-4c6b-8c8b-fb4c70da22a6
4,chatgpt use by occupation categories how diffe...,"Alright, buckle up for the ultra-slick breakdo...",,Figure 23 shows variation in ChatGPT usage by ...,1,1,1,5.748604,a031271b-d711-491d-8a69-6e8b34f2ebaa,79d2f976-7b2a-40b5-b135-1bdc69f5b7cf
5,How has the rapid global diffusion of AI techn...,"Alright, let’s blast off into the cosmic vibe ...",,"The rapid global diffusion of AI technology, h...",1,1,1,5.736228,363fc6ff-af56-4758-a00d-736f1aeb7c96,e2dfb33f-949c-4848-b46a-7cbe634d2eb9
6,How does the variation in ChatGPT usage by occ...,"Yo, strap in for this occupational odyssey thr...",,"The variation in ChatGPT usage by occupation, ...",1,1,1,5.506113,99fc3ca7-4288-4896-806c-ddef3e55092b,49d16269-2864-449c-8721-ce66bb5831b2
7,How does the concept of management influence t...,"Alright, let's crank this up to eleven. Manage...",,The context indicates that users who are highl...,1,1,1,4.704535,3f7c8864-d904-4612-a141-2b7ac916b2f4,b96d7937-ed26-4a09-85c4-528fc58fbe2f
8,"What are blue-collar occupations, and how do t...","Yo, blue-collar occupations are the gritty fro...",,Blue-collar occupations are included in the br...,1,0,1,3.889654,8cc5e447-4c56-4b87-8a2e-71925a962dc0,026066e3-6f1c-4b11-a96d-3c49e813d58d
9,How does Claude compare to ChatGPT in terms of...,"Alright, strap in for the showdown of AI usage...",,The context indicates that ChatGPT's usage is ...,1,1,1,5.821431,94549375-8e92-437b-b842-788dd75df411,ca8637ff-8642-4d5e-89b7-595fc3b3cf79


## Python Library Versions - from uv.lock


```
[package.metadata]
requires-dist = [
    { name = "jupyter", specifier = ">=1.1.1" },
    { name = "langchain-community", specifier = ">=0.3.16" },
    { name = "langchain-openai", specifier = ">=0.3.3" },
    { name = "langchain-qdrant", specifier = ">=0.2.0" },
    { name = "langgraph", specifier = ">=0.2.69" },
    { name = "nltk", specifier = "==3.8.1" },
    { name = "numpy", specifier = ">=2.2.2" },
    { name = "pymupdf", specifier = ">=1.26.3" },
    { name = "ragas", specifier = "==0.2.10" },
    { name = "unstructured", specifier = ">=0.14.8" },
]
```