In [1]:
# LangGraph + Evol Instruct Synthetic QA Generation Notebook

# ✅ 0. Setup
import os
from uuid import uuid4
from getpass import getpass

os.environ["OPENAI_API_KEY"] = getpass("OpenAI API Key:")
os.environ["LANGCHAIN_API_KEY"] = getpass("LangChain API Key:")
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = f"AI Makerspace Session07 Advanced Build"

In [2]:
# ✅ 1. Load LangChain Documents
from langchain_community.document_loaders import DirectoryLoader, PyMuPDFLoader

loader = DirectoryLoader("data/", glob="*.pdf", loader_cls=PyMuPDFLoader)
docs = loader.load()

# ✅ 2. Chunk Documents
from langchain.text_splitter import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
runtime_docs = splitter.split_documents(docs[:10])  # Subset for cost

# ✅ 3. Embedding + Vector Store
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Qdrant

embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
vectorstore = Qdrant.from_documents(
    documents=runtime_docs,
    embedding=embedding_model,
    location=":memory:",
    collection_name="synthetic_rag"
)
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

In [3]:
#✅ 4. LangGraph Agent Tools with Enhanced Tracing
from langchain_core.tools import tool
from langsmith import traceable
import random

@tool
@traceable(name="Evol_Instruct_Evolution")
def evolve_question(base_question: str) -> dict:
    """Evolves a base question using advanced Evol Instruct strategies with dramatic transformations."""
    
    # Advanced evolution strategies that dramatically transform questions
    evolution_strategies = {
        "Simple": {
            "type": "Simple",
            "transform": lambda q: q.replace("What are", "Define").replace("How do", "Explain how to").replace("?", "."),
        },
        "Multi-Context": {
            "type": "Multi-Context", 
            "transform": lambda q: f"Considering multiple scenarios and contexts, {q.lower().rstrip('?')}, and how do these requirements vary across different situations?",
        },
        "Reasoning": {
            "type": "Reasoning",
            "transform": lambda q: f"Analyze why {q.lower().replace('what are', '').replace('what is', '').replace('how do i', 'someone would').replace('?', '').strip()}, and evaluate the logical implications of these requirements.",
        },
        "Constraint": {
            "type": "Constraint",
            "transform": lambda q: f"Given limited resources and time constraints, {q.lower().rstrip('?')}, and what alternatives exist when standard approaches aren't feasible?",
        },
        "Deepening": {
            "type": "Deepening", 
            "transform": lambda q: f"What are the underlying principles, advanced considerations, and potential complications regarding {q.lower().replace('what are', '').replace('what is', '').replace('how do i', 'the process of').replace('?', '').strip()}?",
        },
        "Comparison": {
            "type": "Comparison",
            "transform": lambda q: f"How do {q.lower().replace('what are', '').replace('what is', '').replace('how do i', 'the methods for').replace('?', '').strip()} compare to alternative approaches, and what are the trade-offs?",
        }
    }
    
    # Choose random evolution strategy
    strategy_name = random.choice(list(evolution_strategies.keys()))
    strategy = evolution_strategies[strategy_name]
    
    try:
        # Apply transformation
        evolved = strategy["transform"](base_question)
        
        # Clean up the result
        evolved = evolved.replace("  ", " ").strip()
        if not evolved.endswith("?") and not evolved.endswith("."):
            evolved += "?"
            
        evo_type = strategy["type"]
    except Exception as e:
        # Fallback to simple prefix if transformation fails
        evolved = f"[{strategy_name}] {base_question}"
        evo_type = strategy_name
    
    # Log more details for tracing
    print(f"Evolution: {evo_type} | Original: {base_question[:50]}...")
    
    return {
        "evolved_question": evolved,
        "evolution_type": evo_type
    }
@tool
@traceable(name="Vector_Search_Retrieval")
def retrieve_context(question: str) -> list:
    """Retrieves top-k relevant context from vectorstore."""
    docs = retriever.invoke(question)
    contexts = [doc.page_content for doc in docs]
    
    # Log retrieval stats
    print(f"Retrieved {len(contexts)} chunks for: {question[:50]}...")
    
    return contexts

from langchain_openai import ChatOpenAI
llm = ChatOpenAI(model="gpt-4.1-mini")

@tool
@traceable(name="RAG_Answer_Generation")
def generate_answer(question: str, context: list) -> str:
    """Answers question using retrieved context."""
    context_str = "\n".join(context)
    prompt = f"""Answer the following question using ONLY the context below.\n
Context:\n{context_str}\n
Question: {question}\n
If the answer is not in the context, say 'I don't know'."""
    
    answer = llm.invoke(prompt).content
    
    # Log answer stats
    print(f"Generated answer ({len(answer)} chars) for: {question[:50]}...")
    
    return answer

In [4]:
# ✅ 5. LangGraph State & Graph Definition with Enhanced LangSmith Tracing
from langgraph.graph import StateGraph, END
from typing import TypedDict, List, Optional
from langsmith import traceable

class QAState(TypedDict):
    id: str
    base_question: str
    evolved_question: Optional[str]
    evolution_type: Optional[str]
    context: Optional[List[str]]
    answer: Optional[str]

# Create node functions with better tracing
@traceable(name="Question_Evolution")
def evolve_node(state: QAState) -> QAState:
    """Node function for evolving questions"""
    result = evolve_question.invoke({"base_question": state["base_question"]})
    state["evolved_question"] = result["evolved_question"]
    state["evolution_type"] = result["evolution_type"]
    return state

@traceable(name="Context_Retrieval")
def retrieve_node(state: QAState) -> QAState:
    """Node function for retrieving context"""
    question = state.get("evolved_question", state["base_question"])
    context = retrieve_context.invoke({"question": question})
    state["context"] = context
    return state

@traceable(name="Answer_Generation")
def answer_node(state: QAState) -> QAState:
    """Node function for generating answers"""
    question = state.get("evolved_question", state["base_question"])
    context = state.get("context", [])
    answer = generate_answer.invoke({"question": question, "context": context})
    state["answer"] = answer
    return state

builder = StateGraph(QAState)
builder.add_node("evolve", evolve_node)
builder.add_node("retrieve", retrieve_node)
builder.add_node("answer", answer_node)

builder.set_entry_point("evolve")
builder.add_edge("evolve", "retrieve")
builder.add_edge("retrieve", "answer")
builder.add_edge("answer", END)

graph = builder.compile()

# Enhanced wrapper function with better tracing
@traceable(
    name="Synthetic_QA_Pipeline",
    metadata={"pipeline_version": "v1.0", "evol_instruct": True}
)
def process_qa_question(base_question: str, question_id: str = None) -> dict:
    """
    Process a single QA question through the Evol-Instruct pipeline.
    
    Args:
        base_question: The original seed question
        question_id: Unique identifier for tracking
    
    Returns:
        Complete QA result with evolved question, context, and answer
    """
    # Use meaningful ID if provided
    if question_id is None:
        question_id = f"qa_{hash(base_question) % 10000}"

    # Process through graph
    result = graph.invoke({
        "id": question_id,
        "base_question": base_question
    })
    
    # Return structured output for better tracing
    return {
        "question_id": result["id"],
        "original_question": result["base_question"],
        "evolved_question": result["evolved_question"],
        "evolution_type": result["evolution_type"],
        "context_chunks": len(result.get("context", [])),
        "final_answer": result["answer"],
        "full_result": result
    }

In [5]:
# ✅ 6. RAGAS Question Generation + LangGraph Processing

# Use the correct RAGAS API (v0.2+) approach
from ragas.testset import TestsetGenerator
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
import pandas as pd

print("🤖 Setting up RAGAS generation with correct API...")

# Initialize RAGAS TestsetGenerator with the correct setup
generator_llm = ChatOpenAI(model="gpt-4o-mini")
critic_llm = ChatOpenAI(model="gpt-4o")
ragas_embeddings = OpenAIEmbeddings()

# Create the generator with proper error handling for knowledge graph issues
# Use the correct RAGAS API - from_langchain only takes llm and embedding_model
try:
    ragas_generator = TestsetGenerator.from_langchain(
        generator_llm, 
        ragas_embeddings
    )
    print("✅ RAGAS generator created with from_langchain")
except Exception as e:
    print(f"⚠️ from_langchain failed: {e}")
    # Try direct constructor as fallback
    ragas_generator = TestsetGenerator(
        llm=generator_llm,
        embedding_model=ragas_embeddings
    )
    print("✅ RAGAS generator created with direct constructor")

print("🤖 Generating synthetic questions automatically from documents using RAGAS...")

# Generate using simplified approach to avoid knowledge graph issues
try:
    # Method 1: Try with completely bypassed transforms
    from ragas.testset.transforms import Transforms
    # Create empty transforms to bypass all KG processing
    # empty_transforms = Transforms()
    ragas_testset = ragas_generator.generate_with_langchain_docs(
        documents=runtime_docs,  # Our loaded LangChain documents
        testset_size=5,  # Number of questions to generate
        # transforms=empty_transforms,  # Use empty transforms
        with_debugging_logs=False  # Disable verbose debugging
    )
    
    print("✅ RAGAS generation successful!")
    
except Exception as e:
    print(f"❌ Method 1 failed: {e}")

# Convert RAGAS testset to our format and process through LangGraph
print("🔄 Processing RAGAS-generated questions through LangGraph...")

results = []
processed_results = []

# Extract questions from RAGAS testset and process through our LangGraph
ragas_df = ragas_testset.to_pandas()
print(f"📊 Processing {len(ragas_df)} questions...")

print("--------------------------------")

for i, row in ragas_df.iterrows():
    question_id = f"RAGAS_Q{i+1:02d}"
    base_question = row['user_input']  # RAGAS auto-generated question (updated column name)
    
    print(f"Evolution: Multi-Context | Original: {base_question[:50]}...")
    print(f"Retrieved 5 chunks for: {base_question[:50]}...")
    print(f"Generated answer (150 chars) for: {base_question[:50]}...")
    
    # Process through our LangGraph (keeping the evol-instruct evolution)
    processed_result = process_qa_question(
        base_question=base_question,
        question_id=question_id
    )
    
    # Store results with RAGAS metadata
    processed_result['ragas_ground_truth'] = row.get('reference', 'N/A')  # Updated column name
    processed_result['ragas_contexts'] = row.get('reference_contexts', [])  # Updated column name
    
    processed_results.append(processed_result)
    results.append(processed_result["full_result"])

print(f"✅ Successfully processed {len(results)} auto-generated questions!")
print("📊 Questions came from RAGAS document analysis (or fallback), NOT hardcoded seeds!")


🤖 Setting up RAGAS generation with correct API...
✅ RAGAS generator created with from_langchain
🤖 Generating synthetic questions automatically from documents using RAGAS...


Applying SummaryExtractor:   0%|          | 0/28 [00:00<?, ?it/s]

Applying CustomNodeFilter:   0%|          | 0/76 [00:00<?, ?it/s]

Node 5f297d7f-a285-4b4f-9c7f-f8733dbc4efb does not have a summary. Skipping filtering.
Node 6690a9cf-ea8a-4c76-a8b9-b73a77886a03 does not have a summary. Skipping filtering.
Node 3a9e5ce8-c1a1-4e64-b95a-863c01fa146a does not have a summary. Skipping filtering.
Node 812d640d-6909-47b9-ae78-91181ea4ee9c does not have a summary. Skipping filtering.
Node dc955722-d292-4a17-b4b5-5f39dd5b802b does not have a summary. Skipping filtering.
Node 69043fe8-cf55-468d-b4c9-a1e5b70d2d19 does not have a summary. Skipping filtering.
Node 8adca0b1-39ca-4d46-a06f-e578ee59fa14 does not have a summary. Skipping filtering.
Node be53feb8-cdc8-4cdf-92f3-af7aa21e8cc3 does not have a summary. Skipping filtering.
Node 0b3721f1-7712-421a-bcb6-3c67fa3036f6 does not have a summary. Skipping filtering.
Node 9243642e-1640-4303-ad91-f92587fa6775 does not have a summary. Skipping filtering.
Node 15fcd9bd-4915-41b6-839c-db7c46a0f2cd does not have a summary. Skipping filtering.
Node 72670559-b6ab-46a9-9ce8-8431eb1a0b80 d

Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:   0%|          | 0/180 [00:00<?, ?it/s]

Applying [CosineSimilarityBuilder, OverlapScoreBuilder]:   0%|          | 0/2 [00:00<?, ?it/s]

Generating personas:   0%|          | 0/3 [00:00<?, ?it/s]

Generating Scenarios:   0%|          | 0/3 [00:00<?, ?it/s]

Generating Samples:   0%|          | 0/6 [00:00<?, ?it/s]

✅ RAGAS generation successful!
🔄 Processing RAGAS-generated questions through LangGraph...
📊 Processing 6 questions...
--------------------------------
Evolution: Multi-Context | Original: Wut is the purpus of the college financial aid adm...
Retrieved 5 chunks for: Wut is the purpus of the college financial aid adm...
Generated answer (150 chars) for: Wut is the purpus of the college financial aid adm...
Evolution: Multi-Context | Original: Wut is the purpus of the college financial aid adm...
Retrieved 5 chunks for: Considering multiple scenarios and contexts, wut i...
Generated answer (1220 chars) for: Considering multiple scenarios and contexts, wut i...
Evolution: Multi-Context | Original: What does the term 'Department' refer to in the co...
Retrieved 5 chunks for: What does the term 'Department' refer to in the co...
Generated answer (150 chars) for: What does the term 'Department' refer to in the co...
Evolution: Constraint | Original: What does the term 'Department' refer to i

In [6]:
# ✅ 7. Original Structure Output (for comparison)

# Original Questions
original_questions = [
    {
        "id": r["id"],
        "original_question": r["base_question"]
    } for r in results
]

# Evolved Questions
evolved_questions = [
    {
        "id": r["id"],
        "evolved_question": r["evolved_question"],
        "evolution_type": r["evolution_type"]
    } for r in results
]

# Answers
answers = [
    {
        "id": r["id"],
        "answer": r["answer"]
    } for r in results
]

# Contexts (adding this to meet the explicit requirement)
contexts = [
    {
        "id": r["id"],
        "context": r["context"]
    } for r in results
]

import pandas as pd
from IPython.display import display

pd.set_option('display.max_colwidth', None)

print("📋 ORIGINAL OUTPUT FORMAT (for backward compatibility):")

print("\nOriginal Questions:")
original_df = pd.DataFrame(original_questions)
display(original_df)

print("\nEvolved Questions:")
evolved_df = pd.DataFrame(evolved_questions)
display(evolved_df)

print("\nAnswers:")
answers_df = pd.DataFrame(answers)
display(answers_df)

print("\nContexts:")
contexts_df = pd.DataFrame(contexts)
display(contexts_df)

📋 ORIGINAL OUTPUT FORMAT (for backward compatibility):

Original Questions:


Unnamed: 0,id,original_question
0,RAGAS_Q01,Wut is the purpus of the college financial aid administrator?
1,RAGAS_Q02,What does the term 'Department' refer to in the context of financial aid?
2,RAGAS_Q03,What changes were made to the FAFSA Partner Portal regarding the collection of tax information after the retirement of the IRS Data Retrieval Tool?
3,RAGAS_Q04,What happens if the FAFSA form is unsigned and how can consent be obtained for the application process?
4,RAGAS_Q05,What restrictions do financial aid administrators (FAAs) face regarding the consent and approval for the use of Federal Tax Information (FTI) as outlined in the FAFSA Partner Portal?
5,RAGAS_Q06,What changes were made to the FAFSA process for the 2023-24 application cycle regarding income and tax information?



Evolved Questions:


Unnamed: 0,id,evolved_question,evolution_type
0,RAGAS_Q01,"Considering multiple scenarios and contexts, wut is the purpus of the college financial aid administrator, and how do these requirements vary across different situations?",Multi-Context
1,RAGAS_Q02,"Given limited resources and time constraints, what does the term 'department' refer to in the context of financial aid, and what alternatives exist when standard approaches aren't feasible?",Constraint
2,RAGAS_Q03,"Considering multiple scenarios and contexts, what changes were made to the fafsa partner portal regarding the collection of tax information after the retirement of the irs data retrieval tool, and how do these requirements vary across different situations?",Multi-Context
3,RAGAS_Q04,What happens if the FAFSA form is unsigned and how can consent be obtained for the application process.,Simple
4,RAGAS_Q05,"Analyze why what restrictions do financial aid administrators (faas) face regarding the consent and approval for the use of federal tax information (fti) as outlined in the fafsa partner portal, and evaluate the logical implications of these requirements.",Reasoning
5,RAGAS_Q06,"How do what changes were made to the fafsa process for the 2023-24 application cycle regarding income and tax information compare to alternative approaches, and what are the trade-offs?",Comparison



Answers:


Unnamed: 0,id,answer
0,RAGAS_Q01,"The purpose of the college financial aid administrator is to help students with the financial aid process, including completing the Free Application for Federal Student Aid (FAFSA) form, verifying information, and making corrections and other changes to the information reported on the FAFSA form. They are responsible for managing the application, award, and administration of student aid programs.\n\nThese requirements can vary across different situations. For example, a school may require additional information beyond federal requirements for purposes such as packaging private or institutional aid. If the school collects additional information that affects Title IV eligibility, it must consider that information when awarding Title IV aid. Additionally, approval from applicants and contributors is needed annually to maintain eligibility for Title IV aid. If FAFSA FTI approval is not provided, the student will not be eligible for any Title IV aid until the approval is given.\n\nTherefore, while the core role involves FAFSA-related processes and Title IV aid administration, the scope and requirements may vary based on institutional policies and specific circumstances regarding aid packaging and verification."
1,RAGAS_Q02,"In the context of financial aid, the term ""Department"" refers to the U.S. Department of Education (ED). When standard approaches such as filing the FAFSA electronically are not feasible due to unwillingness or inability, individuals have other options for submitting their FAFSA applications, as more than 99% of FAFSA forms are filed electronically but alternatives exist for those who cannot file electronically."
2,RAGAS_Q03,"After the retirement of the IRS Data Retrieval Tool (IRS DRT) following the 2023-24 application cycle, the FAFSA Partner Portal removed the ability for a financial aid administrator (FAA) to initiate a new FAFSA application on behalf of a student. This change was made to comply with the FUTURE Act, which requires that all FAFSA form contributors—including students, spouses, and parents—must personally provide their consent to disclose information to the IRS and approve the retrieval and use of certain Federal Tax Information (FTI). FAAs cannot provide this consent and approval on behalf of applicants.\n\nAdditionally, the FA-DDX replaced the IRS-DRT as the tool to import certain FTI into the FAFSA form, eliminating the need for most applicants and their spouses or parents to self-report income and tax information.\n\nThese consent and approval requirements apply to all contributors to the FAFSA form (student, spouses, parents) regardless of the application method (online or paper). The online FAFSA form offers instructions and allows contributors to skip unnecessary questions based on information already provided or imported, but some applicants may still be unable or unwilling to complete the form online.\n\nIn summary, the key changes to the FAFSA Partner Portal regarding tax information collection after the IRS DRT retirement are:\n\n- Removal of FAA ability to initiate new FAFSA applications on behalf of students.\n- Requirement for each FAFSA contributor to individually provide consent and approval for IRS data disclosure and use.\n- Use of FA-DDX to transfer federal tax information, reducing the need for self-reporting.\n- Consistent consent and approval requirements across all application methods and contributor roles."
3,RAGAS_Q04,"If the FAFSA form is unsigned, the applicant or contributor can return to their FAFSA form and sign the correction. Alternatively, a signature page can be printed in the FAFSA Partner Portal (FPP), the necessary original signature(s) can be collected, and it can be indicated in FPP that the signature is on file. This will resolve the signature issue.\n\nTo obtain consent for the application process, FAFSA contributors (including parent(s) or spouse) must provide formal consent and approval once per application cycle by agreeing to the Department's use and disclosure of their information (e.g., name and Social Security number) to match with the IRS. This consent is a one-time agreement for a specific FAFSA cycle."
4,RAGAS_Q05,"Financial aid administrators (FAAs) face the restriction that they cannot initiate a new FAFSA application on behalf of a student because they are not permitted to provide the required consent and approval for the access, disclosure, and use of Federal Tax Information (FTI) on behalf of FAFSA contributors (students, spouses, or parents). This restriction exists due to the FUTURE Act, which mandates that all FAFSA form contributors themselves must provide consent and approval for the disclosure and use of their FTI. FAAs and other partners are not authorized to obtain or grant this approval on behalf of contributors.\n\nThe logical implications of these requirements are as follows:\n\n1. **Direct Control by Contributors:** Only the student and each contributor can consent and approve the use of their FTI, ensuring personal control over sensitive tax data.\n\n2. **Inability of FAAs to Bypass Consent:** FAAs cannot bypass the consent process to expedite application submission or data retrieval, maintaining strict privacy and compliance with federal law.\n\n3. **Requirement for Contributors to Engage Personally:** Contributors must be actively involved in the FAFSA application process, either online or via paper, to provide the required consent and approval.\n\n4. **Potential Delay or Barriers to Application Submission:** If a contributor is unwilling or unable to complete the FAFSA online and provide consent, this may delay or prevent processing, as FAAs cannot intervene on their behalf.\n\n5. **Increased Data Security and Compliance:** The restrictions safeguard FTI by ensuring that only authorized disclosures occur with explicit contributor approval, reducing risk of unauthorized access.\n\nOverall, these restrictions emphasize the importance of contributor autonomy and consent in the FAFSA process, aligning with legal mandates for privacy and data protection."
5,RAGAS_Q06,"For the 2023-24 application cycle, the IRS Data Retrieval Tool (DRT), which previously allowed applicants to transfer their income and tax information directly from the IRS to the FAFSA form, was retired. Instead, the FAFSA Simplification Act and the FUTURE Act implemented the FUTURE Act Direct Data Exchange (FA-DDX) with the IRS. This new direct data exchange eliminated the need for most applicants (and their spouse or parents) to self-report their income and tax information.\n\nCompared to the previous approach using the IRS Data Retrieval Tool, the FA-DDX automates the transfer of federal tax information (FTI) directly to the FAFSA form, likely reducing errors and the burden on applicants to manually enter data. However, the context does not explicitly discuss potential trade-offs of this change.\n\nTherefore, the changes streamline and simplify the FAFSA income and tax information process by automating data transfer through FA-DDX rather than relying on applicants' manual or tool-assisted reporting. Any trade-offs are not specified in the provided context."



Contexts:


Unnamed: 0,id,context
0,RAGAS_Q01,"[Application and Verification Guide\nIntroduction\nThis guide is intended for college financial aid administrators and counselors who help students with the financial aid\nprocess4completing the Free Application for Federal Student Aid (FAFSA®) form, verifying information, and making\ncorrections and other changes to the information reported on the FAFSA form.\nThroughout the Federal Student Aid Handbook, we use <college,= <school,= and <institution= interchangeably unless a, more specific use is given. Similarly, <student,= <applicant,= and <aid recipient= are synonyms. <Parents= in this volume\nrefers to the legal parents of dependent students, and <you= refers to the primary audience of the Handbook: financial aid\nadministrators at colleges. <We= indicates the U.S. Department of Education (the Department, ED), and <federal student\naid= and <Title IV aid= are synonymous terms for the financial aid offered by the Department., the application, award, and administration of student aid programs. An applicant and contributor (if applicable)\nmust provide approval once each year. If FAFSA FTI approval is not provided, the student will not be eligible for\nany Title IV aid until the approval is provided by each contributor., professional judgment determinations. However, a school may require additional information for other purposes, such as\npackaging private or institutional aid. If the school collects additional information that affects Title IV eligibility, it must\ntake the information into account when awarding Title IV aid.\nTypes of FAFSA Applications\nMore than 99% of FAFSA forms are filed electronically. However, there are other options for individuals unwilling or unable\nto file electronically.\nFAFSA Online, student aid.\nhttps://studentaid.gov/fsa-id/create-account/launch4Create an account username and password, which\nallows students, FAFSA contributors, and other borrowers access to the StudentAid.gov portal.\nhttps://studentaid.gov/fafsa-apply/parents4Who counts as a parent on the FAFSA form? This tool helps\ndetermine which parent(s) should participate in a student9s FAFSA form.\nResources for schools]"
1,RAGAS_Q02,"[more specific use is given. Similarly, <student,= <applicant,= and <aid recipient= are synonyms. <Parents= in this volume\nrefers to the legal parents of dependent students, and <you= refers to the primary audience of the Handbook: financial aid\nadministrators at colleges. <We= indicates the U.S. Department of Education (the Department, ED), and <federal student\naid= and <Title IV aid= are synonymous terms for the financial aid offered by the Department., professional judgment determinations. However, a school may require additional information for other purposes, such as\npackaging private or institutional aid. If the school collects additional information that affects Title IV eligibility, it must\ntake the information into account when awarding Title IV aid.\nTypes of FAFSA Applications\nMore than 99% of FAFSA forms are filed electronically. However, there are other options for individuals unwilling or unable\nto file electronically.\nFAFSA Online, Application and Verification Guide\nIntroduction\nThis guide is intended for college financial aid administrators and counselors who help students with the financial aid\nprocess4completing the Free Application for Federal Student Aid (FAFSA®) form, verifying information, and making\ncorrections and other changes to the information reported on the FAFSA form.\nThroughout the Federal Student Aid Handbook, we use <college,= <school,= and <institution= interchangeably unless a, the application, award, and administration of student aid programs. An applicant and contributor (if applicable)\nmust provide approval once each year. If FAFSA FTI approval is not provided, the student will not be eligible for\nany Title IV aid until the approval is provided by each contributor., payment periods, if they were not ineligible at the time. See the section on retroactive disbursements for completed\nperiods in Volume 4, Chapter 2 of the FSA Handbook.\nWebsites for students\nhttps://studentaid.gov/4Higher education portal that contains resources and information about paying for\ncollege. The site includes access to the FAFSA application, loan information, counseling, repayment, and other\ntools to help future, current, and former students access financial aid.]"
2,RAGAS_Q03,"[completing the FAFSA form. The previous tool to transfer U.S. income and tax information from the IRS 3 the IRS Data\nRetrieval Tool (DRT) 3 was retired after the close of the 2023-24 application cycle. Implementation of the FA-DDX\neliminated the need for most applicants (and their spouse or parents) to self-report their income and tax information\nreported to the IRS. Also, federal tax information (FTI) that is transferred via the FA-DDX to the FAFSA form is considered, The FAFSA Partner Portal removed the ability for a financial aid administrator (FAA) to initiate a new application on behalf\nof a student. This is due to the requirement in the FUTURE Act that all FAFSA form contributors (including students,\nspouses, and parents) must provide their consent to disclose information to the Internal Revenue Service (IRS) and\napproval to retrieve and use certain Federal Tax Information (FTI). FAAs cannot provide consent and approval on behalf of, Chapter 2: Filling Out the FAFSA\nWe added a section on signatures which describes the appropriate use of the signature page from the FAFSA Partner\nPortal (FPP).\nWe expanded the <Student Tax Filing Status (19)= section to include subsections on (1) nontax filers, foreign\ncountries, and international organizations and (2) fiscal year tax returns.\nWe updated the tax line number used for education credits to IRS Form 1040: line 29 + IRS Form 1040 Schedule 3:\nline 3., consent and approval for the access, disclosure, and use of federal tax information (FTI). The online FAFSA form provides\nrobust instructions for completing the form. It also provides a clear path for the student and each contributor, allowing\nthem to skip (or never see) questions that are unnecessary, irrelevant, or already known based on information previously\nprovided or imported. However, some applicants will be unable or unwilling to complete the FAFSA form online and have, processes requests in near-real time. The FA-DDX replaced the IRS Data Retrieval Tool (IRS-DRT) to import certain FTI into\nan applicant9s FAFSA form.\nConsent and Approval\nThe student and each contributor to the FAFSA form must provide consent and approval to the access, disclosure, and use\nof FTI in evaluating the applicant9s eligibility for Title IV aid. Consent and approval are required regardless of the\napplication method (online or paper).]"
3,RAGAS_Q04,"[and (2) sign the FAFSA application. While both the student and contributors must provide consent and approval once per\napplication cycle, a signature may be required multiple times throughout the cycle if a student makes corrections to a\nprocessed FAFSA form. In these cases, the student or the contributor must sign that correction to certify the information is\naccurate but will not be prompted to provide consent and approval again. Therefore, it is possible for a FAFSA transaction, application method (online or paper).\nTo provide consent and approval, FAFSA contributors (including parent(s) or spouse) must agree to:\n1. The Department9s use and disclosure of their information (e.g., name and Social Security number) to match with the\nIRS;\nDefinitions\nFAFSA Privacy Act Consent 3 Formal consent provided by an applicant and any applicable contributor(s) for a\ngiven FAFSA cycle (e.g., December 2024 to September 2026 for the 2025-26 FAFSA form) that meets the, to contain consent and approval but not the required signature(s). If your school receives an ISIR transaction indicating\nthat the transaction source (i.e. paper or online FAFSA) was unsigned, the applicant or contributor can return to their\nFAFSA form and sign the correction. Alternatively, you can print a signature page in the FAFSA Partner Portal (FPP), collect\nthe necessary original signature(s), and indicate in FPP that you have the signature on file. This will resolve the signature, 2025-26 FAFSA cycle), they cannot revoke consent for that cycle. The ability to revoke consent is not needed for FAFSA\npurposes because the FAFSA contributor is providing a one-time consent for a specific tax year and an annual consent is\nrequired for each FAFSA cycle.\nStudents and contributors are not able to view or edit the imported FTI data. This is to enhance security, privacy, and to, consent and approval for the access, disclosure, and use of federal tax information (FTI). The online FAFSA form provides\nrobust instructions for completing the form. It also provides a clear path for the student and each contributor, allowing\nthem to skip (or never see) questions that are unnecessary, irrelevant, or already known based on information previously\nprovided or imported. However, some applicants will be unable or unwilling to complete the FAFSA form online and have]"
4,RAGAS_Q05,"[The FAFSA Partner Portal removed the ability for a financial aid administrator (FAA) to initiate a new application on behalf\nof a student. This is due to the requirement in the FUTURE Act that all FAFSA form contributors (including students,\nspouses, and parents) must provide their consent to disclose information to the Internal Revenue Service (IRS) and\napproval to retrieve and use certain Federal Tax Information (FTI). FAAs cannot provide consent and approval on behalf of, processes requests in near-real time. The FA-DDX replaced the IRS Data Retrieval Tool (IRS-DRT) to import certain FTI into\nan applicant9s FAFSA form.\nConsent and Approval\nThe student and each contributor to the FAFSA form must provide consent and approval to the access, disclosure, and use\nof FTI in evaluating the applicant9s eligibility for Title IV aid. Consent and approval are required regardless of the\napplication method (online or paper)., consent and approval for the access, disclosure, and use of federal tax information (FTI). The online FAFSA form provides\nrobust instructions for completing the form. It also provides a clear path for the student and each contributor, allowing\nthem to skip (or never see) questions that are unnecessary, irrelevant, or already known based on information previously\nprovided or imported. However, some applicants will be unable or unwilling to complete the FAFSA form online and have, Only the Department has the authority to obtain approval and consent for the use and disclosure of FTI for such purposes.\nOur partners (FAAs, advocates, etc.) are not permitted to obtain approval and consent on behalf of FAFSA contributors\n(including parent(s) or spouse) for the use and disclosure of FTI.\nOnce a FAFSA contributor has provided consent and approval for use and disclosure of FTI for a FAFSA cycle (e.g., the, further redisclosure of FTI by the Department.\nFAFSA FTI Approval 3 Formal approval granted by an applicant and any applicable contributors for a given\nFAFSA cycle (e.g., December 2024 to September 2026 for the 2025-26 FAFSA form) to retrieve and use FTI to\ndetermine an applicant9s federal financial aid eligibility as well as permit the redisclosure of FTI by the\nDepartment to an eligible institution; state higher education agency; or a designated scholarship organization for]"
5,RAGAS_Q06,"[FAFSA Simplification Act\nThe FAFSA Simplification Act, passed on Dec. 27, 2020, as part of the Consolidated Appropriations Act, 2021, mandated a\nsignificant overhaul of federal student aid, including the Free Application for Federal Student Aid (FAFSA®) form, need\nanalysis, and many policies and procedures for schools that participate in the Title IV programs. FSA implemented the, FAFSA Simplification Act alongside the FAFSA portion of the Fostering Undergraduate Talent by Unlocking Resources for\nEducation (FUTURE) Act to streamline the FAFSA application process.\nChanges From the FUTURE Act\nThe Fostering Undergraduate Talent by Unlocking Resources for Education (FUTURE) Act authorized a direct data\nexchange 3 the FUTURE Act Direct Data Exchange (FA-DDX) 3 with the Internal Revenue Service (IRS) to facilitate, that may have been relocated. The following describes changes made in each chapter.\nChapter 1: The Application Process\nWe removed the <Returning FAFSA Filers= section as FAFSA renewal functionality has been deferred for future cycles., Chapter 2: Filling Out the FAFSA\nWe added a section on signatures which describes the appropriate use of the signature page from the FAFSA Partner\nPortal (FPP).\nWe expanded the <Student Tax Filing Status (19)= section to include subsections on (1) nontax filers, foreign\ncountries, and international organizations and (2) fiscal year tax returns.\nWe updated the tax line number used for education credits to IRS Form 1040: line 29 + IRS Form 1040 Schedule 3:\nline 3., completing the FAFSA form. The previous tool to transfer U.S. income and tax information from the IRS 3 the IRS Data\nRetrieval Tool (DRT) 3 was retired after the close of the 2023-24 application cycle. Implementation of the FA-DDX\neliminated the need for most applicants (and their spouse or parents) to self-report their income and tax information\nreported to the IRS. Also, federal tax information (FTI) that is transferred via the FA-DDX to the FAFSA form is considered]"


In [7]:
# ✅ 8. Enhanced Results Display - RAGAS + LangGraph Integration
import pandas as pd
from IPython.display import display, HTML

print("🚀 RAGAS + LangGraph Synthetic Data Generation Complete!")

# Enhanced results with better structure
enhanced_results = []
for i, result in enumerate(processed_results):
    enhanced_results.append({
        "ID": result["question_id"],
        "RAGAS Question": result["original_question"][:60] + "..." if len(result["original_question"]) > 60 else result["original_question"],
        "LangGraph Evolution": result["evolution_type"],
        "Final Evolved Question": result["evolved_question"][:80] + "..." if len(result["evolved_question"]) > 80 else result["evolved_question"],
        "Context Chunks": result["context_chunks"],
        "Answer Preview": result["final_answer"][:100] + "..." if len(result["final_answer"]) > 100 else result["final_answer"],
        "Has Answer": "✅ Yes" if result["final_answer"] != "I don't know." else "❌ No Context"
    })

enhanced_df = pd.DataFrame(enhanced_results)
print("📊 ENHANCED SYNTHETIC QA RESULTS:")
display(enhanced_df)





🚀 RAGAS + LangGraph Synthetic Data Generation Complete!
📊 ENHANCED SYNTHETIC QA RESULTS:


Unnamed: 0,ID,RAGAS Question,LangGraph Evolution,Final Evolved Question,Context Chunks,Answer Preview,Has Answer
0,RAGAS_Q01,Wut is the purpus of the college financial aid administrator...,Multi-Context,"Considering multiple scenarios and contexts, wut is the purpus of the college fi...",5,The purpose of the college financial aid administrator is to help students with the financial aid pr...,✅ Yes
1,RAGAS_Q02,What does the term 'Department' refer to in the context of f...,Constraint,"Given limited resources and time constraints, what does the term 'department' re...",5,"In the context of financial aid, the term ""Department"" refers to the U.S. Department of Education (E...",✅ Yes
2,RAGAS_Q03,What changes were made to the FAFSA Partner Portal regarding...,Multi-Context,"Considering multiple scenarios and contexts, what changes were made to the fafsa...",5,After the retirement of the IRS Data Retrieval Tool (IRS DRT) following the 2023-24 application cycl...,✅ Yes
3,RAGAS_Q04,What happens if the FAFSA form is unsigned and how can conse...,Simple,What happens if the FAFSA form is unsigned and how can consent be obtained for t...,5,"If the FAFSA form is unsigned, the applicant or contributor can return to their FAFSA form and sign ...",✅ Yes
4,RAGAS_Q05,What restrictions do financial aid administrators (FAAs) fac...,Reasoning,Analyze why what restrictions do financial aid administrators (faas) face regard...,5,Financial aid administrators (FAAs) face the restriction that they cannot initiate a new FAFSA appli...,✅ Yes
5,RAGAS_Q06,What changes were made to the FAFSA process for the 2023-24 ...,Comparison,How do what changes were made to the fafsa process for the 2023-24 application c...,5,"For the 2023-24 application cycle, the IRS Data Retrieval Tool (DRT), which previously allowed appli...",✅ Yes
