In [None]:
'''
Self RAG Implementation
Author: Christian Sarmiento
Purpose: This notebook is intended to get a Self-RAG implementation set up with LangChain/LangGraph.
Date Created: 11/17/24
Last Updated: 11/29/24
Data: Marist College Administrative Corpus Dataset
Sources:
- https://blog.langchain.dev/agentic-rag-with-langgraph/
- https://github.com/langchain-ai/langgraph/blob/main/examples/rag/langgraph_self_rag.ipynb?ref=blog.langchain.dev
- ChatGPT: o1-preview
Note: Most of the code for graph implementation of Self-RAG was taken from the second source.
-----------------------------------------------------------------------------------------------------------------------
RAG Research             |               Machine Learning Independent Study             |              DR. EITEL LAURIA
'''

In [None]:
%pip install langgraph

In [None]:
!pip install langchain_anthropic

In [21]:
!pip install -qU langchain-cohere

In [14]:
!pip install -qU langchain_huggingface

In [None]:
# Imports
import sys
sys.path.append("/Users/christiansarmiento/Library/CloudStorage/OneDrive-MaristCollege/Machine Learning/Private Code")
from api_keys import openAIKey
from api_keys import langchainKey
from api_keys import anthropicKey
from langchain_openai import ChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain.schema import Document
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from typing import List, Dict, Optional, Annotated
from typing_extensions import TypedDict
from langgraph.graph import END, StateGraph, START
from langchain_core.runnables import RunnablePassthrough
from langchain.schema.runnable import RunnableLambda
from langchain.prompts import PromptTemplate
from langchain_anthropic import ChatAnthropic
from langchain_huggingface import HuggingFacePipeline


from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline


import pandas as pd
import os
import gradio as gr  # easy frontend implementation
from pprint import pprint
import numpy as np
import operator
import time


from ragas.metrics import LLMContextRecall, Faithfulness, FactualCorrectness, SemanticSimilarity
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas import SingleTurnSample

In [13]:
# LangChain Enviornment Variables
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"] = langchainKey()
os.environ["OPENAI_API_KEY"] = openAIKey()

In [3]:
# Load Data
csvPath = "/Users/christiansarmiento/Library/CloudStorage/OneDrive-MaristCollege/Machine Learning/Data/Cleaned_QA.csv"
maristQA = pd.read_csv(csvPath, header=None)

# To use RecursiveCharacterTextSplitter, we need a list of dictionaries
maristContext = [Document(page_content=text) for text in maristQA[1].tolist()]

In [4]:
# Split Documents into Chunks
textSplitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200, add_start_index=True)
texts = textSplitter.split_documents(maristContext)

In [5]:
# Store Documents in Vector DB (Chroma)
vectorDB = Chroma.from_documents(documents=texts, embedding=OpenAIEmbeddings())

# Setup Retrieval System
retriever = vectorDB.as_retriever(search_type="similarity", search_kwargs={"k": 3})  # Retrieves 3 documents

In [6]:
# Define Grader class for document grading in Self-RAG
class GradeDocuments(BaseModel):
    """Binary score for relevance check on retrieved documents."""

    binary_score: str = Field(
        description="Documents are relevant to the question, 'yes' or 'no'"
    )

# LLM with function call
llmDocGrader = ChatOpenAI(model="gpt-4o-mini", temperature=0)
structuredLLMGrader = llmDocGrader.with_structured_output(GradeDocuments)

# Prompt
system = """You are a grader assessing relevance of a retrieved document to a user question. \n 
    It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
   tic meaning related to the user question, grade it as relevant. \n
    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question."""
gradePrompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Retrieved document: \n\n {document} \n\n User question: {question}"),
    ]
)

# Grader Chain
retrievalGrader = gradePrompt | structuredLLMGrader

# Testing it
question = "agent memory"
docs = retriever.get_relevant_documents(question)
docTxt = docs[1].page_content
print(retrievalGrader.invoke({"question": question, "document": docTxt}))

  docs = retriever.get_relevant_documents(question)


binary_score='no'


In [7]:
# Generation Chain

# Prompt
prompt = hub.pull("rlm/rag-prompt")

# LLM
llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)

# Post-processing
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

# Chain
ragChain = prompt | llm | StrOutputParser()

# Run
generation = ragChain.invoke({"context": docs, "question": question})
print(generation)

I don't know.


In [8]:
# Define Hallucination Grader Class
class GradeHallucinations(BaseModel):
    """Binary score for hallucination present in generation answer."""

    binary_score: str = Field(
        description="Answer is grounded in the facts, 'yes' or 'no'"
    )


# LLM with function call
llmHallucinations = ChatOpenAI(model="gpt-4o-mini", temperature=0)
structuredLLMHallucinationGrader = llmHallucinations.with_structured_output(GradeHallucinations)

# Prompt
system = """You are a grader assessing whether an LLM generation is grounded in / supported by a set of retrieved facts. \n 
     Give a binary score 'yes' or 'no'. 'Yes' means that the answer is grounded in / supported by the set of facts."""
hallucinationPrompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Set of facts: \n\n {documents} \n\n LLM generation: {generation}"),
    ]
)

hallucinationGrader = hallucinationPrompt | structuredLLMHallucinationGrader
hallucinationGrader.invoke({"documents": docs, "generation": generation})

GradeHallucinations(binary_score='no')

In [9]:
# Answer Grader
class GradeAnswer(BaseModel):
    """Binary score to assess answer addresses question."""

    binary_score: str = Field(
        description="Answer addresses the question, 'yes' or 'no'"
    )

# LLM with function call
llmAnswerGrading = ChatOpenAI(model="gpt-4o-mini", temperature=0)
structuredLLMAnswerGrader = llmAnswerGrading.with_structured_output(GradeAnswer)

# Prompt
system = """You are a grader assessing whether an answer addresses / resolves a question \n 
     Give a binary score 'yes' or 'no'. Yes' means that the answer resolves the question."""
answerPrompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "User question: \n\n {question} \n\n LLM generation: {generation}"),
    ]
)

# Chain
answerGrader = answerPrompt | structuredLLMAnswerGrader
answerGrader.invoke({"question": question, "generation": generation})

GradeAnswer(binary_score='no')

In [10]:
# Question Rewriter
llmRewriter = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# Prompt
system = """You a question re-writer that converts an input question to a better version that is optimized \n 
     for vectorstore retrieval. Look at the input and try to reason about the underlying semantic intent / meaning."""
rewritePrompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        (
            "human",
            "Here is the initial question: \n\n {question} \n Formulate an improved question.",
        ),
    ]
)

questionRewriter = rewritePrompt | llm | StrOutputParser()
questionRewriter.invoke({"question": question})

'What are the key concepts and techniques related to agent memory in artificial intelligence?'

In [11]:
# Define metric evaluator

## Evaluation LLM & embeddings
evalLLM = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o-mini", temperature=0))
evalEmbeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings())

## Initialize metrics with LLM and embeddings
contextRecall = LLMContextRecall(llm=evalLLM)
faithfulness = Faithfulness(llm=evalLLM)
factualCorrectness = FactualCorrectness(llm=evalLLM)
semanticSimilarity = SemanticSimilarity(embeddings=evalEmbeddings)

## Collect metrics
evalMetrics = [
    contextRecall,
    faithfulness,
    factualCorrectness,
    semanticSimilarity
]

In [137]:
# Define Graph structure for Self-RAG

# Graph State
class GraphState(TypedDict):
    """
    Represents the state of our graph.

    Attributes:
        question: question
        generation: LLM generation
        documents: list of documents
        metrics: evaluation metrics for each generation 
    """
    question: Annotated[str, "question_metadata"]
    generation: str
    documents: List[str]
    metrics: Optional[Dict[str, float]]
    retry_count: int = 0


## Nodes

# Retrieval Node     
def retrieve(state):
    """
    Retrieve documents

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): New key added to state, documents, that contains retrieved documents
    """
    print("---RETRIEVE---")
    question = state["question"]

    # Initialize retry count if not set
    print(f"Current Retry Count: {state["retry_count"]}")

    # Retrieval
    documents = retriever.get_relevant_documents(question)
    return {"documents": documents, "question": question, "retry_count": state["retry_count"]}


# Generation Node
def generate(state):
    """
    Generate answer

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): New key added to state, generation, that contains LLM generation
    """
    print(f"Current Retry Count: {state['retry_count']}")
    print("---GENERATE---")
    question = state["question"]
    documents = state["documents"]
    retry_count = state["retry_count"]


    # RAG generation
    generation = ragChain.invoke({"context": documents, "question": question})

    # Return updated state
    updatedState = {"documents": documents, "question": question, "generation": generation, "retry_count": retry_count}

    return updatedState

# Grader Node
def gradeDocuments(state):
    """
    Determines whether the retrieved documents are relevant to the question.

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): Updates documents key with only filtered relevant documents
    """
    print("---CHECK DOCUMENT RELEVANCE TO QUESTION---")
    print(f"Current Retry Count: {state['retry_count']}")
    question = state["question"]
    documents = state["documents"]
    retry_count = state["retry_count"]

    # Recursion base case
    if retry_count > 5:
        print("---MAX RETRY LIMIT REACHED IN TRANSFORM QUERY: FORCING END---")
        return {"documents": state["documents"], "question": state["question"], "retry_count": retry_count}
    
    # Score each doc
    filtered_docs = []
    for d in documents:

        score = retrievalGrader.invoke(
            {"question": question, "document": d.page_content}
        )
        grade = score.binary_score
        if grade == "yes":
            print("---GRADE: DOCUMENT RELEVANT---")
            filtered_docs.append(d)

        else:
            print("---GRADE: DOCUMENT NOT RELEVANT---")
            continue

    return {"documents": filtered_docs, "question": question, "retry_count": retry_count}


# Rewriter node
def transformQuery(state):
    """
    Transform the query to produce a better question.

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): Updates question key with a re-phrased question
    """

    print("---TRANSFORM QUERY---")
    question = state["question"]
    documents = state["documents"]
    retry_count = state["retry_count"]

    # Recursion base case
    print(f"Retry Count: {retry_count}")
    if retry_count > 5:
        print("---MAX RETRY LIMIT REACHED IN TRANSFORM QUERY: FORCING END---")
        return {"documents": state["documents"], "question": state["question"], "retry_count": retry_count}
    
    # Re-write question
    better_question = questionRewriter.invoke({"question": question})
    return {"documents": documents, "question": better_question, "retry_count": retry_count + 1}


## Edges

# Generation edge
def decideToGenerate(state):
    """
    Determines whether to generate an answer, or re-generate a question.

    Args:
        state (dict): The current graph state

    Returns:
        str: Binary decision for next node to call
    """

    print("---ASSESS GRADED DOCUMENTS---")
    state["question"]
    filtered_documents = state["documents"]
    retry_count = state["retry_count"]

    # Recursion base case
    print(f"Current Retry Count: {retry_count}")
    if retry_count > 5:  
        print("---MAX RETRY LIMIT REACHED: STOPPING---")
        return "generate"
    
    # All documents have been filtered check_relevance
    # We will re-generate a new query
    if not filtered_documents:
        print(
            "---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, TRANSFORM QUERY---"
        )
        return "transformQuery"
    
    # We have relevant documents, so generate answer
    else:
        print("---DECISION: GENERATE---")
        return "generate"


def grade_generation_v_documents_and_question(state):
    """
    Determines whether the generation is grounded in the document and answers question.

    Args:
        state (dict): The current graph state

    Returns:
        str: Decision for next node to call
    """

    print("---CHECK HALLUCINATIONS---")
    print(f"Current Retry Count: {state['retry_count']}")
    
    question = state["question"]
    documents = state["documents"]
    generation = state["generation"]
    
    score = hallucinationGrader.invoke(
        {"documents": documents, "generation": generation}
    )
    grade = score.binary_score
    
    # Recursion base case
    retry_count = state["retry_count"]
    print(f"Current Retry Count: {retry_count}")
    if retry_count > 5:  
        print("---MAX RETRY LIMIT REACHED: STOPPING---")
        return "useful"
    
    # Check hallucination
    if grade == "yes":
        print("---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---")

        # Check question-answering
        print("---GRADE GENERATION vs QUESTION---")
        score = answerGrader.invoke({"question": question, "generation": generation})
        grade = score.binary_score

        if grade == "yes":
            print("---DECISION: GENERATION ADDRESSES QUESTION---")
            return "useful"
        
        else:
            print("---DECISION: GENERATION DOES NOT ADDRESS QUESTION---")
            return "not useful"
        
    else:
        pprint("---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---")
        return "not supported"

async def evaluateMetrics(state):
    """
    Evaluate metrics for the current RAG pipeline response.

    Args:
        state (dict): The current graph state.

    Returns:
        state (dict): Adds a 'metrics' key containing evaluation scores.
    """
    print("---EVALUATING METRICS---")
    retry_count = state.get("retry_count", 0)
    print(f"Final Retry Count Before Reset: {retry_count}")
    state["retry_count"] = 0
    question = state["question"]
    generation = state["generation"]
    documents = state["documents"]

    # Mock ground truth if unavailable (replace with actual reference if possible)
    groundTruth = state.get("groundTruth", "Expected answer based on context.")

    # Prepare retrieved contexts
    retrievedContexts = [doc.page_content for doc in documents]

    # Create a SingleTurnSample object
    sample = SingleTurnSample(
        user_input=question,
        response=generation,
        reference=groundTruth,
        retrieved_contexts=retrievedContexts,
    )

    # Evaluate metrics
    state["metrics"] = {
        "LLMContextRecall": await contextRecall.single_turn_ascore(sample),
        "Faithfulness": await faithfulness.single_turn_ascore(sample),
        "FactualCorrectness": await factualCorrectness.single_turn_ascore(sample),
        "SemanticSimilarity": await semanticSimilarity.single_turn_ascore(sample),
    }

    return state

In [138]:
# Build Self-RAG Graph
workflow = StateGraph(GraphState)

# Define the nodes
workflow.add_node("retrieve", retrieve)  
workflow.add_node("gradeDocuments", gradeDocuments)  
workflow.add_node("generate", generate)  
workflow.add_node("transformQuery", transformQuery)  
workflow.add_node("evaluateMetrics", evaluateMetrics)

# Build graph
workflow.add_edge(START, "retrieve")
workflow.add_edge("retrieve", "gradeDocuments")
workflow.add_conditional_edges(
    "gradeDocuments",
    decideToGenerate,
    {
        "transformQuery": "transformQuery",
        "generate": "generate"
    },
)
workflow.add_edge("transformQuery", "retrieve")
workflow.add_conditional_edges(
    "generate",
    grade_generation_v_documents_and_question,
    {
        "not supported": "generate",
        "useful": "evaluateMetrics",
        "not useful": "transformQuery",
    },
)
workflow.add_edge("generate", "evaluateMetrics")
workflow.add_edge("evaluateMetrics", END)


# Compile
app = workflow.compile()


In [None]:
# Inital test run
inputs = {"question": "Who is Carolyn Matheus?"}
async for output in app.astream(inputs):
    for key, value in output.items():

        # Print node
        pprint(f"Node '{key}':")

        # Optional: print full state at each node
        # pprint.pprint(value["keys"], indent=2, width=80, depth=None)

        # Print metrics
        if "metrics" in value:
            pprint("Metrics: ")
            pprint(value["metrics"])

    pprint("\n---\n")

# Final generation
pprint("Final Generation: ")
pprint(value["generation"])

# Final metrics
if "metrics" in value:
    pprint("Final Metrics: ")
    pprint(value["metrics"])

---RETRIEVE---
"Node 'retrieve':"
'\n---\n'
---CHECK DOCUMENT RELEVANCE TO QUESTION---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
---DECISION: GENERATE---
"Node 'gradeDocuments':"
'\n---\n'
---GENERATE---
---CHECK HALLUCINATIONS---
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
'\n---\n'
---EVALUATING METRICS---
"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7259663840810067}
'\n---\n'
'Final Generation: '
('Dr. Carolyn C. Matheus is an Associate Professor of Information Systems and '
 'the Director of the Honors Program at Marist College. She holds a PhD in '
 'Organizational Studies with a focus on leadership from SUNY Albany and has '
 'received the National Society of Leadership and Success award for Excellenc

In [139]:
# Put SelfRAG into Gradio
evaluationSamples = []
async def selfRAG(userQuery, history, correctAnswer):
    """
    Gradio-compatible function to process SelfRAG workflow.
    Args:
        userQuery (str): The user's question.
        history (list): Conversation history.
        correctAnswer (str): The ground truth answer for metrics (optional).

    Returns:
        tuple: (chatDisplay, history)
    """

    # Get our input together
    inputs = {"question": userQuery, "retry_count": 0}

    # Start the workflow
    finalOutput = None
    async for output in app.astream(inputs, config={"recursion_limit": 100}):

        # Saving final output for metric purposes
        finalOutput = output

        # Printing out each node state for clarity
        for key, value in output.items():

            # Print node
            pprint(f"Node '{key}':")

            # Print metrics
            if "metrics" in value:
                pprint("Metrics: ")
                pprint(value["metrics"])
    
    # Get the generation and its metrics
    finalNodeKey = list(finalOutput.keys())[-1]  # Get the key of the last executed node
    nodeOutput = finalOutput[finalNodeKey]  # Access the nested state
    generation = nodeOutput.get("generation", "No generation produced.")
    metrics = nodeOutput.get("metrics", {})

    # Update history
    if history is None:
        history = []
    
    history.extend([
        {"role": "user", "content": userQuery},
        {"role": "llm", "content": generation}
    ])

    # Display output for gradio
    chatDisplay = [(msg["content"], "User" if msg["role"] == "user" else "LLM") for msg in history]

    # Append metrics to evaluationSamples for tracking (if correctAnswer is provided)
    if correctAnswer:
        evaluationSamples.append({
            "user_input": userQuery,
            "retrieved_contexts": [doc.page_content for doc in finalOutput.get("documents", [])],
            "response": generation,
            "reference": correctAnswer,
            "metrics": metrics,
        })

    

    return history  #, chatDisplay

In [None]:
# Gradio frontend
interface = gr.Interface(
    fn=selfRAG,
    inputs=[
        gr.Textbox(label="Ask a Question", placeholder="Enter your question here..."),
        gr.State(),  # Keeps track of conversation history
        gr.Textbox(label="Correct Answer (Optional)", placeholder="For evaluation purposes..."),
    ],
    outputs=[
        gr.Chatbot(label="SelfRAG Conversation"),
        gr.State(),  # Updates conversation history
    ],
    title="Self-RAG Implementation",
    description="Interact with the Self-RAG workflow for document-grounded question answering.",
)

# Launch the interface
interface.launch()



* Running on local URL:  http://127.0.0.1:7871

To create a public link, set `share=True` in `launch()`.




---RETRIEVE---
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
---DECISION: GENERATE---
"Node 'gradeDocuments':"
---GENERATE---
---CHECK HALLUCINATIONS---
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7259663840810067}
---RETRIEVE---
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
---DECISION: GENERATE---
"Node 'gradeDocuments':"
---GENERATE---
---CHECK HALLUCINATIONS---
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTIO

In [140]:
# Function to evaluate our RAG pipeline when given ground truth
async def pipelineEvaluation(dataset, metrics):

    # Run through our runs
    results = []
    for run in dataset:

        # Save our inputs/outputs
        inputQuery = run["user_input"]
        groundTruthAnswer = run["reference"]
        contexts = run["retrieved_contexts"]
        response = run["response"]

        # Create a SingleTurnSample object
        sample = SingleTurnSample(
            user_input=inputQuery,
            response=response,
            reference=groundTruthAnswer,
            retrieved_contexts=contexts 
        )

        # Evaluate metrics
        runResults = {"input_query": inputQuery}
        for metric in metrics:

            # Get the score for the given metric
            try:

                score = await metric.single_turn_ascore(sample)
                runResults[type(metric).__name__] = score

            except Exception as e:
                # Catch errors for debugging
                runResults[type(metric).__name__] = f"Error: {str(e)}"
        
        # Save metric results
        results.append(runResults)
    
    # Calculate mean and standard deviation for each metric
    metricsStats = {}
    for metric in metrics:
        metricName = type(metric).__name__
        scores = [result[metricName] for result in results if isinstance(result[metricName], (int, float))]
        
        # Only calculate stats if there are valid scores
        if scores:
            metricsStats[metricName] = {
                "mean": np.mean(scores),
                "std_dev": np.std(scores),
            }
            
        else:
            metricsStats[metricName] = {
                "mean": "No valid scores",
                "std_dev": "No valid scores",
            }
    
    return results, metricsStats
    

In [16]:
# Load metrics
evalMetrics = [LLMContextRecall(llm=LangchainLLMWrapper(llm)), 
               FactualCorrectness(llm=LangchainLLMWrapper(llm)), 
               Faithfulness(llm=LangchainLLMWrapper(llm)), 
               SemanticSimilarity(embeddings=LangchainEmbeddingsWrapper(OpenAIEmbeddings()))]

In [61]:
# Evaluate our pipeline responses
evalResults = await pipelineEvaluation(evaluationSamples, evalMetrics)
for result in evalResults:
    print(result)

{'input_query': 'Who is Carolyn Matheus?', 'LLMContextRecall': 0.0, 'FactualCorrectness': 0.0, 'Faithfulness': 0.0, 'SemanticSimilarity': 0.928909234587568}


In [136]:
# Sample 222 records from our dataset
maristTestSample = maristQA.sample(50, replace=False)
maristTestSample.head()

Unnamed: 0,0,1
91,Graduate school GPA requirement.,Graduate: FAQsFrequently asked questions about...
398,Interactive media department info,"Film, TV, Games, and Interactive Media Departm..."
123,Who is the writing center director?,Contact InformationAcademic SchoolOfficeEmailP...
182,What is Marist perspective on diversity and in...,"Diversity and Inclusion at MaristAt Marist, we..."
514,Mark James Morreale history?,Contact InformationAcademic SchoolOfficeEmailP...


In [141]:
# Run our chain with each question and evaluate
chatHistory = None
for row in maristTestSample.iterrows():
    chatHistory = await selfRAG(row[1][0], chatHistory, row[1][1])
    
    
## Evaluation
evalResults, metricStats = await pipelineEvaluation(evaluationSamples, evalMetrics)
for result in evalResults:
    print(result)

for metric in metricStats.keys():
    print(f"{metric} - Mean: {metricStats[metric]['mean']}, St. Dev: {metricStats[metric]['std_dev']}")

---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0
"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7184164073522444}
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMEN

InvalidUpdateError: At key 'question': Can receive only one value per step. Use an Annotated key to handle multiple values.
For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/INVALID_CONCURRENT_GRAPH_UPDATE

# Adding Simple RAG Node
- there are certain questions within our dataset that can cause GraphRecursionErrors, which is an infinite loop within our graph that specifically happens when the process can't come up with a good generation and tries rewriting the query until this error occurs
- In our process above, we have a flag that stops this error and tells the process to generate what it has, which inherently hurts performance since there is a big possibility of us generating on documents that are not relelvant to the query at all. 
- To mitigate this, instead of having the process generate what it has upon reaching the stopping condition, we will add a node the the graph that will run a simple RAG chain if this occurs. 

In [46]:
# Define SimpleRAG chains (taken from Simple RAG notebook in this github repo)

## LLM 
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

## Retriever wrapper to be able to return the retrieved documents from this process
captureRetriever = RunnableLambda(lambda query: retriever.get_relevant_documents(query))
retrievedDocsStorage = {"docs": None}
captureRetriever = RunnableLambda(
    lambda query: (retrievedDocsStorage.update({"docs": retriever.get_relevant_documents(query)})) or retrievedDocsStorage["docs"]
)
    
## Function to format documents into the prompt
def formatDocs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

## Setup RAG Chain
prompt = hub.pull("rlm/rag-prompt")
simpleRagChain = (
    {"context": captureRetriever | formatDocs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
# Define Graph structure for Self-RAG

# Graph State
class GraphState(TypedDict):
    """
    Represents the state of our graph.

    Attributes:
        question: question
        generation: LLM generation
        documents: list of documents
        metrics: evaluation metrics for each generation 
    """
    question: Annotated[str, operator.add]
    generation: str
    documents: List[str]
    metrics: Optional[Dict[str, float]]
    retry_count: int = 0


## Nodes

# Retrieval Node     
def retrieve(state):
    """
    Retrieve documents

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): New key added to state, documents, that contains retrieved documents
    """
    print("---RETRIEVE---")
    question = state["question"]

    # Initialize retry count if not set
    print(f"Current Retry Count: {state["retry_count"]}")

    # Retrieval
    documents = retriever.get_relevant_documents(question)
    return {"documents": documents, "question": question, "retry_count": state["retry_count"]}


# Generation Node
def generate(state):
    """
    Generate answer

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): New key added to state, generation, that contains LLM generation
    """
    print(f"Current Retry Count: {state['retry_count']}")
    print("---GENERATE---")
    question = state["question"]
    documents = state["documents"]
    retry_count = state["retry_count"]


    # RAG generation
    generation = ragChain.invoke({"context": documents, "question": question})

    # Return updated state
    updatedState = {"documents": documents, "question": question, "generation": generation, "retry_count": retry_count}

    return updatedState

# Grader Node
def gradeDocuments(state):
    """
    Determines whether the retrieved documents are relevant to the question.

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): Updates documents key with only filtered relevant documents
    """
    print("---CHECK DOCUMENT RELEVANCE TO QUESTION---")
    print(f"Current Retry Count: {state['retry_count']}")
    question = state["question"]
    documents = state["documents"]
    retry_count = state["retry_count"]

    # Recursion base case
    if retry_count > 1:
        print("---MAX RETRY LIMIT REACHED IN TRANSFORM QUERY: FORCING END---")
        return {"documents": state["documents"], "question": state["question"], "retry_count": retry_count}
    
    # Score each doc
    filtered_docs = []
    for d in documents:

        score = retrievalGrader.invoke(
            {"question": question, "document": d.page_content}
        )
        grade = score.binary_score
        if grade == "yes":
            print("---GRADE: DOCUMENT RELEVANT---")
            filtered_docs.append(d)

        else:
            print("---GRADE: DOCUMENT NOT RELEVANT---")
            continue

    return {"documents": filtered_docs, "question": question, "retry_count": retry_count}


# Rewriter node
def transformQuery(state):
    """
    Transform the query to produce a better question.

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): Updates question key with a re-phrased question
    """

    print("---TRANSFORM QUERY---")
    question = state["question"]
    documents = state["documents"]
    retry_count = state["retry_count"]

    # Recursion base case
    print(f"Retry Count: {retry_count}")
    if retry_count > 1:
        print("---MAX RETRY LIMIT REACHED IN TRANSFORM QUERY: FORCING END---")
        return {"documents": state["documents"], "question": state["question"], "retry_count": retry_count}
    
    # Re-write question
    betterQuestion = questionRewriter.invoke({"question": question})
    return {"documents": documents, "question": betterQuestion, "retry_count": retry_count + 1}

def simpleRAG(state):
    
    print("---SIMPLE RAG---")
    question = state["question"]
    documents = state["documents"]
    retry_count = state["retry_count"]


    # RAG generation
    generation = ""
    for chunk in simpleRagChain.stream(question):
        generation += chunk
    
    print(generation)

    # Get retrieved context
    newContext = retrievedDocsStorage["docs"]
    print(f"Context: {newContext}")
    

    # Return updated state
    updatedState = {"documents": newContext, "question": question, "generation": generation, "retry_count": retry_count}

    return updatedState
    
## Edges

# Generation edge
def decideToGenerate(state):
    """
    Determines whether to generate an answer, or re-generate a question.

    Args:
        state (dict): The current graph state

    Returns:
        str: Binary decision for next node to call
    """

    print("---ASSESS GRADED DOCUMENTS---")
    state["question"]
    filtered_documents = state["documents"]
    retry_count = state["retry_count"]

    # Recursion base case
    print(f"Current Retry Count: {retry_count}")
    if retry_count > 1:  
        print("---MAX RETRY LIMIT REACHED: STOPPING---")
        return "simpleRAG"
    
    # All documents have been filtered check_relevance
    # We will re-generate a new query
    if not filtered_documents:
        print(
            "---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, TRANSFORM QUERY---"
        )
        return "transformQuery"
    
    # We have relevant documents, so generate answer
    else:
        print("---DECISION: GENERATE---")
        return "generate"


def grade_generation_v_documents_and_question(state):
    """
    Determines whether the generation is grounded in the document and answers question.

    Args:
        state (dict): The current graph state

    Returns:
        str: Decision for next node to call
    """

    print("---CHECK HALLUCINATIONS---")
    print(f"Current Retry Count: {state['retry_count']}")
    
    question = state["question"]
    documents = state["documents"]
    generation = state["generation"]
    
    score = hallucinationGrader.invoke(
        {"documents": documents, "generation": generation}
    )
    grade = score.binary_score
    
    # Recursion base case
    retry_count = state["retry_count"]
    print(f"Current Retry Count: {retry_count}")
    if retry_count > 1:  
        print("---MAX RETRY LIMIT REACHED: STOPPING---")
        return "useful"
    
    # Check hallucination
    if grade == "yes":
        print("---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---")

        # Check question-answering
        print("---GRADE GENERATION vs QUESTION---")
        score = answerGrader.invoke({"question": question, "generation": generation})
        grade = score.binary_score

        if grade == "yes":
            print("---DECISION: GENERATION ADDRESSES QUESTION---")
            return "useful"
        
        else:
            print("---DECISION: GENERATION DOES NOT ADDRESS QUESTION---")
            return "not useful"
        
    else:
        pprint("---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---")
        return "not supported"

async def evaluateMetrics(state):
    """
    Evaluate metrics for the current RAG pipeline response.

    Args:
        state (dict): The current graph state.

    Returns:
        state (dict): Adds a 'metrics' key containing evaluation scores.
    """
    print("---EVALUATING METRICS---")
    retry_count = state.get("retry_count", 0)
    print(f"Final Retry Count Before Reset: {retry_count}")
    state["retry_count"] = 0
    question = state["question"]
    generation = state["generation"]
    documents = state["documents"]

    # Mock ground truth if unavailable (replace with actual reference if possible)
    groundTruth = state.get("groundTruth", "Expected answer based on context.")

    # Prepare retrieved contexts
    retrievedContexts = [doc.page_content for doc in documents]

    # Create a SingleTurnSample object
    sample = SingleTurnSample(
        user_input=question,
        response=generation,
        reference=groundTruth,
        retrieved_contexts=retrievedContexts,
    )

    # Evaluate metrics
    state["metrics"] = {
        "LLMContextRecall": await contextRecall.single_turn_ascore(sample),
        "Faithfulness": await faithfulness.single_turn_ascore(sample),
        "FactualCorrectness": await factualCorrectness.single_turn_ascore(sample),
        "SemanticSimilarity": await semanticSimilarity.single_turn_ascore(sample),
    }

    return state

In [48]:
# Build Self-RAG Graph
workflow = StateGraph(GraphState)

# Define the nodes
workflow.add_node("retrieve", retrieve)  
workflow.add_node("gradeDocuments", gradeDocuments)  
workflow.add_node("generate", generate)  
workflow.add_node("transformQuery", transformQuery) 
workflow.add_node("simpleRAG", simpleRAG)
workflow.add_node("evaluateMetrics", evaluateMetrics)

# Build graph
workflow.add_edge(START, "retrieve")
workflow.add_edge("retrieve", "gradeDocuments")
workflow.add_conditional_edges(
    "gradeDocuments",
    decideToGenerate,
    {
        "transformQuery": "transformQuery",
        "generate": "generate",
        "simpleRAG": "simpleRAG"
    },
)
workflow.add_edge("transformQuery", "retrieve")
workflow.add_conditional_edges(
    "generate",
    grade_generation_v_documents_and_question,
    {
        "not supported": "generate",
        "useful": "evaluateMetrics",
        "not useful": "transformQuery",
    },
)
workflow.add_edge("generate", "evaluateMetrics")
workflow.add_edge("simpleRAG", "evaluateMetrics")
workflow.add_edge("evaluateMetrics", END)


# Compile
app = workflow.compile()


In [49]:
# Put our process into a function
evaluationSamples = []
ragResults = []
async def selfRAG(userQuery, history, correctAnswer):
    """
    Gradio-compatible function to process SelfRAG workflow.
    Args:
        userQuery (str): The user's question.
        history (list): Conversation history.
        correctAnswer (str): The ground truth answer for metrics (optional).

    Returns:
        list: history
    """

    # Variables
    inputs = {"question": userQuery, "retry_count": 0}
    finalOutput = None
    retryFlag = False

    # Start the workflow
    async for output in app.astream(inputs, config={"recursion_limit": 100}):

        # Saving final output for metric purposes
        finalOutput = output

        # Check if the process went into a loop (generated answer with simpleRAG)
        retryFlag = "simpleRAG" in output

        # Printing out each node state for clarity
        for key, value in output.items():

            # Print node
            pprint(f"Node '{key}':")

            # Print metrics
            if "metrics" in value:
                pprint("Metrics: ")
                pprint(value["metrics"])
    
    # Get the generation and its metrics
    finalNodeKey = list(finalOutput.keys())[-1]  # Get the key of the last executed node
    nodeOutput = finalOutput[finalNodeKey]  # Access the nested state
    generation = nodeOutput.get("generation", "No generation produced.")
    metrics = nodeOutput.get("metrics", {})

    # Update history
    if history is None:
        history = []
    
    history.extend([
        {"role": "user", "content": userQuery},
        {"role": "llm", "content": generation}
    ])

    # Append metrics to evaluationSamples for tracking (if correctAnswer is provided)
    if correctAnswer:
        evaluationSamples.append({
            "user_input": userQuery,
            "retrieved_contexts": [doc.page_content for doc in finalOutput.get("documents", [])],
            "response": generation,
            "reference": correctAnswer,
            "metrics": metrics,
        })
    
    # Save the result of the query with the metrics 
    ragResults.append({
        "question": userQuery,
        "generation": generation,
        "retry": retryFlag
    })

    return history  

In [50]:
# Function to evaluate our RAG pipeline when given ground truth
async def pipelineEvaluation(dataset, metrics):

    # Run through our runs
    results = []
    for run in dataset:

        # Save our inputs/outputs
        inputQuery = run["user_input"]
        groundTruthAnswer = run["reference"]
        contexts = run["retrieved_contexts"]
        response = run["response"]

        # Create a SingleTurnSample object
        sample = SingleTurnSample(
            user_input=inputQuery,
            response=response,
            reference=groundTruthAnswer,
            retrieved_contexts=contexts 
        )

        # Evaluate metrics
        runResults = {"question": inputQuery}
        for metric in metrics:

            # Get the score for the given metric
            try:

                score = await metric.single_turn_ascore(sample)
                runResults[type(metric).__name__] = score

            except Exception as e:
                # Catch errors for debugging
                runResults[type(metric).__name__] = f"Error: {str(e)}"
        
        # Save metric results
        results.append(runResults)
    
    # Calculate mean and standard deviation for each metric
    metricsStats = {}
    for metric in metrics:
        metricName = type(metric).__name__
        scores = [result[metricName] for result in results if isinstance(result[metricName], (int, float))]
        
        # Only calculate stats if there are valid scores
        if scores:
            metricsStats[metricName] = {
                "mean": np.mean(scores),
                "std_dev": np.std(scores),
            }
            
        else:
            metricsStats[metricName] = {
                "mean": "No valid scores",
                "std_dev": "No valid scores",
            }
    
    return results, metricsStats
    

In [51]:
# Load metrics
evalMetrics = [LLMContextRecall(llm=LangchainLLMWrapper(llm)), 
               FactualCorrectness(llm=LangchainLLMWrapper(llm)), 
               Faithfulness(llm=LangchainLLMWrapper(llm)), 
               SemanticSimilarity(embeddings=LangchainEmbeddingsWrapper(OpenAIEmbeddings()))]

In [52]:
# Sample records from our dataset
maristTestSample = maristQA.sample(10, replace=False)
maristTestSample.head()

Unnamed: 0,0,1
141,are classes at marist big?,AcademicsAcademicsNo matter your academic inte...
91,Graduate school GPA requirement.,Graduate: FAQsFrequently asked questions about...
371,Professional Accountancy graduate earnings,Master of Science in Professional Accountancy ...
335,phone applications,"An Entrepreneur for AccessibilityMarch 25, 201..."
617,Graduate Study in the UK,Center for Career Services Department Center f...


In [53]:
# Run our chain with each question and evaluate
chatHistory = None
for row in maristTestSample.iterrows():
    print(f"Question: {row[1][0]}")
    chatHistory = await selfRAG(row[1][0], chatHistory, row[1][1])
    
## Evaluation
evalResults, metricStats = await pipelineEvaluation(evaluationSamples, evalMetrics)
for result in evalResults:
    print(result)

for metric in metricStats.keys():
    print(f"{metric} - Mean: {metricStats[metric]['mean']}, St. Dev: {metricStats[metric]['std_dev']}")

Question: are classes at marist big?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, TRANSFORM QUERY---
"Node 'gradeDocuments':"
---TRANSFORM QUERY---
Retry Count: 0
"Node 'transformQuery':"
---RETRIEVE---
Current Retry Count: 1
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 1
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 1
---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, TRANSFORM QUERY---
"Node 'gradeDocuments':"
---TRANSFORM QUERY---
Retry Count: 1
"Node 'transformQuery':"
---RETRIEVE---
Current Retry Count: 2
"Node 'retrieve':"
---CHECK DOCUMENT 

BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 128000 tokens. However, your messages resulted in 192770 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}

In [182]:
# Make our dataframe of results
processResults = pd.DataFrame(ragResults)
evalResultsDF = pd.DataFrame(evalResults)
finalResultsDF = processResults.merge(evalResultsDF, on="question", how="left")
finalResultsDF.head()

Unnamed: 0,question,generation,retry,LLMContextRecall,FactualCorrectness,Faithfulness,SemanticSimilarity
0,What are the various Marist School codes?,The retrieved context does not provide specifi...,False,0.0,0.22,0.6,0.850097
1,Tell me more about the allied health building,"The Allied Health building, completed in Janua...",False,0.894737,0.32,0.0,0.831505
2,email of Dr. Wermuth?,I don't know.,False,0.166667,0.0,0.0,0.740081
3,How long is the IMC degree?,The IMC degree can be completed in three to fo...,False,0.818182,0.35,0.0,0.845312
4,How many states / countries are represented at...,Marist welcomes students from nearly 70 countr...,False,0.25,0.14,0.0,0.917484


In [None]:
# Averages & St. devs
print(f"LLM Context Recall - Mean: {finalResultsDF["LLMContextRecall"].mean()}, St. Dev: {finalResultsDF["LLMContextRecall"].std()}")
print(f"FactualCorrectness - Mean: {finalResultsDF["FactualCorrectness"].mean()}, St. Dev: {finalResultsDF["FactualCorrectness"].std()}")
print(f"Faithfulness - Mean: {finalResultsDF["Faithfulness"].mean()}, St. Dev: {finalResultsDF["Faithfulness"].std()}")
print(f"SemanticSimilarity - Mean: {finalResultsDF["SemanticSimilarity"].mean()}, St. Dev: {finalResultsDF["SemanticSimilarity"].std()}")


LLM Context Recall - Mean: 0.42591706539074964, St. Dev: 0.4041121592597289
FactualCorrectness - Mean: 0.20600000000000002, St. Dev: 0.1420563268566381
Faithfulness - Mean: 0.12, St. Dev: 0.2683281572999748
SemanticSimilarity - Mean: 0.8368958429569708, St. Dev: 0.06352500214746996


# Keeping Track of Graph Loops

In [12]:
# Define Graph structure for Self-RAG

def overwriteOperator(_, new_value):
    return new_value

# Graph State
class GraphState(TypedDict):
    """
    Represents the state of our graph.

    Attributes:
        question: question
        generation: LLM generation
        documents: list of documents
        metrics: evaluation metrics for each generation 
    """
    question: Annotated[str, operator.add]
    generation: Annotated[str, operator.add]
    documents: Annotated[List[str], overwriteOperator]
    metrics: Annotated[Optional[Dict[str, float]], lambda x, y: {**x, **y} if x and y else x or y]
    retry_count: Annotated[int, operator.add]


## Nodes

# Retrieval Node     
def retrieve(state):
    """
    Retrieve documents

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): New key added to state, documents, that contains retrieved documents
    """
    print("---RETRIEVE---")
    question = state["question"]

    # Initialize retry count if not set
    print(f"Current Retry Count: {state["retry_count"]}")

    # Retrieval
    documents = retriever.get_relevant_documents(question)
    return {"documents": documents, "question": question, "retry_count": state["retry_count"]}


# Generation Node
def generate(state):
    """
    Generate answer

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): New key added to state, generation, that contains LLM generation
    """
    print(f"Current Retry Count: {state['retry_count']}")
    print("---GENERATE---")
    question = state["question"]
    documents = state["documents"]
    retry_count = state["retry_count"]

    # RAG generation
    generation = ragChain.invoke({"context": documents, "question": question})

    # Return updated state
    updatedState = {"documents": documents, "question": question, "generation": generation, "retry_count": retry_count}

    return updatedState

# Grader Node
def gradeDocuments(state):
    """
    Determines whether the retrieved documents are relevant to the question.

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): Updates documents key with only filtered relevant documents
    """
    print("---CHECK DOCUMENT RELEVANCE TO QUESTION---")
    print(f"Current Retry Count: {state['retry_count']}")
    question = state["question"]
    documents = state["documents"]
    retry_count = state["retry_count"]

    # Recursion base case
    if retry_count > 5:
        print("---MAX RETRY LIMIT REACHED IN TRANSFORM QUERY: FORCING END---")
        return {"documents": state["documents"], "question": state["question"], "retry_count": retry_count}
    
    # Score each doc
    filtered_docs = []
    for d in documents:

        score = retrievalGrader.invoke(
            {"question": question, "document": d.page_content}
        )
        grade = score.binary_score
        if grade == "yes":
            print("---GRADE: DOCUMENT RELEVANT---")
            filtered_docs.append(d)

        else:
            print("---GRADE: DOCUMENT NOT RELEVANT---")
            continue

    return {"documents": filtered_docs, "question": question, "retry_count": retry_count}


# Rewriter node
def transformQuery(state):
    """
    Transform the query to produce a better question.

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): Updates question key with a re-phrased question
    """

    print("---TRANSFORM QUERY---")
    question = state["question"]
    documents = state["documents"]
    retry_count = state["retry_count"]

    # Recursion base case
    print(f"Retry Count: {retry_count}")
    if retry_count > 5:
        print("---MAX RETRY LIMIT REACHED IN TRANSFORM QUERY: FORCING END---")
        return {"documents": state["documents"], "question": state["question"], "retry_count": retry_count}
    
    # Re-write question
    betterQuestion = questionRewriter.invoke({"question": question})
    return {"documents": documents, "question": betterQuestion, "retry_count": retry_count + 1}

def graphLoopFallback(state):
    
    print("---GRAPH LOOP - NO GENERATION---")

    '''
    question = state["question"]
    documents = state["documents"]
    retry_count = state["retry_count"]


    # RAG generation
    # Still done because end node (evaluateMetrics) still needs to calculate something
    generation = ragChain.invoke({"context": documents, "question": question})

    # Return updated state
    updatedState = {"documents": documents, "question": question, "generation": generation, "retry_count": retry_count}

    return updatedState
    
    '''

    # Evaluate metrics
    state["metrics"] = {
        "LLMContextRecall": 0,
        "Faithfulness": 0,
        "FactualCorrectness": 0,
        "SemanticSimilarity": 0,
    }

    return state

async def evaluateMetrics(state):
    """
    Evaluate metrics for the current RAG pipeline response.

    Args:
        state (dict): The current graph state.

    Returns:
        state (dict): Adds a 'metrics' key containing evaluation scores.
    """
    print("---EVALUATING METRICS---")
    retry_count = state.get("retry_count", 0)
    print(f"Final Retry Count Before Reset: {retry_count}")
    state["retry_count"] = 0
    question = state["question"]
    generation = state["generation"]
    documents = state["documents"]

    # Mock ground truth if unavailable (replace with actual reference if possible)
    groundTruth = state.get("groundTruth", "Expected answer based on context.")

    # Prepare retrieved contexts
    retrievedContexts = [doc.page_content for doc in documents]

    # Create a SingleTurnSample object
    sample = SingleTurnSample(
        user_input=question,
        response=generation,
        reference=groundTruth,
        retrieved_contexts=retrievedContexts,
    )

    # Evaluate metrics
    if "metrics" not in state:
        state["metrics"] = {}
    state["metrics"] = {
        "LLMContextRecall": await contextRecall.single_turn_ascore(sample),
        "Faithfulness": await faithfulness.single_turn_ascore(sample),
        "FactualCorrectness": await factualCorrectness.single_turn_ascore(sample),
        "SemanticSimilarity": await semanticSimilarity.single_turn_ascore(sample),
    }

    return state
    
## Edges

# Generation edge
def decideToGenerate(state):
    """
    Determines whether to generate an answer, or re-generate a question.

    Args:
        state (dict): The current graph state

    Returns:
        str: Binary decision for next node to call
    """

    print("---ASSESS GRADED DOCUMENTS---")
    state["question"]
    filtered_documents = state["documents"]
    retry_count = state["retry_count"]

    # Recursion base case
    print(f"Current Retry Count: {retry_count}")
    if retry_count > 5:  
        print("---MAX RETRY LIMIT REACHED: STOPPING---")
        return "graphLoopFallback"
    
    # All documents have been filtered check_relevance
    # We will re-generate a new query
    if not filtered_documents:
        print(
            "---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, TRANSFORM QUERY---"
        )
        return "transformQuery"
    
    # We have relevant documents, so generate answer
    else:
        print("---DECISION: GENERATE---")
        return "generate"


def grade_generation_v_documents_and_question(state):
    """
    Determines whether the generation is grounded in the document and answers question.

    Args:
        state (dict): The current graph state

    Returns:
        str: Decision for next node to call
    """

    print("---CHECK HALLUCINATIONS---")
    print(f"Current Retry Count: {state['retry_count']}")
    
    question = state["question"]
    documents = state["documents"]
    generation = state["generation"]
    
    score = hallucinationGrader.invoke(
        {"documents": documents, "generation": generation}
    )
    grade = score.binary_score
    
    # Recursion base case
    retry_count = state["retry_count"]
    print(f"Current Retry Count: {retry_count}")
    if retry_count > 5:  
        print("---MAX RETRY LIMIT REACHED: STOPPING---")
        return "graphLoopFallback"
    
    # Check hallucination
    if grade == "yes":
        print("---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---")

        # Check question-answering
        print("---GRADE GENERATION vs QUESTION---")
        score = answerGrader.invoke({"question": question, "generation": generation})
        grade = score.binary_score

        if grade == "yes":
            print("---DECISION: GENERATION ADDRESSES QUESTION---")
            return "useful"
        
        else:
            print("---DECISION: GENERATION DOES NOT ADDRESS QUESTION---")
            return "not useful"
        
    else:
        pprint("---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---")
        return "not supported"


In [13]:
# Build Self-RAG Graph
workflow = StateGraph(GraphState)

# Define the nodes
workflow.add_node("retrieve", retrieve)  
workflow.add_node("gradeDocuments", gradeDocuments)  
workflow.add_node("generate", generate)  
workflow.add_node("transformQuery", transformQuery) 
workflow.add_node("graphLoopFallback", graphLoopFallback)
workflow.add_node("evaluateMetrics", evaluateMetrics)

# Build graph
workflow.add_edge(START, "retrieve")
workflow.add_edge("retrieve", "gradeDocuments")
workflow.add_conditional_edges(
    "gradeDocuments",
    decideToGenerate,
    {
        "transformQuery": "transformQuery",
        "generate": "generate",
        "graphLoopFallback": "graphLoopFallback"
    },
)
workflow.add_edge("transformQuery", "retrieve")
workflow.add_conditional_edges(
    "generate",
    grade_generation_v_documents_and_question,
    {
        "not supported": "generate",
        "useful": "evaluateMetrics",
        "not useful": "transformQuery",
        "graphLoopFallback": "graphLoopFallback"
    },
)
workflow.add_edge("generate", "evaluateMetrics")
workflow.add_edge("graphLoopFallback", END)
workflow.add_edge("evaluateMetrics", END)


# Compile
app = workflow.compile()


In [14]:
# Put our process into a function
evaluationSamples = []
ragResults = []
async def selfRAG(userQuery, history, correctAnswer):
    """
    Gradio-compatible function to process SelfRAG workflow.
    Args:
        userQuery (str): The user's question.
        history (list): Conversation history.
        correctAnswer (str): The ground truth answer for metrics (optional).

    Returns:
        list: history
    """

    # Variables
    inputs = {"question": userQuery, "retry_count": 0}
    finalOutput = None
    retryFlag = False

    # Start the workflow
    async for output in app.astream(inputs, config={"recursion_limit": 100}):

        # Saving final output for metric purposes
        finalOutput = output

        # Check if the process went into a loop (generated answer with simpleRAG)
        if "graphLoopFallback" in output:
            retryFlag = True

        # Printing out each node state for clarity
        for key, value in output.items():

            # Print node
            pprint(f"Node '{key}':")

            # Print metrics
            if "metrics" in value:
                pprint("Metrics: ")
                pprint(value["metrics"])
    
    # Get the generation and its metrics
    finalNodeKey = list(finalOutput.keys())[-1]  # Get the key of the last executed node
    nodeOutput = finalOutput[finalNodeKey]  # Access the nested state
    generation = nodeOutput.get("generation", "No generation produced.")
    metrics = nodeOutput.get("metrics", {})

    # Update history
    if history is None:
        history = []
    
    history.extend([
        {"role": "user", "content": userQuery},
        {"role": "llm", "content": generation}
    ])

    # Append metrics to evaluationSamples for tracking (if correctAnswer is provided)
    if correctAnswer:
        evaluationSamples.append({
            "user_input": userQuery,
            "retrieved_contexts": [doc.page_content for doc in finalOutput.get("documents", [])],
            "response": generation,
            "reference": correctAnswer,
            "metrics": metrics,
        })
    
    # Save the result of the query with the metrics 
    ragResults.append({
        "question": userQuery,
        "generation": generation,
        "ground_truth_response": correctAnswer,
        "retry": retryFlag
    })

    return history  

In [15]:
# Function to evaluate our RAG pipeline when given ground truth
async def pipelineEvaluation(dataset, metrics):

    # Run through our runs
    results = []
    for run in dataset:

        # Save our inputs/outputs
        inputQuery = run["user_input"]
        groundTruthAnswer = run["reference"]
        contexts = run["retrieved_contexts"]
        response = run["response"]

        # Create a SingleTurnSample object
        sample = SingleTurnSample(
            user_input=inputQuery,
            response=response,
            reference=groundTruthAnswer,
            retrieved_contexts=contexts 
        )

        # Evaluate metrics
        runResults = {"question": inputQuery}
        for metric in metrics:

            # Get the score for the given metric
            try:

                score = await metric.single_turn_ascore(sample)
                runResults[type(metric).__name__] = score

            except Exception as e:
                # Catch errors for debugging
                runResults[type(metric).__name__] = f"Error: {str(e)}"
        
        # Save metric results
        results.append(runResults)
    
    # Calculate mean and standard deviation for each metric
    metricsStats = {}
    for metric in metrics:
        metricName = type(metric).__name__
        scores = [result[metricName] for result in results if isinstance(result[metricName], (int, float))]
        
        # Only calculate stats if there are valid scores
        if scores:
            metricsStats[metricName] = {
                "mean": np.mean(scores),
                "std_dev": np.std(scores),
            }
            
        else:
            metricsStats[metricName] = {
                "mean": "No valid scores",
                "std_dev": "No valid scores",
            }
    
    return results, metricsStats
    

In [16]:
# Load metrics
evalMetrics = [LLMContextRecall(llm=LangchainLLMWrapper(llm)), 
               FactualCorrectness(llm=LangchainLLMWrapper(llm)), 
               Faithfulness(llm=LangchainLLMWrapper(llm)), 
               SemanticSimilarity(embeddings=LangchainEmbeddingsWrapper(OpenAIEmbeddings()))]

In [17]:
# Sample records from our dataset
maristTestSample = maristQA.sample(100, replace=False)
maristTestSample.head()

Unnamed: 0,0,1
589,What are the hours of the student financial se...,Transfer Student Admission Department Student ...
656,First Fulbright scholarship awardee?,Scholarships and FellowshipsHelping You Achiev...
543,Witchcraft history class professor?,Contact InformationAcademic SchoolOfficeEmailP...
544,Who's the person to go to for Latin American a...,Contact InformationAcademic SchoolOfficeEmailP...
150,Can I get help with creating my resume and cov...,Center for Career ServicesYour Path to Success...


In [None]:
# Run our chain with each question and evaluate
chatHistory = None
for row in maristTestSample.iterrows():
    print(f"Question: {row[1][0]}")
    chatHistory = await selfRAG(row[1][0], chatHistory, row[1][1])
    
## Evaluation
evalResults, metricStats = await pipelineEvaluation(evaluationSamples, evalMetrics)
for result in evalResults:
    print(result)

for metric in metricStats.keys():
    print(f"{metric} - Mean: {metricStats[metric]['mean']}, St. Dev: {metricStats[metric]['std_dev']}")

Question: What are the hours of the student financial services office?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0
"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7101235813282704}
Question: First Fulbright scholarship awardee?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=11da071a-112f-4fb4-b7bc-cdaafe377018,id=11da071a-112f-4fb4-b7bc-cdaafe377018


"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.723360194823954}
Question: Who's the person to go to for Latin American and Caribbean Studies?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0
"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticS

Failed to batch ingest runs: langsmith.utils.LangSmithError: Failed to POST https://api.smith.langchain.com/runs/batch in LangSmith API. HTTPError('502 Server Error: Bad Gateway for url: https://api.smith.langchain.com/runs/batch', '\n<html><head>\n<meta http-equiv="content-type" content="text/html;charset=utf-8">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n')


"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7430772989476265}
Question: Who made the strategic plan?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0
"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7269221407735531}
Questi

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=8a2baa55-f563-4099-8fbc-2de9cde00d75,id=ee8ce6d5-3f04-4edc-a97d-089309bc5374; trace=8a2baa55-f563-4099-8fbc-2de9cde00d75,id=31a3ba99-3783-4799-8c00-648b8630f5f4; trace=8a2baa55-f563-4099-8fbc-2de9cde00d75,id=b99e8c97-f022-4965-a885-447370103b53; trace=8a2baa55-f563-4099-8fbc-2de9cde00d75,id=3fbfc29b-6f2c-431c-98f2-3e96084cc554; trace=8a2baa55-f563-4099-8fbc-2de9cde00d75,id=7e2b2e18-ed7d-4fcc-a9f4-4d8900ed2811; trace=8a2baa55-f563-4099-8fbc-2de9cde00d75,id=600d66f3-1281-4899-987f-aff0a541895d; trace=8a2baa55-f563-4099-8fbc-2de9cde00d75,id=297de2b0-a04e-436e-a14b-5c746db24076; trace=8a2baa55-f563-4099-8fbc-2de9cde00d75,id=f3a164b7-95cc-4124-9fa1-da8d06f5f25c; trace=8a2baa55-f563-4099

---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0
"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7240822806848485}
Question: What are some off-campus activities offered by Marist?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=fb3cd2d4-1b91-4ace-9b2d-b86b9897c6f2,id=269778be-61bc-40fb-985f-379d463d7e85; trace=fb3cd2d4-1b91-4ace-9b2d-b86b9897c6f2,id=2755175e-f881-42e4-bbd8-ea60b85487c2; trace=fb3cd2d4-1b91-4ace-9b2d-b86b9897c6f2,id=967ba18e-c4d8-4684-a138-2c4b8a74e4c7; trace=fb3cd2d4-1b91-4ace-9b2d-b86b9897c6f2,id=31c0e3e7-dde0-4489-914a-e9b1335bbecd; trace=fb3cd2d4-1b91-4ace-9b2d-b86b9897c6f2,id=df2e3dd0-3d9b-469a-8687-41b56e61e829; trace=fb3cd2d4-1b91-4ace-9b2d-b86b9897c6f2,id=b7b6590a-0b5d-45ef-b27e-13b60dd483e7; trace=fb3cd2d4-1b91-4ace-9b2d-b86b9897c6f2,id=d4642cff-47c0-48fa-9507-295e001ef63e; trace=fb3cd2d4-1b91-4ace-9b2d-b86b9897c6f2,id=443b6a4d-5d3d-4b4a-8410-8d6ae4d263e8; trace=fb3cd2d4-1b91-4ace

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7237184818250818}
Question: How many courses should be applied by a student in  global studies.
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, TRANSFORM QUERY---
"Node 'gradeDocuments':"
---TRANSFORM QUERY---
Retry Count: 0
"Node 'transformQuery':"
---RETRIEVE---
Current Retry Count: 1
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 2
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 4
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Cu

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=45409713-2273-452a-917d-0c81a11d61a8,id=56c512c1-ad83-4af2-ad6c-b27c767585c8; trace=45409713-2273-452a-917d-0c81a11d61a8,id=73a223cb-ae85-4484-ab5a-14f239b32b20; trace=45409713-2273-452a-917d-0c81a11d61a8,id=5e0b628c-58c4-4c8b-bb38-e18bdf901642; trace=45409713-2273-452a-917d-0c81a11d61a8,id=6153e6fb-ab0d-4a55-a094-017c671b37a5; trace=45409713-2273-452a-917d-0c81a11d61a8,id=4186aabc-52c3-4824-91fd-9520d78f716c; trace=45409713-2273-452a-917d-0c81a11d61a8,id=6aa6f71a-ee30-4220-bd4b-5c5aaa6ea656; trace=45409713-2273-452a-917d-0c81a11d61a8,id=e5a34b31-8061-40a7-a096-c8c6f73a103c; trace=45409713-2273-452a-917d-0c81a11d61a8,id=65807721-047f-45bb-979a-e810f6f47077; trace=45409713-2273-452a

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7094916572859257}
Question: Goal of Hudson River Valley Regional Studies?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0
"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7374052

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=18425db2-e545-4aa4-9a3f-109b9ac3eda9,id=97359fb7-56f5-430b-9e14-f7b10c1ce512; trace=18425db2-e545-4aa4-9a3f-109b9ac3eda9,id=48f582b6-25e8-4edb-8769-10a86a0d7398; trace=18425db2-e545-4aa4-9a3f-109b9ac3eda9,id=694a421f-c501-4196-aa3b-83439cb2d54b; trace=273ec01e-7505-459c-89c5-58798a1e1aeb,id=273ec01e-7505-459c-89c5-58798a1e1aeb; trace=273ec01e-7505-459c-89c5-58798a1e1aeb,id=c848e68d-211c-474a-910e-bb8b404e11aa; trace=273ec01e-7505-459c-89c5-58798a1e1aeb,id=bc5bf4a8-4ed5-4b2c-8537-c0e1dc530e01; trace=273ec01e-7505-459c-89c5-58798a1e1aeb,id=38ad2c57-460c-4540-b718-5375ae509895; trace=273ec01e-7505-459c-89c5-58798a1e1aeb,id=5b3b5ae8-bf97-45ae-9352-49a7afa06e67; trace=273ec01e-7505-459c

"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0
"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7358740413276068}
Question: Where is the registrar's office located?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---G

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=e51e5053-849b-4757-b484-ec13ecd30420,id=7cd34a0d-07ca-4b8d-84b4-6e70e8190a83; trace=e51e5053-849b-4757-b484-ec13ecd30420,id=59f9fae6-91b1-4899-894e-9b20f4933a35; trace=e51e5053-849b-4757-b484-ec13ecd30420,id=538f41f3-883a-43bc-b70e-06d794ddf691; trace=e51e5053-849b-4757-b484-ec13ecd30420,id=dd41174d-e3f5-4abe-9281-1c65128a950c; trace=e51e5053-849b-4757-b484-ec13ecd30420,id=0ef05771-589c-4b24-a624-07adca61720a; trace=e51e5053-849b-4757-b484-ec13ecd30420,id=4e3314eb-69d9-4c0b-9f74-4f930762b57b; trace=e51e5053-849b-4757-b484-ec13ecd30420,id=102ff38e-b54b-4c8d-93e2-ac990be35e95; trace=e51e5053-849b-4757-b484-ec13ecd30420,id=ff0984ee-add7-47c3-91cf-930c40af7a35; trace=e51e5053-849b-4757

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.75,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7521738210571728}
---RETRIEVE---
Current Retry Count: 1
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 2
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 4
---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, TRANSFORM QUERY---
"Node 'gradeDocuments':"
---TRANSFORM QUERY---
Retry Count: 4
"Node 'transformQuery':"
---RETRIEVE---
Current Retry Count: 9
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 18
---MAX RETRY LIMIT REACHED IN TRANSFORM QUERY: FORCING END---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 36
---MAX RETRY LIMIT REACHED: STOPPING---
"Node 'gradeDocuments':"
---GRAPH LOOP - NO GENERATION---
"Node 'graphLoopFallback':"
'Metrics: '
{'FactualCorrectness

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=ba966140-ca4d-444a-b6b7-eafa95d567a4,id=a69df5dc-ad03-4500-a011-7a34ff5b7dc6; trace=ba966140-ca4d-444a-b6b7-eafa95d567a4,id=2b8d9938-af45-466b-8cf7-15cf4c8660bb; trace=ba966140-ca4d-444a-b6b7-eafa95d567a4,id=6c173f3c-e567-4262-8ce1-6e6220150b51; trace=ba966140-ca4d-444a-b6b7-eafa95d567a4,id=02b4c85c-215c-49b2-a673-31bc3600cc17; trace=ba966140-ca4d-444a-b6b7-eafa95d567a4,id=7bb2d396-0799-404d-a24c-ebe8ed40b1f2; trace=ba966140-ca4d-444a-b6b7-eafa95d567a4,id=fc5fc53e-5aa5-4899-b4ec-151f9e48ffd6; trace=ba966140-ca4d-444a-b6b7-eafa95d567a4,id=a149a747-c981-42e6-b559-36b34d9ea96e; trace=ba966140-ca4d-444a-b6b7-eafa95d567a4,id=394c6db5-7317-4507-9263-2b0e57af085d; trace=ba966140-ca4d-444a

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 0.8181818181818182,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7197207242535373}
Question: What's the purpose of the Central gate?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, TRANSFORM QUERY---
"Node 'gradeDocuments':"
---TRANSFORM QUERY---
Retry Count: 0
"Node 'transformQuery':"
---RETRIEVE---
Current Retry Count: 1
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 2
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 4
---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTI

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=ba966140-ca4d-444a-b6b7-eafa95d567a4,id=20530cfb-2e15-4739-a039-36e2ff7ec2a0; trace=ba966140-ca4d-444a-b6b7-eafa95d567a4,id=0299fd21-75f1-413d-9aba-ad954723d487; trace=ba966140-ca4d-444a-b6b7-eafa95d567a4,id=ff158e73-1f5f-4449-982c-309b63172f60; trace=ba966140-ca4d-444a-b6b7-eafa95d567a4,id=e945958b-dfad-4d68-ab1c-11185dce716d; trace=ba966140-ca4d-444a-b6b7-eafa95d567a4,id=facd8edb-e4c8-455d-a01b-310ba87b274d; trace=ba966140-ca4d-444a-b6b7-eafa95d567a4,id=2ea18294-b4d9-4504-9e77-e935fdfa5e05; trace=ba966140-ca4d-444a-b6b7-eafa95d567a4,id=5e40f1cc-7185-4109-91df-27e6fae91c50; trace=ba966140-ca4d-444a-b6b7-eafa95d567a4,id=8e4c5bab-79dd-4a68-87b4-043a0d6dc37f; trace=ba966140-ca4d-444a

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7298899931052355}
Question: What if I completed my credits under 36?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, TRANSFORM QUERY---
"Node 'gradeDocuments':"
---TRANSFORM QUERY---
Retry Count: 0
"Node 'transformQuery':"
---RETRIEVE---
Current Retry Count: 1
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 2
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 4
---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, TRANSFORM 

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=cbe6116a-6d0e-430e-8239-1a4c4eaef69a,id=5f7d4c2d-070c-4578-a547-ed2e79c8a5cc; trace=cbe6116a-6d0e-430e-8239-1a4c4eaef69a,id=19a8c32c-e36a-4100-90a3-2b3b61513f23; trace=cbe6116a-6d0e-430e-8239-1a4c4eaef69a,id=21691981-27e3-44e8-b576-aeb829a18942; trace=cbe6116a-6d0e-430e-8239-1a4c4eaef69a,id=4a352fa5-e1e5-4d48-bbe0-cf43508f1cfb; trace=cbe6116a-6d0e-430e-8239-1a4c4eaef69a,id=fa5c2ee7-1f1b-4084-80f0-3b6950bbf488; trace=cbe6116a-6d0e-430e-8239-1a4c4eaef69a,id=2ebdf9de-9cf1-479b-9fc6-c0bd910e8dfe; trace=cbe6116a-6d0e-430e-8239-1a4c4eaef69a,id=96dc6286-6432-4e71-8323-0ca6727bab36; trace=cbe6116a-6d0e-430e-8239-1a4c4eaef69a,id=d187488d-3b50-43cb-be37-bebdebae24a3; trace=cbe6116a-6d0e-430e

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 0.7777777777777778,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7490551625131693}
Question: Master of Science in Information Systems concentrations
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=5610f727-d634-46e2-86a2-a05ba43ef9a8,id=ccd0f4d4-8790-4abb-927d-b155da4b4b72; trace=5610f727-d634-46e2-86a2-a05ba43ef9a8,id=1d6810d0-42e7-400b-91cb-c10d8716b0fb; trace=5610f727-d634-46e2-86a2-a05ba43ef9a8,id=9b8de35a-6c77-49b1-bf62-07d9da49d935; trace=5610f727-d634-46e2-86a2-a05ba43ef9a8,id=8052b9ad-20c4-46bf-b0d1-a896da3ee01b; trace=5610f727-d634-46e2-86a2-a05ba43ef9a8,id=fc3ec2ac-5791-4daf-a185-3c573116edd3; trace=5610f727-d634-46e2-86a2-a05ba43ef9a8,id=3ae4138e-f37b-4c2b-9cf1-5cceb687dfe5; trace=5610f727-d634-46e2-86a2-a05ba43ef9a8,id=fef223a2-5f16-4805-b9c2-ab054f4fd431; trace=5610f727-d634-46e2-86a2-a05ba43ef9a8,id=40270166-11cf-4ae0-ae14-0a5518ba7f19; trace=5610f727-d634-46e2

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 0.8,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7130723987974671}
Question: Who can apply to the Five-Year BA-BS/MAT program?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0
"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.733

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=f13f1226-cd73-48ba-a4e5-7c7e4349b66f,id=f13f1226-cd73-48ba-a4e5-7c7e4349b66f; trace=f13f1226-cd73-48ba-a4e5-7c7e4349b66f,id=00b69369-5e5e-4f1a-b320-0f77db3883f4; trace=f13f1226-cd73-48ba-a4e5-7c7e4349b66f,id=674eaf9c-6d82-46fc-8cbe-61ed4796d2b8; trace=f13f1226-cd73-48ba-a4e5-7c7e4349b66f,id=0acec197-218d-4f78-b59f-b5e58f5de222; trace=f13f1226-cd73-48ba-a4e5-7c7e4349b66f,id=8cea4e37-747e-4953-85ea-42c21bfeadc0; trace=f13f1226-cd73-48ba-a4e5-7c7e4349b66f,id=b3bf47eb-046b-4d7d-8381-094942cc2cbd; trace=f13f1226-cd73-48ba-a4e5-7c7e4349b66f,id=76fadb49-4861-4953-b2cd-301b44ab4147; trace=f13f1226-cd73-48ba-a4e5-7c7e4349b66f,id=8712cc57-7f3a-4e62-8f6a-0679839eb25d; trace=f13f1226-cd73-48ba

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 0.875,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7526458635914938}
Question: What part do foreign languages need to choose?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=dbfd6e67-3cd0-485e-b06a-445572576d37,id=e718484b-b120-442a-89a2-b1f44ce55c39; trace=dbfd6e67-3cd0-485e-b06a-445572576d37,id=889c02d3-adea-4217-89e1-042e6d1c8c11; trace=dbfd6e67-3cd0-485e-b06a-445572576d37,id=afda39a7-49dc-4582-890a-f925f714c67c; trace=dbfd6e67-3cd0-485e-b06a-445572576d37,id=e16f55de-c600-427c-9aa5-2e77de4ec44f; trace=dbfd6e67-3cd0-485e-b06a-445572576d37,id=e4436f31-72bb-4d82-8b72-45c3d95b789d; trace=dbfd6e67-3cd0-485e-b06a-445572576d37,id=b1d5119c-4459-4d94-b5a4-4328a40abc24; trace=dbfd6e67-3cd0-485e-b06a-445572576d37,id=36a53671-f4ea-4509-8878-9e2bfcb7a6d0; trace=dbfd6e67-3cd0-485e-b06a-445572576d37,id=7ed8e58a-113c-42d7-9049-ff85473d59a1; trace=dbfd6e67-3cd0-485e

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.29,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7141356480417771}
Question: about Cathleen Muller
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0
"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7430772989476265}
Question: Dr

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=cb0d95c9-c7be-433f-a7cb-b7054f6470d1,id=3163e6c2-7aaf-494a-90b9-b562d9305ef5; trace=cb0d95c9-c7be-433f-a7cb-b7054f6470d1,id=0def5350-f8e8-44af-85e1-3577332dcd2e; trace=cb0d95c9-c7be-433f-a7cb-b7054f6470d1,id=6e08ec4d-b7df-4a53-99a7-637190d60139; trace=cb0d95c9-c7be-433f-a7cb-b7054f6470d1,id=8d6f7aa7-b02b-4d9c-bd2f-7e09f7e28b30; trace=cb0d95c9-c7be-433f-a7cb-b7054f6470d1,id=3b2c09e7-9cc2-4371-bc3a-3f0613519667; trace=cb0d95c9-c7be-433f-a7cb-b7054f6470d1,id=8ba9d2a2-9bde-410e-975c-e3167742daab; trace=f5941d3a-e726-46d3-81a2-22873c2f53ce,id=f5941d3a-e726-46d3-81a2-22873c2f53ce; trace=f5941d3a-e726-46d3-81a2-22873c2f53ce,id=fe243e0e-acff-467c-8f2c-173897719ac5; trace=f5941d3a-e726-46d3

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 0.875,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7429985272233486}
Question: When are flu shots due by?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=766fed0c-8b1d-4103-b67f-828030da747a,id=d00b4d0a-8c07-4d16-95de-3d17eade6f24; trace=766fed0c-8b1d-4103-b67f-828030da747a,id=dd0230ed-ee3e-4945-a22e-3c4dd84afd54; trace=766fed0c-8b1d-4103-b67f-828030da747a,id=988eddfc-408f-45f2-8c0a-fbd3c3968b6d; trace=766fed0c-8b1d-4103-b67f-828030da747a,id=2ac9c00c-8b6d-4959-93c8-8ab72f9111c4; trace=766fed0c-8b1d-4103-b67f-828030da747a,id=971b7fff-e971-45df-95bc-f5e3e74745b8; trace=766fed0c-8b1d-4103-b67f-828030da747a,id=4d59080f-1a75-4eb2-bc06-4fe903c9b0f2; trace=766fed0c-8b1d-4103-b67f-828030da747a,id=44c61f32-dfa9-4171-b93b-642f0f227cc7; trace=766fed0c-8b1d-4103-b67f-828030da747a,id=dc79a6af-770d-4f14-a281-bbb69d6b7ccc; trace=766fed0c-8b1d-4103

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.6982982873792769}
Question: Who is Dr. Steven Garabedian?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0
"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.726272267156028}
Questi

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=00c75846-e3c3-4605-b4c4-2231d08d6be0,id=6353f1f6-d029-4f72-8492-6b4b9ab14b75; trace=00c75846-e3c3-4605-b4c4-2231d08d6be0,id=bb12337d-2f47-4845-9e9e-9c568dc57816; trace=00c75846-e3c3-4605-b4c4-2231d08d6be0,id=6a02f97f-1cfe-4e41-bfca-8e80a6505fe4; trace=00c75846-e3c3-4605-b4c4-2231d08d6be0,id=d36e35e1-7d13-4427-af4f-7929ca17a945; trace=00c75846-e3c3-4605-b4c4-2231d08d6be0,id=29edffe1-bc2b-4b73-8d4f-96af19e7bd84; trace=00c75846-e3c3-4605-b4c4-2231d08d6be0,id=f91b1107-1c2e-47be-a726-25ed0ee7a3bc; trace=00c75846-e3c3-4605-b4c4-2231d08d6be0,id=ef2e4f56-2a46-4e5b-a840-ed0d9df3c63c; trace=00c75846-e3c3-4605-b4c4-2231d08d6be0,id=7d7798f1-fb70-4ebc-bc43-7cd738f415e1; trace=00c75846-e3c3-4605

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 0.8333333333333334,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.725272633695492}
Question: Who is Dr. Melissa A. Gaeke?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=5299d198-fa32-4389-aa38-d28df524ed7c,id=9770a6c7-fc0e-472a-8524-cffd65f874ee; trace=5299d198-fa32-4389-aa38-d28df524ed7c,id=6dd962b8-48fb-4a74-bcef-ce733557cf67; trace=5299d198-fa32-4389-aa38-d28df524ed7c,id=299d6724-2168-4b7a-af0b-b9d04787f72a; trace=5299d198-fa32-4389-aa38-d28df524ed7c,id=b26270eb-07ba-4706-bc7a-d2064eb8efdd; trace=5299d198-fa32-4389-aa38-d28df524ed7c,id=d756d620-6db3-4c6c-b579-cc2d988b6548; trace=5299d198-fa32-4389-aa38-d28df524ed7c,id=9a78814e-2e0f-4c1a-a897-04b5d6921ba4; trace=5299d198-fa32-4389-aa38-d28df524ed7c,id=6395c9ef-8be3-4bb9-bfca-87a6241b029e; trace=5299d198-fa32-4389-aa38-d28df524ed7c,id=5e65cedc-0785-4a4f-8f2b-f297431f9d38; trace=5299d198-fa32-4389

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7201203400125518}
Question: What is the fashion advisory board
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=98753a55-fdd1-4f80-9ac2-8f84315ebb4e,id=9fa9c0d3-af97-43b5-b8fb-312ac8dce0cd; trace=98753a55-fdd1-4f80-9ac2-8f84315ebb4e,id=468fd99e-6311-4647-b684-11519fa59b55; trace=98753a55-fdd1-4f80-9ac2-8f84315ebb4e,id=b7dc4a4b-a69c-49d9-849e-2274d33759c8; trace=98753a55-fdd1-4f80-9ac2-8f84315ebb4e,id=07f922f2-384a-48d2-80b4-a82809778420; trace=98753a55-fdd1-4f80-9ac2-8f84315ebb4e,id=778905f4-7b49-49f7-a323-b4cb0987d835; trace=98753a55-fdd1-4f80-9ac2-8f84315ebb4e,id=7b4e88c9-6d81-4ab0-9cc2-b1d0563c94a0; trace=98753a55-fdd1-4f80-9ac2-8f84315ebb4e,id=a233f3ea-cabb-4b7d-8e4e-1f63cd3c8ed1; trace=98753a55-fdd1-4f80-9ac2-8f84315ebb4e,id=b24197f7-8618-4cb1-9a4e-3b99f660667f; trace=98753a55-fdd1-4f80

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7028095269339847}
Question: What is an internship?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=08b02f3d-242c-4d3c-9575-39341952a5f4,id=3a5d047d-d473-458d-a018-99debf2048ad; trace=08b02f3d-242c-4d3c-9575-39341952a5f4,id=26ec8312-5dd1-45bf-aa61-22f0ced3a0ff; trace=08b02f3d-242c-4d3c-9575-39341952a5f4,id=b2874568-bb86-4329-81c7-28510e7b8456; trace=08b02f3d-242c-4d3c-9575-39341952a5f4,id=bd7a168a-077e-4e83-abe1-6fb9aaadb495; trace=08b02f3d-242c-4d3c-9575-39341952a5f4,id=2588bb72-d83f-417e-9cfd-ede1f3a5314a; trace=08b02f3d-242c-4d3c-9575-39341952a5f4,id=a54582c9-8f0b-4c41-968d-55823425016e; trace=08b02f3d-242c-4d3c-9575-39341952a5f4,id=6ac66c3c-2ddf-40d7-a23c-41fe38ee2a4b; trace=08b02f3d-242c-4d3c-9575-39341952a5f4,id=d13dd3b5-87b2-4d9c-8ce4-7b2f1faec589; trace=08b02f3d-242c-4d3c

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7243359349683114}
Question: is Lori Beth an adjunct?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=10d8dc56-5635-42b0-af52-0a26f41d3e52,id=f94c21cc-1e77-47a5-8836-f46cb363a5fd; trace=10d8dc56-5635-42b0-af52-0a26f41d3e52,id=05c60bc2-e266-415b-a95a-c5b24b178eb8; trace=10d8dc56-5635-42b0-af52-0a26f41d3e52,id=94c53bc4-d579-4805-9f02-33c6b270070b; trace=10d8dc56-5635-42b0-af52-0a26f41d3e52,id=5a53927e-af29-413d-addb-36ba3ba708c0; trace=10d8dc56-5635-42b0-af52-0a26f41d3e52,id=07ae6a12-3dc9-4850-807d-b1527bb1cf46; trace=10d8dc56-5635-42b0-af52-0a26f41d3e52,id=e1a60c75-3915-4c11-a38c-2e04634e47ad; trace=10d8dc56-5635-42b0-af52-0a26f41d3e52,id=1603923f-b02b-4d86-8554-990b49d572e9; trace=10d8dc56-5635-42b0-af52-0a26f41d3e52,id=66885cec-15d8-4acc-b90b-b78b391b2aaf; trace=10d8dc56-5635-42b0

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7307643010631353}
Question: email of Dr. Wermuth?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, TRANSFORM QUERY---
"Node 'gradeDocuments':"
---TRANSFORM QUERY---
Retry Count: 0
"Node 'transformQuery':"
---RETRIEVE---
Current Retry Count: 1
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 2
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 4
---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, TRANSFORM QUERY---
"Node 'gra

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=ce4d6590-7cdc-47f1-bd7d-50ad1fda086d,id=78e683ce-adb1-485b-88b9-e73557eb30cd; trace=f7125303-f906-49ef-93a8-6b40ccc97321,id=f7125303-f906-49ef-93a8-6b40ccc97321; trace=f7125303-f906-49ef-93a8-6b40ccc97321,id=910e5691-1e1f-4b7e-86ee-a75bc205a65d; trace=f7125303-f906-49ef-93a8-6b40ccc97321,id=6ffedaf8-4e36-497b-ac7d-2b8715546b1c; trace=f7125303-f906-49ef-93a8-6b40ccc97321,id=d3ee2b8c-078e-4bb0-99b1-d4996fd9bb8a; trace=f7125303-f906-49ef-93a8-6b40ccc97321,id=84d582f3-7d6d-41b4-b176-a3c191c6c4a7; trace=f7125303-f906-49ef-93a8-6b40ccc97321,id=804a5e67-1f51-4495-8cf1-86a4c40e9f6d; trace=f7125303-f906-49ef-93a8-6b40ccc97321,id=8a24cfd4-5fc0-41f6-9bbe-6784b90d3caf; trace=f7125303-f906-49ef

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7105550402213924}
Question: When is the CAAS open?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, TRANSFORM QUERY---
"Node 'gradeDocuments':"
---TRANSFORM QUERY---
Retry Count: 0
"Node 'transformQuery':"
---RETRIEVE---
Current Retry Count: 1
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 2
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 4
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 4
---GENERATE---
---CHECK 

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=f7125303-f906-49ef-93a8-6b40ccc97321,id=ceb52a0a-ce4f-44f1-a593-69df6ebb5f59; trace=f7125303-f906-49ef-93a8-6b40ccc97321,id=e211fd08-0a80-4dc1-bbd5-7b9330c621a5; trace=f7125303-f906-49ef-93a8-6b40ccc97321,id=7769a170-6c54-487f-bed7-1a110c138917; trace=f7125303-f906-49ef-93a8-6b40ccc97321,id=160dd7f1-59fe-4687-aaf1-c36de4946d61; trace=f7125303-f906-49ef-93a8-6b40ccc97321,id=f4bd7416-93eb-4d5d-83f9-0c94a2862ac2; trace=f7125303-f906-49ef-93a8-6b40ccc97321,id=b3931b2f-af73-400c-abf0-ef7893e8fa2d; trace=913b1a59-1edc-4a26-af3d-2623fd37ad17,id=913b1a59-1edc-4a26-af3d-2623fd37ad17; trace=913b1a59-1edc-4a26-af3d-2623fd37ad17,id=1f086d09-5810-4405-8e66-c6c78c88c346; trace=913b1a59-1edc-4a26

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7270022852974423}
Question: Where did Kopchik graduate
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0
"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7397147806385499}
Question

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=e6fe34a4-942d-43b3-9803-6a518b1df67a,id=1eb0ff38-477b-476a-949f-a35bc10b9be2; trace=e6fe34a4-942d-43b3-9803-6a518b1df67a,id=aac44ef4-a70b-4d42-a59e-0af6645bb8e5; trace=e6fe34a4-942d-43b3-9803-6a518b1df67a,id=bc779947-ac4d-4021-8a7b-2ea5c3401fc3; trace=e6fe34a4-942d-43b3-9803-6a518b1df67a,id=103cab5e-0042-471f-bc18-71e69cf67f1d; trace=e6fe34a4-942d-43b3-9803-6a518b1df67a,id=bc6a452c-03e7-410a-b068-51d84ed53d6c; trace=e6fe34a4-942d-43b3-9803-6a518b1df67a,id=4305d018-1a5f-4939-8d15-213cca599133; trace=e6fe34a4-942d-43b3-9803-6a518b1df67a,id=3ac934e0-b814-4a17-a62d-91cab0872606; trace=e6fe34a4-942d-43b3-9803-6a518b1df67a,id=8d5bf04f-83dc-475b-998a-d8b16c0329b9; trace=e6fe34a4-942d-43b3

---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, TRANSFORM QUERY---
"Node 'gradeDocuments':"
---TRANSFORM QUERY---
Retry Count: 0
"Node 'transformQuery':"
---RETRIEVE---
Current Retry Count: 1
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 2


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=5429312d-0fb3-4ffd-b8c1-ae07036cd43e,id=5c45e370-331e-4503-8e42-aaf4fae0ddb8; trace=5429312d-0fb3-4ffd-b8c1-ae07036cd43e,id=3191bfb6-28b1-4ecd-bac9-ad2ed9900f87; trace=5429312d-0fb3-4ffd-b8c1-ae07036cd43e,id=0bc4a14b-db79-48bd-a205-175b219a4c09; trace=5429312d-0fb3-4ffd-b8c1-ae07036cd43e,id=94a5178f-ed09-4935-bbb1-e351c6d5ca6c; trace=5429312d-0fb3-4ffd-b8c1-ae07036cd43e,id=1edaa2cb-fcdd-48d5-bae4-639ae2172373; trace=5429312d-0fb3-4ffd-b8c1-ae07036cd43e,id=0d3686d8-fe66-4a40-914a-5cc5905a7352; trace=5429312d-0fb3-4ffd-b8c1-ae07036cd43e,id=687b627f-8bed-4334-952d-ac73292b862d; trace=5429312d-0fb3-4ffd-b8c1-ae07036cd43e,id=33d0ff71-146c-44b4-aaf8-9148e76f748d; trace=5429312d-0fb3-4ffd

---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 4
---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, TRANSFORM QUERY---
"Node 'gradeDocuments':"
---TRANSFORM QUERY---
Retry Count: 4
"Node 'transformQuery':"
---RETRIEVE---
Current Retry Count: 9
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 18
---MAX RETRY LIMIT REACHED IN TRANSFORM QUERY: FORCING END---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 36
---MAX RETRY LIMIT REACHED: STOPPING---
"Node 'gradeDocuments':"
---GRAPH LOOP - NO GENERATION---
"Node 'graphLoopFallback':"
'Metrics: '
{'FactualCorrectness': 0,
 'Faithfulness': 0,
 'LLMContextRecall': 0,
 'SemanticSimilarity': 0}
Question: Dr. Nicholas Marshall
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT 

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=5d3e7df3-1c08-4934-898f-9de7d1f61409,id=657ad219-d5b4-42f4-8ce7-5590398c1b4e; trace=5d3e7df3-1c08-4934-898f-9de7d1f61409,id=c4a5c5ef-9d80-48fb-91d8-0aae951f650d; trace=5d3e7df3-1c08-4934-898f-9de7d1f61409,id=006f9c12-b4dd-41b2-bc80-30179fef5c95; trace=5d3e7df3-1c08-4934-898f-9de7d1f61409,id=204994b3-5f8f-47ca-a301-d3d4549e2fcc; trace=5d3e7df3-1c08-4934-898f-9de7d1f61409,id=7093f7df-63e0-4ed0-915d-6ac21b74b4e7; trace=5d3e7df3-1c08-4934-898f-9de7d1f61409,id=907a5696-df90-4576-9086-c7fbe1ab0d69; trace=5d3e7df3-1c08-4934-898f-9de7d1f61409,id=58cd4a5b-51db-4b70-aa26-408e692b08e7; trace=5d3e7df3-1c08-4934-898f-9de7d1f61409,id=88efbc81-1b89-4df8-a2bb-1585bb1ad057; trace=5d3e7df3-1c08-4934

---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0
"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7209812671380791}
Question: Does Marist accept transfer credits?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=5d3e7df3-1c08-4934-898f-9de7d1f61409,id=d345fd98-6094-47b2-b4a5-2de6ac5946b6; trace=28aea237-41e6-4f1a-a578-c5df2ba39cb3,id=28aea237-41e6-4f1a-a578-c5df2ba39cb3; trace=28aea237-41e6-4f1a-a578-c5df2ba39cb3,id=e62dbdef-0619-4bfb-aeb1-a7a33c6ee619; trace=28aea237-41e6-4f1a-a578-c5df2ba39cb3,id=fa50b32f-cedc-49f9-bbbf-6fdc2881d2c9; trace=28aea237-41e6-4f1a-a578-c5df2ba39cb3,id=89b1d7c5-6a4d-48e8-8ac1-e173ec99a04b; trace=28aea237-41e6-4f1a-a578-c5df2ba39cb3,id=860405f8-91e1-4d8e-afdb-61887e32a7f1; trace=28aea237-41e6-4f1a-a578-c5df2ba39cb3,id=4fac239a-107b-41c6-92fe-28f17c7df5c1; trace=28aea237-41e6-4f1a-a578-c5df2ba39cb3,id=f410307f-412a-4703-a81b-ea468f6ebd6c; trace=28aea237-41e6-4f1a

---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0
"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7176800716129963}
Question: Does Marist offer internships to students?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs Q

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=28aea237-41e6-4f1a-a578-c5df2ba39cb3,id=3b0f99c3-8dbd-4e5d-bc82-ac8503800cd8; trace=28aea237-41e6-4f1a-a578-c5df2ba39cb3,id=92b527aa-142a-417f-a1d2-0c9ff275c843; trace=28aea237-41e6-4f1a-a578-c5df2ba39cb3,id=7b134694-2c79-4dbb-b8fb-715c7b3498ff; trace=28aea237-41e6-4f1a-a578-c5df2ba39cb3,id=fa7273d9-d506-4086-b83a-1a4935b0c5af; trace=28aea237-41e6-4f1a-a578-c5df2ba39cb3,id=bd0e9e78-6259-4131-8d8c-6df0f01995b2; trace=28aea237-41e6-4f1a-a578-c5df2ba39cb3,id=688e8241-77b9-4b80-a993-d200aff3a142; trace=28aea237-41e6-4f1a-a578-c5df2ba39cb3,id=be34ccbf-28d7-4da5-80cf-659f993f408f; trace=28aea237-41e6-4f1a-a578-c5df2ba39cb3,id=9f9d50e6-5e0a-437d-ad29-cb7c57f8a8c7; trace=28aea237-41e6-4f1a

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7091688885621704}
Question: What courses guide students on how to use technology?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=86e3d80e-8026-4fb9-b4ab-b4ed86eaa08a,id=fbb15cfc-280e-4630-89a5-391b39b16a37; trace=86e3d80e-8026-4fb9-b4ab-b4ed86eaa08a,id=bc3fd52e-629e-4a98-9f1d-86a276933a39; trace=86e3d80e-8026-4fb9-b4ab-b4ed86eaa08a,id=7cd80b48-2bbe-425b-862b-085ba6af706c; trace=86e3d80e-8026-4fb9-b4ab-b4ed86eaa08a,id=b16890d7-0939-4f50-94bd-4f1a2aacc048; trace=86e3d80e-8026-4fb9-b4ab-b4ed86eaa08a,id=a847a379-260d-487b-84ce-6342e2b49892; trace=86e3d80e-8026-4fb9-b4ab-b4ed86eaa08a,id=ff03d9f0-17c6-41bf-8dc6-89ef77d75ce1; trace=86e3d80e-8026-4fb9-b4ab-b4ed86eaa08a,id=c57ed27c-0410-4666-bb4e-a1159ab3358d; trace=86e3d80e-8026-4fb9-b4ab-b4ed86eaa08a,id=76b621b0-ceb0-41d2-af97-c037f7cde947; trace=86e3d80e-8026-4fb9

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7223245698843662}
Question: Purchase textbooks for class.
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0
"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7153734138167936}
Quest

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=94870c08-65c5-48dc-8961-e48db0c59139,id=5a66e0dc-d51c-4ad3-9c7e-d37b70a295bb; trace=94870c08-65c5-48dc-8961-e48db0c59139,id=9609e447-686d-4aee-ab94-3a36cd8394cb; trace=94870c08-65c5-48dc-8961-e48db0c59139,id=282dd869-e3e9-42d2-949e-6c7767618171; trace=94870c08-65c5-48dc-8961-e48db0c59139,id=3fff4b34-e142-4f1b-8584-fcf242b26355; trace=94870c08-65c5-48dc-8961-e48db0c59139,id=26901e49-46a6-44c8-819e-2be53a405156; trace=e72159ed-8bd9-41ff-8673-1f69ac787e02,id=e72159ed-8bd9-41ff-8673-1f69ac787e02; trace=e72159ed-8bd9-41ff-8673-1f69ac787e02,id=3f6ace6e-9a82-4892-a1e1-84bc93303e31; trace=e72159ed-8bd9-41ff-8673-1f69ac787e02,id=5c7c7e31-27ca-41a5-a99e-af83753e4ab5; trace=e72159ed-8bd9-41ff

---CHECK HALLUCINATIONS---
Current Retry Count: 8
Current Retry Count: 8
---MAX RETRY LIMIT REACHED: STOPPING---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 8
---GRAPH LOOP - NO GENERATION---
"Node 'graphLoopFallback':"
'Metrics: '
{'FactualCorrectness': 0,
 'Faithfulness': 0,
 'LLMContextRecall': 0,
 'SemanticSimilarity': 0}


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=1f0914d5-4f1d-45f4-a0d4-7c6aedf22c7b,id=a3961a4a-14ef-4166-9926-128efbbcc2ae; trace=1f0914d5-4f1d-45f4-a0d4-7c6aedf22c7b,id=1f0daac2-9ade-45e2-a1b8-389a58a97a0f; trace=1f0914d5-4f1d-45f4-a0d4-7c6aedf22c7b,id=506d89c8-2834-4359-8e86-143d1e43e57e; trace=1f0914d5-4f1d-45f4-a0d4-7c6aedf22c7b,id=375e26b6-d61d-48ff-a52c-060e30769a99; trace=1f0914d5-4f1d-45f4-a0d4-7c6aedf22c7b,id=ae8434cc-d565-43fd-b4d6-54a46fa5da28; trace=1f0914d5-4f1d-45f4-a0d4-7c6aedf22c7b,id=632b85df-2104-43e4-b9f0-f4701c415896; trace=1f0914d5-4f1d-45f4-a0d4-7c6aedf22c7b,id=1991132e-1094-4460-a58d-5fd7fcb262de; trace=1f0914d5-4f1d-45f4-a0d4-7c6aedf22c7b,id=d67e0788-64d8-4f0c-9549-1452eaaef9d8; trace=1f0914d5-4f1d-45f4

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7448685204562904}
Question: Admissions contact?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0
"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7026575961025846}
Question: on wh

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=b6ab5499-5c3e-4dc7-86e5-b304852b7a2c,id=fad1419a-3689-4de3-9ccf-6dfe4467d633; trace=b6ab5499-5c3e-4dc7-86e5-b304852b7a2c,id=0cf466e3-7168-411e-b394-332b7753dadf; trace=b6ab5499-5c3e-4dc7-86e5-b304852b7a2c,id=8b30c09c-8c0c-4b04-a6e1-9c0fb2d60370; trace=b6ab5499-5c3e-4dc7-86e5-b304852b7a2c,id=7c89bbcf-3b09-4840-95c5-4f9a9e7e7095; trace=b6ab5499-5c3e-4dc7-86e5-b304852b7a2c,id=dcf155af-1a3e-4d05-9764-bb0817c2b56b; trace=b6ab5499-5c3e-4dc7-86e5-b304852b7a2c,id=aee8159a-9948-4b0a-a652-ba47d66c88ff; trace=b6ab5499-5c3e-4dc7-86e5-b304852b7a2c,id=2cc4c53a-70c2-4d89-8ea3-e4b46e29fd21; trace=b6ab5499-5c3e-4dc7-86e5-b304852b7a2c,id=166e0f04-4b96-4a03-a2b3-1ade63dcf037; trace=b6ab5499-5c3e-4dc7

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7451913802349293}
Question: Dr. Joanne Myers role
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=4d868629-5195-471f-aa9a-cc7a97cba817,id=e4c5f72a-3a87-4802-ab2f-087198e3e1a6; trace=4d868629-5195-471f-aa9a-cc7a97cba817,id=16a4910b-a5be-4727-ba39-95c184c0ccbd; trace=4d868629-5195-471f-aa9a-cc7a97cba817,id=05e1a599-3f19-4fa0-a5a2-148dd7d60269; trace=4d868629-5195-471f-aa9a-cc7a97cba817,id=2930cde5-e249-4d69-83c0-ed8f25fae771; trace=4d868629-5195-471f-aa9a-cc7a97cba817,id=dfa8d83e-5a2a-4b7e-9445-7457db43501a; trace=4d868629-5195-471f-aa9a-cc7a97cba817,id=bb859266-6075-4fca-9386-c6cef883125f; trace=4d868629-5195-471f-aa9a-cc7a97cba817,id=5419e0e4-6e39-4eaf-9a2f-4f0305454892; trace=4d868629-5195-471f-aa9a-cc7a97cba817,id=d73cc0a6-9e39-4b25-a9e2-23fe53ffaf71; trace=4d868629-5195-471f

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7143162506251383}
Question: Who published Displacements and Transformations in Caribbean Cultures?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0
"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'Seman

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=4d868629-5195-471f-aa9a-cc7a97cba817,id=43b709f5-8d9e-4d04-96fa-a9a96b3f452f; trace=cb82a620-2aa0-476c-9f61-eb6352286488,id=cb82a620-2aa0-476c-9f61-eb6352286488; trace=cb82a620-2aa0-476c-9f61-eb6352286488,id=eeb658f3-0b2b-492f-9641-ed6a1cf4ddee; trace=cb82a620-2aa0-476c-9f61-eb6352286488,id=fd45d77f-7dd9-46c6-9094-66dc4b5d326a; trace=cb82a620-2aa0-476c-9f61-eb6352286488,id=f5b9b079-b867-4ce7-80f6-c5d414c1aa29; trace=cb82a620-2aa0-476c-9f61-eb6352286488,id=f0649ac2-53e8-470a-9e4c-8d11ec90bdf0; trace=cb82a620-2aa0-476c-9f61-eb6352286488,id=09ce1bd5-5f5f-49e3-864b-d3e6f3df523d; trace=cb82a620-2aa0-476c-9f61-eb6352286488,id=31830750-db2a-4507-8744-bccd2605498c; trace=cb82a620-2aa0-476c

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7258086506586573}
Question: Pau-San Haruta
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=4c19a301-70cd-4f0a-a983-cfb9f2fd71b9,id=c289d117-5431-4550-a411-639e87c85460; trace=4c19a301-70cd-4f0a-a983-cfb9f2fd71b9,id=830d49b2-f251-403e-8cbc-b1be38a1d8c4; trace=4c19a301-70cd-4f0a-a983-cfb9f2fd71b9,id=6f0479bd-57f9-4c42-aaf3-c4e40c6364c3; trace=4c19a301-70cd-4f0a-a983-cfb9f2fd71b9,id=863e397b-1c60-4feb-873b-09497add237a; trace=4c19a301-70cd-4f0a-a983-cfb9f2fd71b9,id=b0585521-adf4-4f7e-b632-2f6193df0b8f; trace=4c19a301-70cd-4f0a-a983-cfb9f2fd71b9,id=8895af11-4b65-4997-ba68-ef6fcd99f2d1; trace=4c19a301-70cd-4f0a-a983-cfb9f2fd71b9,id=67a87abd-9d5d-4a88-9d01-22b245f9ad5b; patch: trace=4c19a301-70cd-4f0a-a983-cfb9f2fd71b9,id=4eb53326-28f7-4678-be6c-f1745ae08ec9; trace=4c19a301-70

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.741986851617501}
Question: How big was marist back in the day?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=4c19a301-70cd-4f0a-a983-cfb9f2fd71b9,id=3c5a9604-97be-4abf-bd11-89d7889bb0e2; trace=4c19a301-70cd-4f0a-a983-cfb9f2fd71b9,id=44ac4b40-a63d-4a92-a0ce-49a0993e47ed; trace=4c19a301-70cd-4f0a-a983-cfb9f2fd71b9,id=437d5e30-8b77-44a3-8915-f18b503a0805; trace=4c19a301-70cd-4f0a-a983-cfb9f2fd71b9,id=55722aa7-1fe3-4f16-94eb-b979b8aaa2a4; trace=86be225a-b74e-4dc5-a956-9d118c114c66,id=86be225a-b74e-4dc5-a956-9d118c114c66; trace=86be225a-b74e-4dc5-a956-9d118c114c66,id=47459fa6-db25-4ad1-9f5e-d7df278ff931; trace=86be225a-b74e-4dc5-a956-9d118c114c66,id=429b6aa4-c1ce-4f6f-bf77-21f150500f89; trace=86be225a-b74e-4dc5-a956-9d118c114c66,id=6d5507f6-5f7d-44ef-a795-d1e49fbf7653; trace=86be225a-b74e-4dc5

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7174195822015209}
Question: Dr. Gregory Machacek interests?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=f1879819-93da-4280-a7c6-4cbb61b89f30,id=34f7488d-4c67-4f80-bfbe-503d2db81a21; trace=f1879819-93da-4280-a7c6-4cbb61b89f30,id=d71cd925-dcbc-4a86-a414-1c102ca13864; trace=f1879819-93da-4280-a7c6-4cbb61b89f30,id=f65e7548-e015-477e-84a5-4924e7be16dd; trace=f1879819-93da-4280-a7c6-4cbb61b89f30,id=e504843c-6d42-4d59-8264-84e1623c2add; trace=f1879819-93da-4280-a7c6-4cbb61b89f30,id=4f47cdbf-a6f2-435a-adc4-988cdcc7b3a8; trace=f1879819-93da-4280-a7c6-4cbb61b89f30,id=bdae51c6-2eb2-4c59-9c1e-85e5dcb8a9b9; trace=f1879819-93da-4280-a7c6-4cbb61b89f30,id=59fe743e-097d-4b3e-9c60-17dedc81f857; trace=f1879819-93da-4280-a7c6-4cbb61b89f30,id=1dbb67cd-8672-4a47-95a8-bde872cae409; trace=f1879819-93da-4280

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7414684225434539}
Question: Where is the Museum studies program offered?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=cc47c42d-358c-48e5-855d-152f9715bb4b,id=6eb9cdce-e099-4255-99f6-9226dcd3254a; trace=cc47c42d-358c-48e5-855d-152f9715bb4b,id=c072638d-d284-43fc-bb62-6d11cb518275; trace=cc47c42d-358c-48e5-855d-152f9715bb4b,id=e9a0bfa3-6dfa-4bbd-b10e-0f433b4b66a7; trace=cc47c42d-358c-48e5-855d-152f9715bb4b,id=e22a4f48-5a98-40db-8a13-afeadcc502ef; trace=cc47c42d-358c-48e5-855d-152f9715bb4b,id=3514ecee-7479-43b8-b484-cb1c03c9ff78; trace=cc47c42d-358c-48e5-855d-152f9715bb4b,id=c6d576f9-a05f-4f36-befc-9092dad73835; trace=cc47c42d-358c-48e5-855d-152f9715bb4b,id=8c586ef5-0f39-4c57-9bbc-09f8ee88e8d7; trace=cc47c42d-358c-48e5-855d-152f9715bb4b,id=026d549f-fc86-4766-bf99-e835ecb13f78; trace=cc47c42d-358c-48e5

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7038718780317633}
Question: Where did Phillip Scepanski go to school?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, TRANSFORM QUERY---
"Node 'gradeDocuments':"
---TRANSFORM QUERY---
Retry Count: 0
"Node 'transformQuery':"
---RETRIEVE---
Current Retry Count: 1
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 2
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 4
---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, TRANSFORM

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=54bd1aa5-3aa5-4bff-86c9-f0f00096e97f,id=1d341f33-1e7f-44d5-9f1f-3ab98f39f1bd; trace=54bd1aa5-3aa5-4bff-86c9-f0f00096e97f,id=b8a4f24f-e53d-4bdc-a941-6a0ccc9ed9b4; trace=54bd1aa5-3aa5-4bff-86c9-f0f00096e97f,id=b66afe6d-2bcb-4d5b-aacc-b5b187431847; trace=54bd1aa5-3aa5-4bff-86c9-f0f00096e97f,id=0a142fb4-bf51-429c-8a67-715f3694fb4b; trace=54bd1aa5-3aa5-4bff-86c9-f0f00096e97f,id=3e605235-0463-44bc-aa90-a68968f8507e; trace=54bd1aa5-3aa5-4bff-86c9-f0f00096e97f,id=9b407eaf-4489-4204-9efb-2e9a101f22dd; trace=54bd1aa5-3aa5-4bff-86c9-f0f00096e97f,id=91c4dc8a-4d43-4749-90f0-27722cc511d0; trace=54bd1aa5-3aa5-4bff-86c9-f0f00096e97f,id=688408ba-5554-4900-864d-f1f0e9d61ffb; trace=54bd1aa5-3aa5-4bff

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 0.8333333333333334,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7190130773309972}
Question: What is the Cutty spark scholarship?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=fea9fb7c-ea94-4160-9930-0311da8a1890,id=7621aace-11b1-40d9-ac8a-3f433420c944; trace=fea9fb7c-ea94-4160-9930-0311da8a1890,id=d5f0bc7a-f77e-4b57-bfc4-37d39bc499fb; trace=fea9fb7c-ea94-4160-9930-0311da8a1890,id=118fef4d-160c-4a24-8eaf-10c801561c7e; trace=fea9fb7c-ea94-4160-9930-0311da8a1890,id=574ed12b-0254-4eca-bfd1-ea340fca0798; trace=fea9fb7c-ea94-4160-9930-0311da8a1890,id=67ebcc53-09ac-4abb-8dc7-142c4fd099c0; trace=fea9fb7c-ea94-4160-9930-0311da8a1890,id=f67b697e-7bf0-4da6-b218-91b60eb45d23; trace=fea9fb7c-ea94-4160-9930-0311da8a1890,id=8d55e595-6d3f-4a1b-8a65-4ae922e6c814; trace=fea9fb7c-ea94-4160-9930-0311da8a1890,id=bc7a66b7-0bfd-4261-8a37-3bfe7079f951; trace=fea9fb7c-ea94-4160

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7339048861546659}
Question: Who is Jeffrey Canino?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, TRANSFORM QUERY---
"Node 'gradeDocuments':"
---TRANSFORM QUERY---
Retry Count: 0
"Node 'transformQuery':"
---RETRIEVE---
Current Retry Count: 1
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 2
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 4
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 4
---GENERATE---
---CH

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=b20d908a-40b2-47db-b9ee-f6fb3079b921,id=1da9aacb-158c-44b5-9193-6618d67bc3b1; trace=b20d908a-40b2-47db-b9ee-f6fb3079b921,id=ef85d725-1a3a-4a88-a865-49a9881e2e95; trace=b20d908a-40b2-47db-b9ee-f6fb3079b921,id=8989abda-e0bf-4704-913b-47de879603b6; trace=b20d908a-40b2-47db-b9ee-f6fb3079b921,id=602b9ce0-febb-4285-acb3-f8517447b7ce; trace=42912acb-8fed-483b-8fcd-91dfa63a8373,id=42912acb-8fed-483b-8fcd-91dfa63a8373; trace=42912acb-8fed-483b-8fcd-91dfa63a8373,id=f13170a1-7465-4543-a35b-dd61b13f2f23; trace=42912acb-8fed-483b-8fcd-91dfa63a8373,id=2b208995-446f-49bf-b965-51aadff78550; trace=42912acb-8fed-483b-8fcd-91dfa63a8373,id=cc3dfb62-e4ca-421a-9d99-0fe10309a6a2; trace=42912acb-8fed-483b

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 0.875,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7397888477378214}
Question: Who is carolyn Matheus
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=42912acb-8fed-483b-8fcd-91dfa63a8373,id=5f100f7a-fa42-4374-bcae-b20b86e0c13a; trace=42912acb-8fed-483b-8fcd-91dfa63a8373,id=f84c2679-1adc-4f11-8b13-55672916110c; trace=42912acb-8fed-483b-8fcd-91dfa63a8373,id=f40ec282-7a58-42f2-90dc-36ef3afac42f; trace=42912acb-8fed-483b-8fcd-91dfa63a8373,id=9056f0c4-61fb-459b-be28-a75ee07803e7; trace=42912acb-8fed-483b-8fcd-91dfa63a8373,id=2f7b2118-06c8-49b1-a2ab-b27eeae2615f; trace=42912acb-8fed-483b-8fcd-91dfa63a8373,id=204c6338-9471-4389-b180-7b101d2f2a47; trace=42912acb-8fed-483b-8fcd-91dfa63a8373,id=b4f353f0-b27d-4420-b181-346a4bbaacda; trace=42912acb-8fed-483b-8fcd-91dfa63a8373,id=cc219c4d-7a9d-477e-bc42-8fb7e1191362; trace=42912acb-8fed-483b

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7237063745627265}
Question: Who is considered an adult undergraduate student?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=9b4d9578-1874-4816-866c-61f68fb4b0a4,id=bb70bf85-37f4-4fe6-98e3-eafeed13a777; trace=9b4d9578-1874-4816-866c-61f68fb4b0a4,id=0673a387-eae2-4f6d-9db4-c8f5abf806d1; trace=9b4d9578-1874-4816-866c-61f68fb4b0a4,id=a2200b8a-5615-4e80-ad18-3428a912433b; trace=9b4d9578-1874-4816-866c-61f68fb4b0a4,id=e4ccf92a-e689-476c-92c5-4fc5a5bfd737; trace=9b4d9578-1874-4816-866c-61f68fb4b0a4,id=444bee35-f442-40db-8452-2a7c2124e806; trace=9b4d9578-1874-4816-866c-61f68fb4b0a4,id=5db7d52f-ff6a-4ce3-81ee-c30a98c6ac08; trace=9b4d9578-1874-4816-866c-61f68fb4b0a4,id=8cf5ec54-bcc8-45a3-889a-ae4e27b67d7e; trace=9b4d9578-1874-4816-866c-61f68fb4b0a4,id=720a87e5-b980-4be8-b0cc-113ef5bb2f0b; trace=9b4d9578-1874-4816

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7430186516536245}
Question: What are some clubs/intramurals that Marist offers?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=07aeebc1-5914-475c-8b85-d9df57618091,id=33212126-4185-4974-9f7f-4eba2674c9f8; trace=07aeebc1-5914-475c-8b85-d9df57618091,id=f257bf6f-123c-4558-855d-79e6c47f92d9; trace=07aeebc1-5914-475c-8b85-d9df57618091,id=1de5ef2e-2664-4cc4-b9fb-df11e450fb5d; trace=07aeebc1-5914-475c-8b85-d9df57618091,id=a5da9735-3c00-4581-b28e-63742064a2ac; trace=f00afac9-bd18-4d12-b3f7-d1e27c2c7b32,id=f00afac9-bd18-4d12-b3f7-d1e27c2c7b32; trace=f00afac9-bd18-4d12-b3f7-d1e27c2c7b32,id=dfddd399-91b2-4297-ac2d-d621d58bec65; trace=f00afac9-bd18-4d12-b3f7-d1e27c2c7b32,id=365e7e6a-807c-4902-a59c-3cb7be5c67d0; trace=f00afac9-bd18-4d12-b3f7-d1e27c2c7b32,id=cb53f84a-342b-4a01-b782-3249bda87b28; trace=f00afac9-bd18-4d12

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.12,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7092668223669483}
Question: What classes does Dr. Sally Dwyer-McNulty teach?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=f00afac9-bd18-4d12-b3f7-d1e27c2c7b32,id=3532df90-ffee-4e20-9050-ad008775dd0d; trace=f00afac9-bd18-4d12-b3f7-d1e27c2c7b32,id=f650d896-5910-41e5-9367-45a33d25fa2b; trace=f00afac9-bd18-4d12-b3f7-d1e27c2c7b32,id=efba24e6-be37-424f-a0b4-981ab58b3b7f; trace=f00afac9-bd18-4d12-b3f7-d1e27c2c7b32,id=f0933781-2e36-4e2c-a551-dd56f71af1ed; trace=f00afac9-bd18-4d12-b3f7-d1e27c2c7b32,id=2125aac8-c591-4d15-b140-692a2f706fb0; trace=f00afac9-bd18-4d12-b3f7-d1e27c2c7b32,id=25a72b91-6dcb-4c24-afda-75d6f1abe956; trace=f00afac9-bd18-4d12-b3f7-d1e27c2c7b32,id=95bd5e16-a56a-4659-a3c1-0798a5a1841e; trace=f00afac9-bd18-4d12-b3f7-d1e27c2c7b32,id=59bbd508-2e9c-43bd-b403-866248d422ec; trace=f00afac9-bd18-4d12

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7200529790612678}
Question: Where did Ivette Romero get her PHD?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, TRANSFORM QUERY---
"Node 'gradeDocuments':"
---TRANSFORM QUERY---
Retry Count: 0
"Node 'transformQuery':"
---RETRIEVE---
Current Retry Count: 1
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 2
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 4
---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, TRANSFORM QUER

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=25d13bbf-5c4f-4793-9c27-ea01e9a01b56,id=b2c93e51-40d5-4e9d-9232-1c60a6f50c25; trace=25d13bbf-5c4f-4793-9c27-ea01e9a01b56,id=9ae2f3cd-6f8f-4a08-8b0d-d2cc489ce774; trace=25d13bbf-5c4f-4793-9c27-ea01e9a01b56,id=34b53929-1e37-416d-a8d0-2eb64ef6f905; trace=25d13bbf-5c4f-4793-9c27-ea01e9a01b56,id=3cfbbc16-05ae-4040-ac0e-dbe5546df5a2; trace=25d13bbf-5c4f-4793-9c27-ea01e9a01b56,id=0ecdcfbf-23d3-4ebc-88b2-8403a38d657d; trace=25d13bbf-5c4f-4793-9c27-ea01e9a01b56,id=4d7449d2-24c7-4a25-a55e-3f0caea5f58b; trace=25d13bbf-5c4f-4793-9c27-ea01e9a01b56,id=182d9239-a3e8-4664-a7b6-8834a195d592; trace=25d13bbf-5c4f-4793-9c27-ea01e9a01b56,id=37687113-e274-4613-87cd-a46a1ec717a0; trace=25d13bbf-5c4f-4793

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7242373241308135}
Question: Who is Dr. Pau-San Haruta?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=25d13bbf-5c4f-4793-9c27-ea01e9a01b56,id=cf2d89b7-e90b-44a8-990b-ab684f13f80e; trace=25d13bbf-5c4f-4793-9c27-ea01e9a01b56,id=6421c479-180d-4a94-8c89-5bcb5fc8b5d0; trace=3294124d-bb21-45b0-b139-febf701ce711,id=3294124d-bb21-45b0-b139-febf701ce711; trace=3294124d-bb21-45b0-b139-febf701ce711,id=e40f7df2-adf2-4a7f-b899-f11898a16b0f; trace=3294124d-bb21-45b0-b139-febf701ce711,id=f46654e2-0cd4-4e3a-a229-2c0579756a14; trace=3294124d-bb21-45b0-b139-febf701ce711,id=13c8fa73-7540-42e9-a4d6-34241e6d43ad; trace=3294124d-bb21-45b0-b139-febf701ce711,id=c725d191-0aaa-4f49-8619-e734e44d0093; trace=3294124d-bb21-45b0-b139-febf701ce711,id=401a8691-4e3e-4235-864a-7e19132c7c5a; trace=3294124d-bb21-45b0

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7350964855864434}
Question: What is the distribution threshold that should be reached by students.
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=3294124d-bb21-45b0-b139-febf701ce711,id=c238a01d-fe78-4898-a716-dcce7fc5c6aa; trace=3294124d-bb21-45b0-b139-febf701ce711,id=3c7ef8ff-d07f-4be4-bba3-a17fc3b38f76; trace=3294124d-bb21-45b0-b139-febf701ce711,id=d79af5c9-e8b7-4d18-8205-868de87f058c; trace=3294124d-bb21-45b0-b139-febf701ce711,id=c3b428fd-6665-4b87-801f-9447d729a385; trace=3294124d-bb21-45b0-b139-febf701ce711,id=f1db12b6-12e1-4736-8f75-7985a4413af1; trace=3294124d-bb21-45b0-b139-febf701ce711,id=2aa40cd4-1582-4b31-bd43-b90c169714b5; trace=3294124d-bb21-45b0-b139-febf701ce711,id=715c3c85-05c3-4de6-8b2d-848abaab2546; trace=3294124d-bb21-45b0-b139-febf701ce711,id=695927e9-e2fb-470c-9f92-c6a984d40063; trace=41e83ddb-3a4c-4d1d

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 0.6,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7165110697634512}
Question: What provides students proficiency in a language they already studied or new beginning.
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0
"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextReca

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=b0b55853-c10d-405c-9835-7e60473463dc,id=270e0439-def3-4a7f-be6b-232ceaac97db; trace=b0b55853-c10d-405c-9835-7e60473463dc,id=3d81438d-2ab8-46f3-bb7d-c04714f3e62b; trace=b0b55853-c10d-405c-9835-7e60473463dc,id=24049af6-e2b2-456f-8d63-c8d142dac84a; trace=b0b55853-c10d-405c-9835-7e60473463dc,id=668a2851-9d40-4a79-ae70-0dc63690aae1; trace=b0b55853-c10d-405c-9835-7e60473463dc,id=1b032c2f-8829-4b4a-87b3-7fb992ba9942; trace=b0b55853-c10d-405c-9835-7e60473463dc,id=9e9c0b45-450c-4db1-849b-d003c93e544d; trace=b0b55853-c10d-405c-9835-7e60473463dc,id=ccefd2a5-11b9-4202-9ede-3279b13fd2e2; trace=b0b55853-c10d-405c-9835-7e60473463dc,id=52e21ca4-0624-490f-9ae3-76498d20fa79; trace=b0b55853-c10d-405c

---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0
"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7150418710955706}
Question: Who is the internship coordinator for American Studies?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, TRANSFORM QUERY---
"Node 'gradeDocuments':"
---TRANSFORM QUERY---
Retry Count: 0
"Node 'transformQuery':"
---RETRIEVE---
Current Retry Count: 1
"Node 'retr

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=3767272a-4ecd-4972-890d-18989bcc5b4b,id=fb41f3c8-38ca-4376-944a-347fff4bffd6; trace=3767272a-4ecd-4972-890d-18989bcc5b4b,id=7a7972d0-7a33-44fc-89de-3398ff40eea6; trace=3767272a-4ecd-4972-890d-18989bcc5b4b,id=1f183710-4e91-4347-8d0b-8f5ef69afba4; trace=3767272a-4ecd-4972-890d-18989bcc5b4b,id=98d0ded4-f462-4d25-98a4-9d5698454537; trace=3767272a-4ecd-4972-890d-18989bcc5b4b,id=16862c2c-04b4-4d1f-9c5c-cd195ef109e0; trace=3767272a-4ecd-4972-890d-18989bcc5b4b,id=b9f972ba-6cd0-4699-80a8-9ba8c3b70389; trace=3767272a-4ecd-4972-890d-18989bcc5b4b,id=6cf9bcb3-8100-443e-8b67-e07d0adcba35; trace=3767272a-4ecd-4972-890d-18989bcc5b4b,id=bf459708-961c-446f-aae0-960c01fdb56a; trace=3767272a-4ecd-4972

---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=ff0385d9-c5ac-4358-8f05-8e39af947c29,id=9415402b-bfa6-44a1-b549-df1c577b99e0; trace=ff0385d9-c5ac-4358-8f05-8e39af947c29,id=64622bc8-95fd-4385-9806-4fe09cfaa8da; trace=ff0385d9-c5ac-4358-8f05-8e39af947c29,id=2a5686d4-c7ee-4ff4-ab09-e4a1cd5c2946; trace=ff0385d9-c5ac-4358-8f05-8e39af947c29,id=9ef3e32d-fad5-47fb-94a6-1a27d97d82cb; trace=ff0385d9-c5ac-4358-8f05-8e39af947c29,id=a9d69528-ddf9-45a1-916b-9640be7a5a21; trace=ff0385d9-c5ac-4358-8f05-8e39af947c29,id=d0474c2a-9e58-4448-91e5-cdbdb53572ec; trace=ff0385d9-c5ac-4358-8f05-8e39af947c29,id=ce21b0eb-e3f8-4447-b8eb-155579657a41; trace=ff0385d9-c5ac-4358-8f05-8e39af947c29,id=116967b3-726f-4373-bd39-4c09033d2f34; trace=ff0385d9-c5ac-4358

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.89,
 'Faithfulness': 0.8333333333333334,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7328505649296267}
Question: School housing for graduate students?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, TRANSFORM QUERY---
"Node 'gradeDocuments':"
---TRANSFORM QUERY---
Retry Count: 0
"Node 'transformQuery':"
---RETRIEVE---
Current Retry Count: 1
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 2
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 4
---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTIO

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=ff0385d9-c5ac-4358-8f05-8e39af947c29,id=7b7e603b-be3f-4731-9426-70d8502a3507; trace=ff0385d9-c5ac-4358-8f05-8e39af947c29,id=0cf07ebd-459a-42ee-9d38-adb8859bb033; trace=ff0385d9-c5ac-4358-8f05-8e39af947c29,id=eed34127-e98f-4911-9d57-49c4cc86ce4e; trace=ff0385d9-c5ac-4358-8f05-8e39af947c29,id=d05637d0-d9fd-4004-a4d3-dce2f5594d8b; trace=47a20396-93d2-4bec-8bc8-6aac754a7f3a,id=47a20396-93d2-4bec-8bc8-6aac754a7f3a; trace=47a20396-93d2-4bec-8bc8-6aac754a7f3a,id=10bf8056-1a29-465f-ae99-094b7c46bfc8; trace=47a20396-93d2-4bec-8bc8-6aac754a7f3a,id=e34e1598-32cd-4237-aaaf-9e1e8cf46155; trace=47a20396-93d2-4bec-8bc8-6aac754a7f3a,id=5cbae59e-dd1e-44e7-8609-eeb92e1fc387; trace=47a20396-93d2-4bec

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7311821665888076}
Question: What is the directors of the arts fashion foundation?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION DOES NOT ADDRESS QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0
---TRANSFORM QUERY---
Retry Count: 0
"Node 'transformQuery':"
"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=2b978516-3cd7-4961-a573-8a225a50dfcc,id=ff735aa7-5558-4972-bf02-fc2ed44defd2; trace=2b978516-3cd7-4961-a573-8a225a50dfcc,id=58789b38-bd40-453e-a702-3b06758837a6; trace=2b978516-3cd7-4961-a573-8a225a50dfcc,id=e6028467-548a-47e8-8ce8-eba501ce029b; trace=2b978516-3cd7-4961-a573-8a225a50dfcc,id=5a45ff82-9ab0-4aac-84af-be2b315c9e1a; trace=2b978516-3cd7-4961-a573-8a225a50dfcc,id=a4cbd286-4622-45dd-8178-d1a0108f0d0c; trace=2b978516-3cd7-4961-a573-8a225a50dfcc,id=d30eef2e-aeec-47ed-a0d9-a8a8a81f2b69; trace=2b978516-3cd7-4961-a573-8a225a50dfcc,id=b9198137-59a5-43ab-9d90-9eade241a3ee; trace=2b978516-3cd7-4961-a573-8a225a50dfcc,id=c5135234-984f-4570-afcb-86e8ce1781f5; trace=2b978516-3cd7-4961

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.73,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7776764995161843}
Question: How can I find out which courses are equivalent to courses offered at Marist?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=dc257346-0a1d-402c-9c36-8e4f407888c2,id=12ed300a-aa68-463b-a490-1986d2fb5fbb; trace=dc257346-0a1d-402c-9c36-8e4f407888c2,id=9cc897e0-d42d-41e4-8db0-3c3293cb0350; trace=dc257346-0a1d-402c-9c36-8e4f407888c2,id=0e57b8e5-a02c-4e17-b68e-100d6bcce6ab; trace=dc257346-0a1d-402c-9c36-8e4f407888c2,id=6c6f4a0f-703a-474c-b81c-fc1fcf20144d; trace=dc257346-0a1d-402c-9c36-8e4f407888c2,id=29cf9c43-3957-4a78-8986-46facac2e3e2; trace=dc257346-0a1d-402c-9c36-8e4f407888c2,id=854038df-2067-4337-a19e-36692bc22cb8; trace=dc257346-0a1d-402c-9c36-8e4f407888c2,id=2837b981-761e-4e8a-8e46-0375a17ccf60; trace=dc257346-0a1d-402c-9c36-8e4f407888c2,id=8178aef9-bdca-43ee-8e34-d8ca11a521f9; trace=dc257346-0a1d-402c

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7300990093032492}
Question: Annamaria Maciocia info
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=ad293b95-038a-499d-8451-c9d1c79cabf7,id=f7743ce5-8cd0-4636-aabc-30a874a40ea7; trace=ad293b95-038a-499d-8451-c9d1c79cabf7,id=1b5a1e7e-3ee0-4783-9ea0-753d0c5ccfca; trace=ad293b95-038a-499d-8451-c9d1c79cabf7,id=e8ab9da9-3d4a-44e5-928d-8b85cdeef149; trace=ad293b95-038a-499d-8451-c9d1c79cabf7,id=94249022-858f-42f4-9d33-739203de6cee; trace=1ed35fb6-286d-4909-b0c8-823c33b82057,id=1ed35fb6-286d-4909-b0c8-823c33b82057; trace=1ed35fb6-286d-4909-b0c8-823c33b82057,id=b5783525-608d-41bc-9033-8b61be6734c8; trace=1ed35fb6-286d-4909-b0c8-823c33b82057,id=d5e215bb-8db7-4c19-b7bc-a0113ce78399; trace=1ed35fb6-286d-4909-b0c8-823c33b82057,id=2ef9e6a7-66c5-4e52-b0d3-4b92a1c2fee5; trace=1ed35fb6-286d-4909

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 0.75,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7251325622395788}
Question: Who leads the Mindset list team?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=243f1eb2-05d3-4bab-99a8-5c035f44e3ae,id=74e5d2cf-64d1-49db-8c26-95342b97933e; trace=243f1eb2-05d3-4bab-99a8-5c035f44e3ae,id=e13a4723-6ad3-439e-9102-865895ecc530; trace=243f1eb2-05d3-4bab-99a8-5c035f44e3ae,id=8114975c-74b1-4a6c-892b-acb0890cc3e0; trace=243f1eb2-05d3-4bab-99a8-5c035f44e3ae,id=c23c3fef-197d-43e5-bae3-4491bccbe371; trace=243f1eb2-05d3-4bab-99a8-5c035f44e3ae,id=e73fa87c-c4e5-47c4-b5ec-2b5349bef0f1; trace=243f1eb2-05d3-4bab-99a8-5c035f44e3ae,id=839a1909-5721-455f-ba6e-ec68049343c2; trace=243f1eb2-05d3-4bab-99a8-5c035f44e3ae,id=16b18594-603a-4142-94b9-53a6b2942b5f; trace=243f1eb2-05d3-4bab-99a8-5c035f44e3ae,id=92d410f8-7960-4f13-950f-fd56cd866eee; trace=243f1eb2-05d3-4bab

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 0.625,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7253946066642027}
Question: Tell me about Marist housing history
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=9efa9fdd-6cce-48c3-b0ca-27f515112117,id=bbc4fdac-c1c9-4d62-85b5-c767a830af27; trace=9efa9fdd-6cce-48c3-b0ca-27f515112117,id=34025d4d-66da-49fc-8db6-fcbb11472e5a; trace=9efa9fdd-6cce-48c3-b0ca-27f515112117,id=506d1bc4-5359-47da-98d0-5efc37694721; trace=9efa9fdd-6cce-48c3-b0ca-27f515112117,id=eaf69748-2527-42b9-a487-4dd690f4b95c; trace=9efa9fdd-6cce-48c3-b0ca-27f515112117,id=0c27ccbf-6963-45fe-8a72-4a45c5350b99; trace=9efa9fdd-6cce-48c3-b0ca-27f515112117,id=e0d2d62d-d5a3-4be4-b63c-26a7f4976a51; trace=9efa9fdd-6cce-48c3-b0ca-27f515112117,id=79cecc7e-9e91-4d80-b368-4f0a6f6586ef; trace=9efa9fdd-6cce-48c3-b0ca-27f515112117,id=36b276d6-6b9b-45ff-8f71-7218459ae90a; trace=9efa9fdd-6cce-48c3

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7137745824662732}
Question: Is diversity embraced at Marist?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=2ab7a29e-d59b-40c2-a083-ddd1ab541817,id=aaa32cb2-11dc-4a28-b39b-3860c14cb57a; trace=2ab7a29e-d59b-40c2-a083-ddd1ab541817,id=f4078edd-160f-48d0-80be-5436948fff6e; trace=2ab7a29e-d59b-40c2-a083-ddd1ab541817,id=8c046b9f-29ec-4969-aab8-10ddafaf51b2; trace=2ab7a29e-d59b-40c2-a083-ddd1ab541817,id=f20cb6cc-b1a3-4833-a408-168e52577103; trace=2ab7a29e-d59b-40c2-a083-ddd1ab541817,id=5f85ff75-234b-458d-8ec6-23732fc58c27; trace=2ab7a29e-d59b-40c2-a083-ddd1ab541817,id=f565e9b0-3abc-4dcc-9924-9d24ddf51338; trace=2ab7a29e-d59b-40c2-a083-ddd1ab541817,id=cf351f9d-42e9-4c08-8520-37fe0926dd53; trace=2ab7a29e-d59b-40c2-a083-ddd1ab541817,id=06d1ffdf-657c-4248-8ec9-fdb5da700629; trace=2ab7a29e-d59b-40c2

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7264752373259743}
Question: Why is a red fox the college's mascot?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, TRANSFORM QUERY---
"Node 'gradeDocuments':"
---TRANSFORM QUERY---
Retry Count: 0
"Node 'transformQuery':"
---RETRIEVE---
Current Retry Count: 1
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 2
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 4
---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, TRANSFORM QU

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=c0468028-094e-4a5c-8f06-0cb72fdfb1cf,id=a2ef104e-edcb-4a21-8ad0-9b468d5426a6; trace=c0468028-094e-4a5c-8f06-0cb72fdfb1cf,id=d9ab2e0e-fc25-4b5f-addc-211ca8000450; trace=c0468028-094e-4a5c-8f06-0cb72fdfb1cf,id=ffb53de4-64d2-42ea-9f08-3fa6490751a9; trace=c0468028-094e-4a5c-8f06-0cb72fdfb1cf,id=6f8f2991-f225-4a9e-9921-634a585c4539; trace=16acd230-1c3c-4639-a929-3b2bc99465df,id=16acd230-1c3c-4639-a929-3b2bc99465df; trace=16acd230-1c3c-4639-a929-3b2bc99465df,id=4498d595-1e2d-473b-9f01-07c4005d6236; trace=16acd230-1c3c-4639-a929-3b2bc99465df,id=a6919171-436b-44b1-90e9-c3e5e819e3c3; trace=16acd230-1c3c-4639-a929-3b2bc99465df,id=b8589938-fbdf-43db-a8d3-3fb512504b4f; trace=16acd230-1c3c-4639

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7210553027122983}
Question: Can any school be a member of the American talent initiative?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, TRANSFORM QUERY---
"Node 'gradeDocuments':"
---TRANSFORM QUERY---
Retry Count: 0
"Node 'transformQuery':"
---RETRIEVE---
Current Retry Count: 1
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 2
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---GRADE: DOCUMENT NOT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 4
---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=16acd230-1c3c-4639-a929-3b2bc99465df,id=c3b3536e-e0d5-492a-bcb8-e1e1c22a0c0e; trace=16acd230-1c3c-4639-a929-3b2bc99465df,id=6c5a7149-52aa-42c5-99fa-eaa82bfc1360; trace=16acd230-1c3c-4639-a929-3b2bc99465df,id=60c5c81f-0ee1-47f3-89fe-e038c645244e; trace=16acd230-1c3c-4639-a929-3b2bc99465df,id=5cf3aa17-f902-4a6e-a00d-11fcc0572cc2; trace=16acd230-1c3c-4639-a929-3b2bc99465df,id=7506782c-3a94-44ca-aad4-c2dc7007bddc; trace=16acd230-1c3c-4639-a929-3b2bc99465df,id=bfbdf0b4-062a-4e17-832c-245a120af860; trace=16acd230-1c3c-4639-a929-3b2bc99465df,id=428dc3b2-08fe-4974-8f54-6e4b75487f51; trace=16acd230-1c3c-4639-a929-3b2bc99465df,id=db349d78-10ca-4615-ae53-ebb63beb600f; trace=16acd230-1c3c-4639

Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=57da5078-3406-41f8-ae56-3f2e19b567ee,id=7438d6a5-f90f-44bd-bb68-7b54e9310773; trace=57da5078-3406-41f8-ae56-3f2e19b567ee,id=19c2153f-32c3-42dd-8f85-aa9691b31cdf; trace=57da5078-3406-41f8-ae56-3f2e19b567ee,id=74167263-3543-47f2-b473-275ec0223f0c; trace=57da5078-3406-41f8-ae56-3f2e19b567ee,id=0da2f131-2dd4-4565-954f-ae0f9e6a1802


---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0
"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7303506522133615}
Question: Who is Dr. Qihao Ji?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=fc1d3db0-d7f3-4448-9b1c-b761a58e5a89,id=b06d0be0-c020-43be-ab7f-63321d81ff75; trace=fc1d3db0-d7f3-4448-9b1c-b761a58e5a89,id=e3ea5bf9-ef0f-43d8-ac31-d74e63c2da85; trace=fc1d3db0-d7f3-4448-9b1c-b761a58e5a89,id=c0b4b10a-c138-43a0-853e-838210cc66be; trace=fc1d3db0-d7f3-4448-9b1c-b761a58e5a89,id=56d182ac-7f59-451a-9186-4f56c0581cc8; trace=fc1d3db0-d7f3-4448-9b1c-b761a58e5a89,id=55913b01-c32e-4c07-92c1-fb76abf22af2; trace=fc1d3db0-d7f3-4448-9b1c-b761a58e5a89,id=cc521780-0849-4566-85f0-018911997827; trace=fc1d3db0-d7f3-4448-9b1c-b761a58e5a89,id=fe9078a1-31b2-4b3a-a2d9-c110563a6a61; trace=fc1d3db0-d7f3-4448-9b1c-b761a58e5a89,id=12c04eea-1da6-4f3b-b96e-4f783c1210c7; trace=fc1d3db0-d7f3-4448

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7355886284315676}
Question: Research of Michael Osullivan
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=fc1d3db0-d7f3-4448-9b1c-b761a58e5a89,id=553cb06b-be46-472b-bda4-2d4178f0a904; trace=fc1d3db0-d7f3-4448-9b1c-b761a58e5a89,id=0a12ac37-89ad-46e8-b8ac-a97c6070f9df; trace=fc1d3db0-d7f3-4448-9b1c-b761a58e5a89,id=42b6658a-b983-42f1-a8e8-19ec41f79c34; trace=fc1d3db0-d7f3-4448-9b1c-b761a58e5a89,id=6a337bcc-7155-4011-b110-a325d8bfac17; trace=fc1d3db0-d7f3-4448-9b1c-b761a58e5a89,id=ceafda49-0866-4f62-be51-12264560fea0; trace=fc1d3db0-d7f3-4448-9b1c-b761a58e5a89,id=f3406c22-fc89-4881-9935-c7a508dc8b93; trace=fc1d3db0-d7f3-4448-9b1c-b761a58e5a89,id=1d170ad4-20c0-4d04-96ac-7e656b840f13; trace=fc1d3db0-d7f3-4448-9b1c-b761a58e5a89,id=09866101-6c45-4459-b1a6-f883bca10c8d; trace=140548f1-57b4-4d38

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7347250985714021}
Question: Who is Dr. Sally Dwyer-McNulty?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=140548f1-57b4-4d38-8330-172627e0f470,id=ecfd90e5-8630-4640-977d-20cba384cd8d; trace=140548f1-57b4-4d38-8330-172627e0f470,id=9bf2c53a-9a9c-47b0-b66b-da887959a7d0; trace=140548f1-57b4-4d38-8330-172627e0f470,id=8a9cc70b-b766-4b47-a62c-c76a87bd3b78; trace=140548f1-57b4-4d38-8330-172627e0f470,id=f6115062-f7d2-4558-a148-46de5b226735; trace=140548f1-57b4-4d38-8330-172627e0f470,id=f4084311-1fb8-45fa-90fc-a5f25381e1a5; trace=140548f1-57b4-4d38-8330-172627e0f470,id=78a402be-de98-4274-8978-31619aeda15c; trace=140548f1-57b4-4d38-8330-172627e0f470,id=d9cba224-3cc6-401d-8ebc-8fa71edce12b; trace=140548f1-57b4-4d38-8330-172627e0f470,id=64489b45-6c7a-4cb5-9b2a-f09168a58991; trace=140548f1-57b4-4d38

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 0.8,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7217657244807498}
Question: Where are Marist classes available?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=7ff3fdfa-77aa-4c9f-aa4a-d95371115ee6,id=6aaa693d-c469-4a22-bdfc-b135cb9eb434; trace=7ff3fdfa-77aa-4c9f-aa4a-d95371115ee6,id=843669a5-1fc9-4515-9315-3dc28a520f09; trace=7ff3fdfa-77aa-4c9f-aa4a-d95371115ee6,id=5cd27962-4f4e-4cd5-a344-c3db7f8eef66; trace=7ff3fdfa-77aa-4c9f-aa4a-d95371115ee6,id=04fbdbdf-b912-4a3b-b579-5a8929dfbfc1; trace=7ff3fdfa-77aa-4c9f-aa4a-d95371115ee6,id=b4b38890-0386-4cc5-b6ad-5a2aaee82fee; trace=7ff3fdfa-77aa-4c9f-aa4a-d95371115ee6,id=1280982c-763d-4206-9eb4-67951cc1d057; trace=7ff3fdfa-77aa-4c9f-aa4a-d95371115ee6,id=12b1756a-da99-4785-a731-75a3cdb6cb2f; trace=7ff3fdfa-77aa-4c9f-aa4a-d95371115ee6,id=7b0ea944-bd4a-4574-931a-7f0c52e9b46d; trace=7ff3fdfa-77aa-4c9f

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7091185461460535}
Question: How is an MA in Educational Psychology like?
---RETRIEVE---
Current Retry Count: 0
"Node 'retrieve':"
---CHECK DOCUMENT RELEVANCE TO QUESTION---
Current Retry Count: 0
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---GRADE: DOCUMENT RELEVANT---
---ASSESS GRADED DOCUMENTS---
Current Retry Count: 0
---DECISION: GENERATE---
"Node 'gradeDocuments':"
Current Retry Count: 0
---GENERATE---
---CHECK HALLUCINATIONS---
Current Retry Count: 0
Current Retry Count: 0
---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---
---GRADE GENERATION vs QUESTION---
---DECISION: GENERATION ADDRESSES QUESTION---
"Node 'generate':"
---EVALUATING METRICS---
Final Retry Count Before Reset: 0


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=20c273ac-3415-4b76-93a9-a36e72f1c422,id=78989826-5e8c-4177-b371-e265d3cb383e; trace=20c273ac-3415-4b76-93a9-a36e72f1c422,id=1b288779-7a3e-4c85-80af-3bd2c95abaa0; trace=20c273ac-3415-4b76-93a9-a36e72f1c422,id=aef8df22-c194-4d20-8956-53112286f53a; trace=20c273ac-3415-4b76-93a9-a36e72f1c422,id=4603cf34-8d71-4190-90fc-10126c3e3f5f; trace=20c273ac-3415-4b76-93a9-a36e72f1c422,id=4ea8f1be-3f39-4059-9bbd-988f7f1be176; trace=20c273ac-3415-4b76-93a9-a36e72f1c422,id=91fdee8f-7c3c-4d34-a017-949113765db8; trace=20c273ac-3415-4b76-93a9-a36e72f1c422,id=8172d4bb-cc54-4bf7-8c7a-a8cadf4d1856; trace=20c273ac-3415-4b76-93a9-a36e72f1c422,id=47f05013-d4ab-45e8-a00d-67cf501d021e; trace=20c273ac-3415-4b76

"Node 'evaluateMetrics':"
'Metrics: '
{'FactualCorrectness': 0.0,
 'Faithfulness': 1.0,
 'LLMContextRecall': 0.0,
 'SemanticSimilarity': 0.7212986010472986}


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=6783127f-4df9-4a4b-b611-32423b5c58c4,id=57ae38ed-5197-494b-ab06-174c85150b05; trace=6783127f-4df9-4a4b-b611-32423b5c58c4,id=dce740ed-ae7c-4f63-8cbe-1b1662803808; trace=6783127f-4df9-4a4b-b611-32423b5c58c4,id=8b137a34-386e-4508-894c-dc29bce1cb74; trace=6783127f-4df9-4a4b-b611-32423b5c58c4,id=9ae45b9f-2513-47e9-a61e-9c0ec138579f; trace=6783127f-4df9-4a4b-b611-32423b5c58c4,id=9ac44f3d-ec98-45b3-88cf-b927f6af8ac4; trace=6783127f-4df9-4a4b-b611-32423b5c58c4,id=18bb07f4-80c1-4e62-acde-940b31b69a85; trace=6783127f-4df9-4a4b-b611-32423b5c58c4,id=840a7767-0eae-4abb-b322-29902cb8bb78; trace=6783127f-4df9-4a4b-b611-32423b5c58c4,id=6686711f-f732-4311-96d0-80b212a7c864; trace=6783127f-4df9-4a4b

{'question': 'What are the hours of the student financial services office?', 'LLMContextRecall': 0.5, 'FactualCorrectness': 0.29, 'Faithfulness': 0.0, 'SemanticSimilarity': 0.8438083664343583}
{'question': 'First Fulbright scholarship awardee?', 'LLMContextRecall': 0.07692307692307693, 'FactualCorrectness': "Error: ufunc 'invert' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''", 'Faithfulness': nan, 'SemanticSimilarity': 0.69256337670863}
{'question': 'Witchcraft history class professor?', 'LLMContextRecall': 0.0, 'FactualCorrectness': 0.24, 'Faithfulness': 0.0, 'SemanticSimilarity': 0.9116799600159815}
{'question': "Who's the person to go to for Latin American and Caribbean Studies?", 'LLMContextRecall': 0.0, 'FactualCorrectness': 0.35, 'Faithfulness': 0.0, 'SemanticSimilarity': 0.92530510372576}
{'question': 'Can I get help with creating my resume and cover letters?', 'LLMContextRecall': 0.727

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=f186c221-fbe5-41fb-8ea3-2dcf4b8f738d,id=f186c221-fbe5-41fb-8ea3-2dcf4b8f738d; trace=f186c221-fbe5-41fb-8ea3-2dcf4b8f738d,id=1d96593b-5342-424b-ac53-238b4259baaa; trace=f186c221-fbe5-41fb-8ea3-2dcf4b8f738d,id=bb48984e-6a18-49d4-ae44-cc93b76b1707; trace=f186c221-fbe5-41fb-8ea3-2dcf4b8f738d,id=e9fc0541-8204-4205-bbcf-309224be7b6e; trace=f186c221-fbe5-41fb-8ea3-2dcf4b8f738d,id=49119470-a162-4a7f-b93b-853605c8b0c2; trace=17afbf3e-9152-4872-935b-9136c7267e5c,id=17afbf3e-9152-4872-935b-9136c7267e5c; trace=ac6907cd-0bb5-45de-90d2-72c71c4c461a,id=ac6907cd-0bb5-45de-90d2-72c71c4c461a; trace=ac6907cd-0bb5-45de-90d2-72c71c4c461a,id=f319074a-ac30-4bc2-8ffb-6ff0a266779a; trace=ac6907cd-0bb5-45de

In [19]:
# Make our dataframe of results
processResults = pd.DataFrame(ragResults)
evalResultsDF = pd.DataFrame(evalResults)
finalResultsDF = processResults.merge(evalResultsDF, on="question", how="left")
finalResultsDF.head()

Unnamed: 0,question,generation,ground_truth_response,retry,LLMContextRecall,FactualCorrectness,Faithfulness,SemanticSimilarity
0,What are the hours of the student financial se...,The hours of the Student Financial Services of...,Transfer Student Admission Department Student ...,False,0.5,0.29,0.0,0.843808
1,First Fulbright scholarship awardee?,,Scholarships and FellowshipsHelping You Achiev...,True,0.076923,Error: ufunc 'invert' not supported for the in...,,0.692563
2,Witchcraft history class professor?,The professor for the Witchcraft history class...,Contact InformationAcademic SchoolOfficeEmailP...,False,0.0,0.24,0.0,0.91168
3,Who's the person to go to for Latin American a...,The person to go to for Latin American and Car...,Contact InformationAcademic SchoolOfficeEmailP...,False,0.0,0.35,0.0,0.925305
4,Can I get help with creating my resume and cov...,,Center for Career ServicesYour Path to Success...,True,0.727273,Error: ufunc 'invert' not supported for the in...,,0.667127


In [20]:
# Filter out faulty generations (graph loops)
trueRunsDF = finalResultsDF[finalResultsDF["retry"] == False]
trueRunsDF.describe()

Unnamed: 0,LLMContextRecall,Faithfulness,SemanticSimilarity
count,73.0,73.0,73.0
mean,0.232991,0.017123,0.889489
std,0.318114,0.120248,0.052391
min,0.0,0.0,0.706034
25%,0.0,0.0,0.858148
50%,0.0,0.0,0.897846
75%,0.5,0.0,0.933078
max,1.0,1.0,0.961005


In [33]:
# Filtering out possible errors with FactualCorrectness computation
excludeString = "Error: The LLM generation was not completed. Please increase try increasing the max_tokens and try again."
trueRunsDF = trueRunsDF[trueRunsDF["FactualCorrectness"] != excludeString]
trueRunsDF.describe()

Unnamed: 0,LLMContextRecall,Faithfulness,SemanticSimilarity
count,72.0,72.0,72.0
mean,0.234243,0.017361,0.89015
std,0.320166,0.121075,0.052451
min,0.0,0.0,0.706034
25%,0.0,0.0,0.85827
50%,0.0,0.0,0.898422
75%,0.5,0.0,0.933565
max,1.0,1.0,0.961005


In [34]:
# Averages & St. devs
print(f"LLM Context Recall - Mean: {trueRunsDF["LLMContextRecall"].mean()}, St. Dev: {trueRunsDF["LLMContextRecall"].std()}")
print(f"FactualCorrectness - Mean: {trueRunsDF["FactualCorrectness"].mean()}, St. Dev: {trueRunsDF["FactualCorrectness"].std()}")
print(f"Faithfulness - Mean: {trueRunsDF["Faithfulness"].mean()}, St. Dev: {trueRunsDF["Faithfulness"].std()}")
print(f"SemanticSimilarity - Mean: {trueRunsDF["SemanticSimilarity"].mean()}, St. Dev: {trueRunsDF["SemanticSimilarity"].std()}")

LLM Context Recall - Mean: 0.23424272260056572, St. Dev: 0.3201656451589497
FactualCorrectness - Mean: 0.3129166666666667, St. Dev: 0.18389707393761573
Faithfulness - Mean: 0.017361111111111112, St. Dev: 0.12107491634422932
SemanticSimilarity - Mean: 0.8901498617587273, St. Dev: 0.052451479081982055


In [36]:
# Export the sampled questions to run these questions (which we know work in our SelfRAG process) with SimpleRAG
savePath = "/Users/christiansarmiento/Library/CloudStorage/OneDrive-MaristCollege/Machine Learning/Data/marist_sampled_verified_QA.csv"
trueRunsDF.to_csv(savePath, index=False)

# Implementing SelfRAG w/ LangChain Instead of LangGraph

In [3]:
# Function to clean up evaluation code
async def computeEvaluationMetrics(samples, metrics):
    '''
    Helper method to compute the metrics and its averages for a given sample. parameter "samples" is a list of
    dictonaries that was aggregated from running a RAG process and collecting the input and output data. parameter
    "metrics" is a list of metric objects from RAGAS to evaluate the given samples. 
    '''

    # Keep track of time
    startTime = time.time()

    # Evaluate every sample
    print("Starting Evaluation...")
    evalResults, metricStats = await pipelineEvaluation(samples, metrics)
    print("Evaluation Finished!")
    for result in evalResults:
        print(result)

    # Compute and output the mean and standard deviation for each metric
    print("+-+-+-+-+-+-+-+-+-+-+-+FINAL RESULTS+-+-+-+-+-+-+-+-+-+-+-+")
    for metric in metricStats.keys():
        print(f"{metric} - Mean: {metricStats[metric]['mean']}, St. Dev: {metricStats[metric]['std_dev']}")
    
    # Elapsed time
    endTime = time.time()
    elapsedTime = endTime - startTime
    if elapsedTime > 60:
        secToMin = elapsedTime / 60
        if secToMin > 60:
            print(f"\nExecution Time: {(secToMin / 60):.2f} hrs")
        else:
            print(f"\nExecution Time: {secToMin:.2f} min")
    
    else:
        print(f"\nExecution Time: {(elapsedTime):.2f} sec")

In [30]:
# LLM
llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)

# Prompt templates
class RetrievalResponse(BaseModel):
    response: str = Field(..., title="Determines if retrieval is necessary", description="Output only 'Yes' or 'No'.")
retrieval_prompt = PromptTemplate(
    input_variables=["query"],
    template="Given the query '{query}', determine if retrieval is necessary. Output only 'Yes' or 'No'."
)

class RelevanceResponse(BaseModel):
    response: str = Field(..., title="Determines if context is relevant", description="Output only 'Relevant' or 'Irrelevant'.")
relevance_prompt = PromptTemplate(
    input_variables=["query", "context"],
    template="Given the query '{query}' and the context '{context}', determine if the context is relevant. Output only 'Relevant' or 'Irrelevant'."
)

class GenerationResponse(BaseModel):
    response: str = Field(..., title="Generated response", description="The generated response.")
generation_prompt = PromptTemplate(
    input_variables=["query", "context"],
    template="Given the query '{query}' and the context '{context}', generate a response."
)

class SupportResponse(BaseModel):
    response: str = Field(..., title="Determines if response is supported", description="Output 'Fully supported', 'Partially supported', or 'No support'.")
support_prompt = PromptTemplate(
    input_variables=["response", "context"],
    template="Given the response '{response}' and the context '{context}', determine if the response is supported by the context. Output 'Fully supported', 'Partially supported', or 'No support'."
)

class UtilityResponse(BaseModel):
    response: int = Field(..., title="Utility rating", description="Rate the utility of the response from 1 to 5.")
utility_prompt = PromptTemplate(
    input_variables=["query", "response"],
    template="Given the query '{query}' and the response '{response}', rate the utility of the response from 1 to 5."
)

## Create LLMChains for each step
retrieval_chain = retrieval_prompt | llm.with_structured_output(RetrievalResponse)
relevance_chain = relevance_prompt | llm.with_structured_output(RelevanceResponse)
generation_chain = generation_prompt | llm.with_structured_output(GenerationResponse)
support_chain = support_prompt | llm.with_structured_output(SupportResponse)
utility_chain = utility_prompt | llm.with_structured_output(UtilityResponse)

In [85]:
# SelfRAG w/ LangChain (modified to include RAGAS evaluation)
evaluationSamples = []
samplesOutputDF = pd.DataFrame(columns=["question", "ground_truth", "context", "generation", "TP", "FP", "FN"])
def selfRAGLangChain(query, correctAnswer, vectorstore, top_k=3):

    print(f"\nProcessing query: {query}")

    # Step 1: Determine if retrieval is necessary
    print("Step 1: Determining if retrieval is necessary...")
    input_data = {"query": query}
    retrieval_decision = retrieval_chain.invoke(input_data).response.strip().lower()
    print(f"Retrieval decision: {retrieval_decision}")

    if retrieval_decision == 'yes':

        # Step 2: Retrieve relevant documents
        print("Step 2: Retrieving relevant documents...")
        docs = vectorstore.similarity_search(query, k=top_k)
        contexts = [doc.page_content for doc in docs]
        print(f"Retrieved {len(contexts)} documents")

        # Step 3: Evaluate relevance of retrieved documents
        print("Step 3: Evaluating relevance of retrieved documents...")
        relevant_contexts = []
        for i, context in enumerate(contexts):
            input_data = {"query": query, "context": context}
            relevance = relevance_chain.invoke(input_data).response.strip().lower()
            print(f"Document {i+1} relevance: {relevance}")
            if relevance == 'relevant':
                relevant_contexts.append(context)

        print(f"Number of relevant contexts: {len(relevant_contexts)}")

        # If no relevant contexts found, generate without retrieval
        if not relevant_contexts:

            print("No relevant contexts found. Generating without retrieval...")
            input_data = {"query": query, "context": "No relevant context found."}
            generation = generation_chain.invoke(input_data).response

            evaluationSamples.append({
                "user_input": query,
                "retrieved_contexts": ["no context retrieved"],
                "response": generation,
                "reference": correctAnswer
            })
            samplesOutputDF.loc[len(samplesOutputDF)] = [query, correctAnswer, "no context retrieved", generation, -1, -1, -1]


            return generation

        # Step 4: Generate response using relevant contexts
        print("Step 4: Generating responses using relevant contexts...")
        responses = []
        for i, context in enumerate(relevant_contexts):
            print(f"Generating response for context {i+1}...")
            input_data = {"query": query, "context": context}
            response = generation_chain.invoke(input_data).response

            # Step 5: Assess support
            print(f"Step 5: Assessing support for response {i+1}...")
            input_data = {"response": response, "context": context}
            support = support_chain.invoke(input_data).response.strip().lower()
            print(f"Support assessment: {support}")

            # Step 6: Evaluate utility
            print(f"Step 6: Evaluating utility for response {i+1}...")
            input_data = {"query": query, "response": response}
            utility = int(utility_chain.invoke(input_data).response)
            print(f"Utility score: {utility}")

            responses.append((response, support, utility, context))

        # Select the best response based on support and utility
        print("Selecting the best response...")
        best_response = max(responses, key=lambda x: (x[1] == 'fully supported', x[2]))
        print(f"Best response support: {best_response[1]}, utility: {best_response[2]}")

        evaluationSamples.append({
                "user_input": query,
                "retrieved_contexts": [best_response[3]],
                "response": best_response[0],
                "reference": correctAnswer
            })
        samplesOutputDF.loc[len(samplesOutputDF)] = [query, correctAnswer, best_response[3], best_response[0], -1, -1, -1]
        
        return best_response[0]
    
    else:

        # Generate without retrieval
        print("Generating without retrieval...")
        input_data = {"query": query, "context": "No retrieval necessary."}
        generation = generation_chain.invoke(input_data).response

        evaluationSamples.append({
                "user_input": query,
                "retrieved_contexts": ["no context retrieved"],
                "response": generation,
                "reference": correctAnswer
            })
        samplesOutputDF.loc[len(samplesOutputDF)] = [query, correctAnswer, "no context retrieved", generation, -1, -1, -1]
        
        return generation

In [32]:
# Function to evaluate our RAG pipeline when given ground truth
async def pipelineEvaluation(dataset, metrics):

    # Run through our runs
    results = []
    for run in dataset:

        # Save our inputs/outputs
        inputQuery = run["user_input"]
        groundTruthAnswer = run["reference"]
        contexts = run["retrieved_contexts"]
        response = run["response"]

        # Create a SingleTurnSample object
        sample = SingleTurnSample(
            user_input=inputQuery,
            response=response,
            reference=groundTruthAnswer,
            retrieved_contexts=contexts 
        )

        # Evaluate metrics
        runResults = {"question": inputQuery}
        for metric in metrics:

            # Get the score for the given metric
            try:

                score = await metric.single_turn_ascore(sample)
                runResults[type(metric).__name__] = score

            except Exception as e:
                # Catch errors for debugging
                runResults[type(metric).__name__] = f"Error: {str(e)}"
        
        # Save metric results
        results.append(runResults)
    
    # Calculate mean and standard deviation for each metric
    metricsStats = {}
    for metric in metrics:
        metricName = type(metric).__name__
        scores = [result[metricName] for result in results if isinstance(result[metricName], (int, float))]
        
        # Only calculate stats if there are valid scores
        if scores:
            metricsStats[metricName] = {
                "mean": np.mean(scores),
                "std_dev": np.std(scores),
            }
            
        else:
            metricsStats[metricName] = {
                "mean": "No valid scores",
                "std_dev": "No valid scores",
            }
    
    return results, metricsStats

In [33]:
# Load Data
csvPath = "/Users/christiansarmiento/Library/CloudStorage/OneDrive-MaristCollege/Machine Learning/Data/Cleaned_QA.csv"
maristQA = pd.read_csv(csvPath, header=None)

# To use RecursiveCharacterTextSplitter, we need a list of dictionaries
maristContext = [Document(page_content=text) for text in maristQA[1].tolist()]

# Split Documents into Chunks
textSplitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200, add_start_index=True)
texts = textSplitter.split_documents(maristContext)

# Store Documents in Vector DB (Chroma)
vectorDB = Chroma.from_documents(documents=texts, embedding=OpenAIEmbeddings())

# Setup Retrieval System
#retriever = vectorDB.as_retriever(search_type="similarity", search_kwargs={"k": 3})  # Retrieves 3 documents

In [34]:
# Load metrics
evalMetrics = [LLMContextRecall(llm=LangchainLLMWrapper(llm)), 
               FactualCorrectness(llm=LangchainLLMWrapper(llm)), 
               Faithfulness(llm=LangchainLLMWrapper(llm)), 
               SemanticSimilarity(embeddings=LangchainEmbeddingsWrapper(OpenAIEmbeddings()))]

In [35]:
# Sample records from our dataset
maristTestSample = maristQA.sample(50, replace=False)
maristTestSample.head()

Unnamed: 0,0,1
6,Cost of credit for Masters in Integrated Marke...,Graduate: Tuition and FeesKnow your financial ...
79,How to save money on tuition,Corporate and Organizational PartnershipsAcade...
653,How many courses should be applied by a studen...,Academic CoreForeign Language Study in the Mar...
603,Will my classroom assignment ever change?,Registrar Department Registrar Email registrar...
569,What is Marist Money?,Student Services Department Card Services Name...


In [36]:
# Run our chain with each question and evaluate
for row in maristTestSample.iterrows():
    selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)
    print(f"Generation: {selfRAGResponse}")


Processing query: Cost of credit for Masters in Integrated Marketing Communication
Step 1: Determining if retrieval is necessary...
Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 4
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 4
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 4
Selectin

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=0b0153a4-498d-4385-99c3-e5cbefd205fd,id=0b0153a4-498d-4385-99c3-e5cbefd205fd; trace=0b0153a4-498d-4385-99c3-e5cbefd205fd,id=b12c7077-65db-4c43-a9b4-649e1c87155e; trace=0b0153a4-498d-4385-99c3-e5cbefd205fd,id=d52df420-cb6a-4657-9e0a-4fd941926c97


Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 4
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: The Communications Advisory Board plays a crucial role in guiding and enhancing the communication programs within an academic institution. It typically consists of professionals and experts from various fields of communication, including public relations, journalism, advertising, and broadcasting. Members of the board provide valuable insights into industry trends, curriculum development, and best practices in communication education.

Given your extensive background in communication, including your current roles as the Speaking Across the Curriculum Coordinator and the First Ye

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=0b0153a4-498d-4385-99c3-e5cbefd205fd,id=bd7ea91a-689c-4e62-ad36-9851d0030b5f; trace=ecf7c3d0-1911-4dd7-96d6-bf865a755807,id=ecf7c3d0-1911-4dd7-96d6-bf865a755807; trace=ecf7c3d0-1911-4dd7-96d6-bf865a755807,id=8179ace8-406a-4c9d-a6f5-66bc556b81e1; trace=ecf7c3d0-1911-4dd7-96d6-bf865a755807,id=5f454f70-e308-4039-9164-571d77b75f40; trace=ecf7c3d0-1911-4dd7-96d6-bf865a755807,id=0e723e28-ff18-4d7c-86a7-982c9983d8aa; trace=7981bc30-7ead-4c98-ba0f-c1f23420ae17,id=7981bc30-7ead-4c98-ba0f-c1f23420ae17; trace=7981bc30-7ead-4c98-ba0f-c1f23420ae17,id=147cff60-9db8-4df4-b788-9f38ecc45dec; trace=7981bc30-7ead-4c98-ba0f-c1f23420ae17,id=c812209b-6423-4ac8-b791-cab50e93c473; trace=7981bc30-7ead-4c98

Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: The Marist College Core Program is designed to provide students with a comprehensive educational foundation that emphasizes the liberal arts and sciences. This program aims to foster critical thinking, creativity, and effective communication skills across various disciplines. 

The Core curriculum is structured into several components:

1. **Academic Foundation Courses**: These are typically taken during the freshman year and include courses that explore themes such as Cultural Diversity, Nature & the Environment, Civic Engagement, and Quantitative Reasoning. Notable courses in this category include:
   - **FYS 101 First Year Seminar**: This course helps students transition into college life and develop essential academic skills.
   - **ENG 120 Writing f

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=4607706b-bf33-4fcc-9052-b51652f0b7f1,id=4607706b-bf33-4fcc-9052-b51652f0b7f1; trace=4607706b-bf33-4fcc-9052-b51652f0b7f1,id=5478ba76-6733-4285-b83d-3be91102679a; trace=4607706b-bf33-4fcc-9052-b51652f0b7f1,id=4ff6b81e-5667-4362-ab37-17f509290c52; trace=4607706b-bf33-4fcc-9052-b51652f0b7f1,id=dcf70d25-251d-4f6c-a336-6a06d7328604; trace=bdd059fd-0dbf-4986-89ab-d6853640ea50,id=bdd059fd-0dbf-4986-89ab-d6853640ea50; trace=bdd059fd-0dbf-4986-89ab-d6853640ea50,id=78473f6c-3715-47c4-855d-58ddb53eee00; trace=bdd059fd-0dbf-4986-89ab-d6853640ea50,id=b8fa42dd-d7d2-4a12-b9d2-57ec2de4b10c; trace=bdd059fd-0dbf-4986-89ab-d6853640ea50,id=d3dfcea1-c703-4e1f-9438-dbe27d30e034; trace=f9b89ed0-c6b0-466c

Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: The honors curriculum requires a total of 18 credits, which includes the following components:

1. **Honors First-Year Seminar** (4 credits)
2. **Honors Writing for College** (3 credits)
3. **Thematic and Civic Engagement Seminars** (6 credits)
4. **Honors by Contract** (1 credit)
5. **Honors Thesis** (3 credits)
6. **Honors Senior Seminar** (1 credit)

Students in the honors program engage in seminar-style classes that cover significant intellectual and social topics, taught by Marist's top faculty. These courses emphasize critical thinking and ethics, and many overlap with general education Core requirements.

Processing query: has marist partnered with any library?
Step 1: Determining if retrieval is necessary...
Retrieval decision: yes
Step 2: Retrie

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=243fad92-277a-41ef-847b-b526c445eada,id=128b1c10-0121-4e64-a193-4a269719c91e; trace=243fad92-277a-41ef-847b-b526c445eada,id=7419e431-0e56-4c8c-a6fe-b8b3dac5d152; trace=f96a2b75-b993-4a1c-811b-c60571dc8aa3,id=f96a2b75-b993-4a1c-811b-c60571dc8aa3; trace=f96a2b75-b993-4a1c-811b-c60571dc8aa3,id=150856b0-59ce-4808-a383-e455dc011ee3; trace=f96a2b75-b993-4a1c-811b-c60571dc8aa3,id=c2c46619-6df1-4571-a37e-acafbf49c2a9; trace=f96a2b75-b993-4a1c-811b-c60571dc8aa3,id=f9862e4c-9c3f-4452-8e03-7895f589d1cc; trace=44f8e685-5d89-40e1-8d21-d4659eb4080c,id=44f8e685-5d89-40e1-8d21-d4659eb4080c; trace=44f8e685-5d89-40e1-8d21-d4659eb4080c,id=438653a1-6f33-4711-8d61-c7f8ec00e64f; trace=44f8e685-5d89-40e1

Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=9fbf59a5-2110-418a-a8cc-6e90b0237fc0,id=9fbf59a5-2110-418a-a8cc-6e90b0237fc0; trace=9fbf59a5-2110-418a-a8cc-6e90b0237fc0,id=c21eb058-bbae-42c8-951d-7e1ceeb59e07; trace=9fbf59a5-2110-418a-a8cc-6e90b0237fc0,id=170bfb0e-5fa3-4f74-969f-6ca528eabaf5; trace=9fbf59a5-2110-418a-a8cc-6e90b0237fc0,id=cae460ba-25ba-4035-b0ca-800a624ba417; trace=0852ffd7-502e-4b11-b9d5-d6b9254c512b,id=0852ffd7-502e-4b11-b9d5-d6b9254c512b; trace=0852ffd7-502e-4b11-b9d5-d6b9254c512b,id=7289553a-176a-44eb-9738-7a2c2674fc6c; trace=0852ffd7-502e-4b11-b9d5-d6b9254c512b,id=eaaa3fba-c404-4910-a599-6dc25ec1b643; trace=0852ffd7-502e-4b11-b9d5-d6b9254c512b,id=7a3fb621-8155-4203-9bb8-7d76927cc572; trace=4c2c1839-0a8f-444a

Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 4
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: partially supported
Step 6: Evaluating utility for response 2...
Utility score: 4
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 4
Selecting the best response...
Best response support: fully supported, utility: 4
Generation: The context provided does not specifically mention any partnerships between Marist College and libraries. It discusses academic partnerships with organizations that share a commitment to diversity and community service, but does not detail any collaborations with libraries.

Processing query: Where is the registrar's office located?
Step 1: Determining if retrieval is necessary...
Retrieval decision

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=b3115f2a-1f22-44d5-9e5f-51db0251fad8,id=334409f9-3c85-4d36-b4c4-22fe9b10d180; trace=b3115f2a-1f22-44d5-9e5f-51db0251fad8,id=897b9d9a-823a-4bd3-931d-d9c0ab54033b; trace=2c41b261-b775-45f4-a00c-4e5f8a8e6ea3,id=2c41b261-b775-45f4-a00c-4e5f8a8e6ea3; trace=2c41b261-b775-45f4-a00c-4e5f8a8e6ea3,id=3d1d687f-d575-4c49-8e48-de779fc59d5f; trace=2c41b261-b775-45f4-a00c-4e5f8a8e6ea3,id=a0e195e0-0982-4b66-a770-081f6b2ec7b0; trace=2c41b261-b775-45f4-a00c-4e5f8a8e6ea3,id=28b15401-b662-4c91-8347-eee04f71ccb7; trace=013cbf0a-8063-44c4-9233-bc45a07717a0,id=013cbf0a-8063-44c4-9233-bc45a07717a0; trace=013cbf0a-8063-44c4-9233-bc45a07717a0,id=c6eaf983-4260-4a81-9ddc-490dbb16ece8; trace=013cbf0a-8063-44c4

Retrieval decision: yes
Step 2: Retrieving relevant documents...


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=151722c4-a364-45dc-82d1-00d258151749,id=151722c4-a364-45dc-82d1-00d258151749; trace=151722c4-a364-45dc-82d1-00d258151749,id=1f6a7f4e-080c-4ce8-9518-b3194e7fe32f; trace=151722c4-a364-45dc-82d1-00d258151749,id=0acad749-07f1-458f-8541-02f2af58ebd6; trace=151722c4-a364-45dc-82d1-00d258151749,id=2549cd3f-1955-40b5-a405-dbf195e293f6; trace=e18f3d49-5e03-4fac-be92-bde25316b661,id=e18f3d49-5e03-4fac-be92-bde25316b661; trace=e18f3d49-5e03-4fac-be92-bde25316b661,id=73e99772-fb79-436a-a82a-e9689ef689b4; trace=e18f3d49-5e03-4fac-be92-bde25316b661,id=4179ef92-38c2-4256-b6ae-43633e001b70; trace=e18f3d49-5e03-4fac-be92-bde25316b661,id=0cab9ff7-a5c1-4210-b024-20e400e7386d; trace=dbaadb76-4e9c-4519

Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=800d4993-e2bb-44e1-966f-8a955edf9723,id=fb0ece4f-daee-4c46-a62f-077298597813; trace=800d4993-e2bb-44e1-966f-8a955edf9723,id=4687d749-3813-4386-881d-04fd461dede3; trace=0ec22323-ab16-410f-ae8b-51b50a1aba6f,id=0ec22323-ab16-410f-ae8b-51b50a1aba6f; trace=0ec22323-ab16-410f-ae8b-51b50a1aba6f,id=f3c437a9-1b6e-4888-8ed7-83739251b2d6; trace=0ec22323-ab16-410f-ae8b-51b50a1aba6f,id=c34b49fe-ae35-4bda-9f22-51bb5b6e190c; trace=0ec22323-ab16-410f-ae8b-51b50a1aba6f,id=eb640ba6-cb2e-4f6d-a403-9e5bd74ff3d5; trace=3c9f2997-a7ec-4ab1-82ef-01933fbff638,id=3c9f2997-a7ec-4ab1-82ef-01933fbff638; trace=3c9f2997-a7ec-4ab1-82ef-01933fbff638,id=c212797b-604c-4346-82a6-a48eb0a658dd; trace=3c9f2997-a7ec-4ab1

Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 4
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 4
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: You can do internships in various prestigious organizations and companies in New York City and the Hudson Valley. The Department's Internship Coordinator assists students in finding valuabl

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=9495d8ee-09e6-4ff6-bae5-8387acaaf254,id=9495d8ee-09e6-4ff6-bae5-8387acaaf254; trace=9495d8ee-09e6-4ff6-bae5-8387acaaf254,id=a71e6eb3-9b0a-4abb-9a8e-cfd89b745167; trace=9495d8ee-09e6-4ff6-bae5-8387acaaf254,id=d15b27ea-bc6a-4564-9b6b-6582b23e79ac; trace=9495d8ee-09e6-4ff6-bae5-8387acaaf254,id=92fd08ee-5635-4594-a9ad-ff8abfa61b71; trace=4bc552f5-0074-4789-ad44-6349e978b7e6,id=4bc552f5-0074-4789-ad44-6349e978b7e6; trace=4bc552f5-0074-4789-ad44-6349e978b7e6,id=1ece20ef-1bd7-4da2-b171-7f646829bf4c; trace=4bc552f5-0074-4789-ad44-6349e978b7e6,id=727729f4-0f5a-4437-87b5-1a86954c7454; trace=4bc552f5-0074-4789-ad44-6349e978b7e6,id=c0d72575-f238-428d-8cf0-b7bdeaa538c8; trace=c750a18c-163f-4a83

Generation: Women have held leadership roles throughout history, but the recognition and acceptance of women in leadership positions have evolved significantly over time. In ancient societies, women like Cleopatra and Queen Elizabeth I wielded power, but their leadership was often the exception rather than the norm.

The modern movement for women in leadership began in earnest in the late 19th and early 20th centuries, coinciding with the suffrage movement, which fought for women's right to vote. This period saw the emergence of women in various leadership roles in politics, business, and social movements.

The 1960s and 1970s marked a significant turning point with the feminist movement advocating for gender equality, leading to increased visibility and acceptance of women in leadership positions across various sectors. Today, while progress has been made, the journey towards equal representation in leadership roles continues.

Processing query: Who teaches acting 1
Step 1: Determinin

In [37]:
# Evaluation for LLMContextRecall
await computeEvaluationMetrics(evaluationSamples, [evalMetrics[0]])

Starting Evaluation...


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=c6761b29-82c8-48be-b8cd-043c9c2de41a,id=c6761b29-82c8-48be-b8cd-043c9c2de41a; trace=c6761b29-82c8-48be-b8cd-043c9c2de41a,id=bac914b0-64da-433e-b7f3-9ff8fb64733e; trace=c6761b29-82c8-48be-b8cd-043c9c2de41a,id=9a56ebc2-e9e3-43dd-91fe-6d7db5397c68; trace=c6761b29-82c8-48be-b8cd-043c9c2de41a,id=23dd5a8e-a9c0-48dd-9986-6483091e8c6c; trace=084816a5-0753-4700-82a7-8c2a05b3d74f,id=084816a5-0753-4700-82a7-8c2a05b3d74f; trace=084816a5-0753-4700-82a7-8c2a05b3d74f,id=ed4b18e4-18d5-467c-be05-d98fe03f6aaa; trace=084816a5-0753-4700-82a7-8c2a05b3d74f,id=61256643-f39c-4664-82e3-8abd09605247; trace=084816a5-0753-4700-82a7-8c2a05b3d74f,id=481efc52-415f-4b2e-a788-b9ac0bd4a322; trace=9c85df69-94ed-47e7

Evaluation Finished!
{'question': 'Cost of credit for Masters in Integrated Marketing Communication', 'LLMContextRecall': 0.045454545454545456}
{'question': 'How to save money on tuition', 'LLMContextRecall': 1.0}
{'question': 'How many courses should be applied by a student in  global studies.', 'LLMContextRecall': 0.25}
{'question': 'Will my classroom assignment ever change?', 'LLMContextRecall': 0.3333333333333333}
{'question': 'What is Marist Money?', 'LLMContextRecall': 0.0}
{'question': 'Where did McManus grow up?', 'LLMContextRecall': 1.0}
{'question': 'Mark James Morreale history?', 'LLMContextRecall': 0.375}
{'question': 'What are the marist gates built of?', 'LLMContextRecall': 0.8181818181818182}
{'question': 'What are the other disciplinary approaches included in a pathway?', 'LLMContextRecall': 0.46153846153846156}
{'question': 'Tell me about the environmental monitoring station at Marist', 'LLMContextRecall': 0.25}
{'question': 'Communications advisory board info', 'LLMCo

In [38]:
# Evaluation for FactualCorrectness
await computeEvaluationMetrics(evaluationSamples, [evalMetrics[1]])

Starting Evaluation...


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=34cccac1-9c55-4c11-94a9-2c1a6db4a342,id=34cccac1-9c55-4c11-94a9-2c1a6db4a342; trace=34cccac1-9c55-4c11-94a9-2c1a6db4a342,id=bae7dcf3-0acb-4480-a135-2ca5deb5a61f; trace=34cccac1-9c55-4c11-94a9-2c1a6db4a342,id=6355c34b-8b88-4ff4-bfeb-661af3b106ed; trace=22305c59-752b-42cf-8752-aa0b09cb135c,id=22305c59-752b-42cf-8752-aa0b09cb135c; trace=22305c59-752b-42cf-8752-aa0b09cb135c,id=6236ca2c-ba51-49a6-840a-f1fc08ae0c26; trace=22305c59-752b-42cf-8752-aa0b09cb135c,id=31f2a5a1-bec6-41cd-8230-1c8c4578d26b; patch: trace=ccb8c2e1-55a4-4ff6-89f5-221aface9ee2,id=ccb8c2e1-55a4-4ff6-89f5-221aface9ee2; trace=ccb8c2e1-55a4-4ff6-89f5-221aface9ee2,id=937364a8-eaa0-4925-b62a-a3acbe42bb09; trace=ccb8c2e1-55

Evaluation Finished!
{'question': 'Cost of credit for Masters in Integrated Marketing Communication', 'FactualCorrectness': 0.15}
{'question': 'How to save money on tuition', 'FactualCorrectness': 0.0}
{'question': 'How many courses should be applied by a student in  global studies.', 'FactualCorrectness': 0.0}
{'question': 'Will my classroom assignment ever change?', 'FactualCorrectness': 0.0}
{'question': 'What is Marist Money?', 'FactualCorrectness': 0.14}
{'question': 'Where did McManus grow up?', 'FactualCorrectness': 0.0}
{'question': 'Mark James Morreale history?', 'FactualCorrectness': 0.49}
{'question': 'What are the marist gates built of?', 'FactualCorrectness': 0.12}
{'question': 'What are the other disciplinary approaches included in a pathway?', 'FactualCorrectness': 0.43}
{'question': 'Tell me about the environmental monitoring station at Marist', 'FactualCorrectness': 0.19}
{'question': 'Communications advisory board info', 'FactualCorrectness': 0.36}
{'question': 'What 

In [39]:
# Evaluation for Faithfulness
await computeEvaluationMetrics(evaluationSamples, [evalMetrics[2]])

Starting Evaluation...


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=dd3402f7-0ce8-4adc-9389-4eb710255198,id=df1f1957-8d9d-4e0e-ba70-776f23a4679b; trace=dd3402f7-0ce8-4adc-9389-4eb710255198,id=992dd1ff-32de-486a-a921-50a0c2e5266f; trace=7dedd063-50b1-4d7b-8927-a6bde7c003d0,id=7dedd063-50b1-4d7b-8927-a6bde7c003d0; trace=7dedd063-50b1-4d7b-8927-a6bde7c003d0,id=80049155-e367-4eed-93d9-eff8bac750b5; trace=7dedd063-50b1-4d7b-8927-a6bde7c003d0,id=dc6a4842-2826-4217-9e68-1d3c332f34d9; trace=7dedd063-50b1-4d7b-8927-a6bde7c003d0,id=3e7c7bce-1692-4658-8ff5-7bc55158b342; trace=7dedd063-50b1-4d7b-8927-a6bde7c003d0,id=4638ee56-e23a-4d67-9b89-f6d287afd9ab; trace=7dedd063-50b1-4d7b-8927-a6bde7c003d0,id=e1fb813d-cb65-4370-9995-76f26b4a6db8; trace=7dedd063-50b1-4d7b

Evaluation Finished!
{'question': 'Cost of credit for Masters in Integrated Marketing Communication', 'Faithfulness': 1.0}
{'question': 'How to save money on tuition', 'Faithfulness': 0.0}
{'question': 'How many courses should be applied by a student in  global studies.', 'Faithfulness': 0.8888888888888888}
{'question': 'Will my classroom assignment ever change?', 'Faithfulness': 0.0}
{'question': 'What is Marist Money?', 'Faithfulness': 0.0}
{'question': 'Where did McManus grow up?', 'Faithfulness': 0.75}
{'question': 'Mark James Morreale history?', 'Faithfulness': 1.0}
{'question': 'What are the marist gates built of?', 'Faithfulness': 1.0}
{'question': 'What are the other disciplinary approaches included in a pathway?', 'Faithfulness': 0.8571428571428571}
{'question': 'Tell me about the environmental monitoring station at Marist', 'Faithfulness': 0.0}
{'question': 'Communications advisory board info', 'Faithfulness': 0.08}
{'question': 'What does OHBM do?', 'Faithfulness': 0.0}
{'qu

In [40]:
# Evaluation for SemanticSimilarity
await computeEvaluationMetrics(evaluationSamples, [evalMetrics[3]])

Starting Evaluation...


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=7ea61583-82c5-4faf-9228-e3f1b6899d14,id=e9f844ee-8076-4c1c-b024-9d8a7515dcf5; trace=d3f58790-0ec0-49cf-b5bc-155e1781c6ef,id=d3f58790-0ec0-49cf-b5bc-155e1781c6ef; trace=d3f58790-0ec0-49cf-b5bc-155e1781c6ef,id=3d1f3c2f-8cfd-492b-a065-71886ef33738; trace=d3f58790-0ec0-49cf-b5bc-155e1781c6ef,id=cd12617d-b2f1-4b65-90e9-45f3d92c42fe; trace=d3f58790-0ec0-49cf-b5bc-155e1781c6ef,id=38b2235c-4fb0-4327-ba6b-1512257d3ad3; trace=d3f58790-0ec0-49cf-b5bc-155e1781c6ef,id=f8d8c398-fd79-416b-aff7-1f615a330b83; trace=9c3b0b6f-6962-46e6-a00f-4a244358f411,id=9c3b0b6f-6962-46e6-a00f-4a244358f411; trace=9c3b0b6f-6962-46e6-a00f-4a244358f411,id=7ff19ed2-f6bd-47b2-8fab-6bfc8f5d2c3a; trace=9c3b0b6f-6962-46e6

Evaluation Finished!
{'question': 'Cost of credit for Masters in Integrated Marketing Communication', 'SemanticSimilarity': 0.8679842595846049}
{'question': 'How to save money on tuition', 'SemanticSimilarity': 0.7495072443253069}
{'question': 'How many courses should be applied by a student in  global studies.', 'SemanticSimilarity': 0.8460346491278142}
{'question': 'Will my classroom assignment ever change?', 'SemanticSimilarity': 0.7159182160255063}
{'question': 'What is Marist Money?', 'SemanticSimilarity': 0.882406980586127}
{'question': 'Where did McManus grow up?', 'SemanticSimilarity': 0.7769311677697431}
{'question': 'Mark James Morreale history?', 'SemanticSimilarity': 0.9519515135275044}
{'question': 'What are the marist gates built of?', 'SemanticSimilarity': 0.8999151312380386}
{'question': 'What are the other disciplinary approaches included in a pathway?', 'SemanticSimilarity': 0.8835759237096779}
{'question': 'Tell me about the environmental monitoring station at Marist

### Manual Evaluation

In [83]:
# Load Data
csvPath = "/Users/christiansarmiento/Library/CloudStorage/OneDrive-MaristCollege/Machine Learning/Data/manual_eval.csv"
maristQA = pd.read_csv(csvPath, sep=";")

# To use RecursiveCharacterTextSplitter, we need a list of dictionaries
maristContext = [Document(page_content=text) for text in maristQA["ground_truth"].tolist()]

# Split Documents into Chunks
textSplitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200, add_start_index=True)
texts = textSplitter.split_documents(maristContext)

# Store Documents in Vector DB (Chroma)
vectorDB = Chroma.from_documents(documents=texts, embedding=OpenAIEmbeddings())

In [86]:
# Run our chain with each question and evaluate
for row in maristQA.iterrows():
    selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)
    print(f"Generation: {selfRAGResponse}")

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)



Processing query: What is a Marist ally?
Step 1: Determining if retrieval is necessary...
Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response suppo

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 4
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: Based on the given query and context, the answe

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: Yes, there are career fairs on campus. The cont

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=06098428-13bd-4488-9365-060257332ec3,id=cd8460e2-e58b-403b-8558-a113f989fb45; trace=cff1c341-66ae-447a-b116-1f359e341d3b,id=cff1c341-66ae-447a-b116-1f359e341d3b; trace=cff1c341-66ae-447a-b116-1f359e341d3b,id=96f8aa5d-58f2-44b7-b46f-0ae0c207039d; trace=cff1c341-66ae-447a-b116-1f359e341d3b,id=138b13bf-4601-4d6e-be8a-cc37b443075a; trace=cff1c341-66ae-447a-b116-1f359e341d3b,id=8764c7a4-cde7-44b4-8c27-d9ca36dde009; trace=36cb7a23-5988-46be-9c41-f82e635d20be,id=36cb7a23-5988-46be-9c41-f82e635d20be; trace=36cb7a23-5988-46be-9c41-f82e635d20be,id=40c72518-ac50-4ed0-886e-f2107c2a9235; trace=36cb7a23-5988-46be-9c41-f82e635d20be,id=7ea44a71-86c0-4e01-941c-8f847f8141c4; trace=36cb7a23-5988-46be

Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: Based on the given query and context, the key information about Juan-Manuel Olivera-Silvera is:

- He graduated from L'École Supérieure Des Arts Appliqués Duperré in Paris, France, which is a renowned European institution for teaching art, design, and decorative arts. 

- He is skilled in multiple design disciplines including fashion, textile, industrial, and graphic design, as well as fine art.

- In Paris, he had the privilege of working with prestigious fashion companies like Courrèges, Carven Haute-Couture, and Popy Moreni.

- In New York, he has worked in the fashion industry at companies like Vivienne Tam, Liz Claiborne, Kenneth Cole, Overstock.com, The Doneger Group, and Ugo Luxury.

- He is currently a Professional Lecturer of Fashion Design & Merchandising.

Processing query: Studying phi

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: Given the context about Professor Henry John Pr

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: Based on the query "Marist social media" and th

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: Based on the provided context, it appears that 

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: The multicultural affairs center is a division 

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: According to the context provided, the research

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: Based on the query "Janine Larmon Peterson" and

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: irrelevant
Document 2 relevance: irrelevant
Document 3 relevance: irrelevant
Number of relevant contexts: 0
No relevant contexts found. Generating without retrieval...
Generation: Unfortunately, without any additional context about the "allied health building", I do not have enough information to determine when it was built. The query asks about a specific building, but provides no details about its location or what organization it is associated with. To answer this question, I would need more details about the building in question.

Processing query: Does Marist has equipped with fire safety in all buildings?
Step 1: Determining if retrieval is necessary...


  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: Based on the information provided, it appears t

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=9e0d469b-a027-40fc-9c45-ef188f923ad7,id=9e0d469b-a027-40fc-9c45-ef188f923ad7; trace=9e0d469b-a027-40fc-9c45-ef188f923ad7,id=1775c06f-ff87-418a-914d-2305ed73debb; trace=9e0d469b-a027-40fc-9c45-ef188f923ad7,id=43a5c3df-0e16-4798-9af5-4a444ab31d76; trace=9e0d469b-a027-40fc-9c45-ef188f923ad7,id=a9825d54-2448-4f93-bd11-e13fd062f3d7; trace=d22caf92-efe0-4bf9-8449-320eed4569b1,id=d22caf92-efe0-4bf9-8449-320eed4569b1; trace=d22caf92-efe0-4bf9-8449-320eed4569b1,id=59011268-2f0d-402d-829e-7311802e0ec9; trace=d22caf92-efe0-4bf9-8449-320eed4569b1,id=f2e4b795-a3d0-4274-a556-5a74328a004c; trace=d22caf92-efe0-4bf9-8449-320eed4569b1,id=eaa19c77-ea82-4bc2-a70b-54ff81d645c0; trace=4b5d1802-76d6-4f0e

Utility score: 4
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: Based on the information provided, Joey Fanfarelli's education is that he has a PhD, as he is referred to as "Dr. Fanfarelli" in the context. The context mentions that he is an Associate Professor at Marist's School of Communication and the Arts, and prior to that he spent time at the University of Central Florida's Games and Interactive Media Department and Institute for Simulation & Training. This suggests he has an advanced academic background and education, likely including a doctoral degree.

Processing query: Is the Mas

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: based on the context provided, it seems the master of science in information systems program is not offered fully online. the context mentions that students can choose "convenient online courses" as part of the information systems certificate program, which includes 5 specific courses that can be taken. however, it does not indicate that the full ms in information systems degree can be completed

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: Based on the given context, in an art studio co

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: Yes, you can still apply to Marist College whil

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: Based on the provided context, Marist College s

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: Based on the information provided, Eileen Curle

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: According to the context provided, the physical

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=e1a28a80-e7df-4ccd-a42c-bb8e52cc4aff,id=3058f1d5-5346-440e-a7e5-31a1df2e3e02; trace=e1a28a80-e7df-4ccd-a42c-bb8e52cc4aff,id=7261bbeb-8622-4ac5-8a92-c007da4ec5b1; trace=b1a67d01-0bd8-4ff2-a047-82b900d500d8,id=b1a67d01-0bd8-4ff2-a047-82b900d500d8; trace=b1a67d01-0bd8-4ff2-a047-82b900d500d8,id=74a5adb3-a5ce-405e-952d-da877b44c78a; trace=b1a67d01-0bd8-4ff2-a047-82b900d500d8,id=97160322-d708-4ebe-986a-814108aae4ee; trace=b1a67d01-0bd8-4ff2-a047-82b900d500d8,id=995244cd-cd82-45e8-89f1-1c39b515565f; trace=f7020234-38d4-4e94-93e0-80f05801451f,id=f7020234-38d4-4e94-93e0-80f05801451f; trace=f7020234-38d4-4e94-93e0-80f05801451f,id=6507837a-ce3f-4c39-8ad0-da2d6a6e1181; trace=f7020234-38d4-4e94

Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: Based on the given query 'Dr. Arth research' and the context provided, it seems that Dr. Arth is a philosopher who specializes in the philosophy of art, aesthetics, and value theory. Some key details about their research interests and publications include:

- Philosophy of Art, Aesthetics, Value Theory
- Comics and Film
- Pornography and depiction in art
- Artistic value and institutions

Specific publications mentioned are:
- "Are You Ready for Some Football? A Monday Night Documentary?" published in the Journal of Aesthetics and Art Criticism in 2018
- "Why Serials Are Killer" published in the Journal of Aesthetics and Art Criticism in 2013 
- "Pornography at the Edge: Depiction, Fiction, and Sexual Predilection" published in the book Art and Pornograp

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: partially supported
Step 6: Evaluating utility for response 1...
Utility score: 4
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 4
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 4
Selecting the best response...
Best response support: fully supported, utility: 4
Generation: Based on the context provided, it seems tha

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: Based on the context provided, it appears that 

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: Based on the context provided, it seems the res

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 4
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: Based on the given query and context, here is a

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: Based on the information provided, it seems tha

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: Based on the provided context, it appears that 

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: Based on the given context, it seems that histo

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 4
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 4
Generating response for context 3...


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=99466f6f-9556-448e-98d7-ca49641d7e55,id=99466f6f-9556-448e-98d7-ca49641d7e55; trace=99466f6f-9556-448e-98d7-ca49641d7e55,id=3944b33d-aa73-4ac5-8b59-6fa63b992c6b; trace=99466f6f-9556-448e-98d7-ca49641d7e55,id=332c4a63-1117-4848-b948-13590a7fbdad; trace=99466f6f-9556-448e-98d7-ca49641d7e55,id=a31e82ae-bc91-4457-95ec-a4d6567dfb9d; trace=30769d32-5d60-4351-b3a0-16083021a8b6,id=30769d32-5d60-4351-b3a0-16083021a8b6; trace=30769d32-5d60-4351-b3a0-16083021a8b6,id=e5f81a40-b904-419a-8c70-30599dc23f86; trace=30769d32-5d60-4351-b3a0-16083021a8b6,id=a76ebc48-5fe9-4b25-8dae-18788d290994; trace=30769d32-5d60-4351-b3a0-16083021a8b6,id=f6de7f6c-3190-4365-97de-1dea715d3961; trace=1947da4f-25b6-4a22

Step 5: Assessing support for response 3...
Support assessment: partially supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 4
Generation: Based on the context provided, the tunnel mentioned in the query was likely constructed as part of the mansion designed by Carrère and Hastings for Colonel Payne in Esopus, New York. The mansion was built in a style similar to one Payne had seen on the Italian coast, sometime prior to his death in 1917. However, the exact date of when the tunnel was made is not specified in the information given. The context indicates the mansion and associated estate were used for various purposes over the years, including as a school and retreat house, but does not provide a definitive date for the tunnel's construction.

Processing query: about Professor McNulty
Step 1: Determining if retrieval is necessary...


  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: irrelevant
Document 2 relevance: irrelevant
Document 3 relevance: irrelevant
Number of relevant contexts: 0
No relevant contexts found. Generating without retrieval...
Generation: I'm afraid I don't have any specific information about Professor McNulty. Without additional context about who this person is or what field they work in, I don't have enough details to provide a substantive response to your query. If you can share more details about Professor McNulty, I'd be happy to try to generate a more informative response.

Processing query: What is WCF?
Step 1: Determining if retrieval is necessary...


  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: irrelevant
Document 2 relevance: irrelevant
Document 3 relevance: irrelevant
Number of relevant contexts: 0
No relevant contexts found. Generating without retrieval...
Generation: WCF stands for Windows Communication Foundation. It is a framework for building service-oriented applications in the .NET platform. WCF provides a unified programming model for rapidly building service-oriented applications, communication across different platforms and interoperability with other communication technologies. Some key features of WCF include:

- Support for multiple communication protocols (HTTP, TCP, MSMQ, etc.)
- Built-in security, reliability and transaction handling
- Interoperability with other service technologies like SOAP, REST, etc.
- Extensibility through custom bindings and behaviors
- Hosting flexibility - can be hosted i

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: irrelevant
Document 3 relevance: relevant
Number of relevant contexts: 2
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: Here is how I can help with Marist alerts:

Marist has a system for sending important alerts and notifications to students, faculty, and staff. This includes things like weather-related closures, campus emergencies, and othe

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: According to the provided context, Professor Su

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 2
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: Based on the provided context, here is how you 

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: irrelevant
Document 2 relevance: irrelevant
Document 3 relevance: irrelevant
Number of relevant contexts: 0
No relevant contexts found. Generating without retrieval...
Generation: ICA stands for International Co-operative Alliance, which is an independent, non-governmental association that represents cooperatives worldwide. The ICA was founded in 1895 and promotes the cooperative business model as a way to address economic, social and cultural needs.

Processing query: Who is the chair of the history department?
Step 1: Determining if retrieval is necessary...


  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: According to the provided context, the chair of

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: irrelevant
Document 3 relevance: irrelevant
Number of relevant contexts: 1
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 3
Selecting the best response...
Best response support: fully supported, utility: 3
Generation: Based on the context provided, the undergraduate admission department hours are not specified. However, the information indicates that the Office of Graduate Admission is open for one-on-one prospective student advisement Monday through Thursday from 8:30 am to 6:00 pm and on Friday from 8:30 am to 5:00 pm. Campus tours for graduate students are also available Monday through Friday from 9:00 am to 

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: irrelevant
Document 2 relevance: irrelevant
Document 3 relevance: irrelevant
Number of relevant contexts: 0
No relevant contexts found. Generating without retrieval...
Generation: Unfortunately, without any additional context about the specific campus or university, I do not have enough information to provide the campus ministry hours. Campus ministry hours can vary greatly depending on the institution. If you are able to provide more details about the campus in question, I would be happy to try and research the specific ministry hours for that location.

Processing query: Can I take online classes at Marist?
Step 1: Determining if retrieval is necessary...


  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: Based on the provided context, it seems that Ma

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: based on the context provided, it seems that the application process for the childhood education program typically involves the following steps:

1. register with the education department in the first semester of freshman year to ensure timely completion of the requirements. 

2. complete the following prerequisite courses with a grade of c+ or higher:
- educ 101 foundations of education
- educ 

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=814ac780-c414-456e-999f-08568fd6a3bd,id=c1711ce9-5358-4c94-9035-cae2c82dd9f7; trace=814ac780-c414-456e-999f-08568fd6a3bd,id=03a38819-9fdd-4b17-b702-2fea12757bd5; trace=15cd04b4-a58f-4117-ade3-47847ad2cad5,id=15cd04b4-a58f-4117-ade3-47847ad2cad5; trace=15cd04b4-a58f-4117-ade3-47847ad2cad5,id=9c588ea5-4e43-41be-ab47-556fede43567; trace=15cd04b4-a58f-4117-ade3-47847ad2cad5,id=a95300c9-28d9-4a35-a161-cc4d291b1f8d; trace=15cd04b4-a58f-4117-ade3-47847ad2cad5,id=7d0ab622-2a04-469b-8a59-4f87b6d5c3ed; trace=504a17af-089f-445b-8e09-0871672d488d,id=504a17af-089f-445b-8e09-0871672d488d; trace=504a17af-089f-445b-8e09-0871672d488d,id=b33bcbc6-ddaf-41e5-9fd8-bb0a0f54e488; trace=504a17af-089f-445b

Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 4
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: The query "department of philosophy and religious studies" is about the Department of Philosophy and Religious Studies at Marist College. The mission of this department is at the core of th

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 4
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 4
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: Based on the context provided, commuter parking

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 4
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: Based on the provided context, it is possible t

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: Based on the given query and context, here is a

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: Based on the provided context, it appears that 

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 4
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 4
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: Based on the context provided, it appears that 

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: Based on the provided context, Carolyn C. Mathe

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: Based on the provided context, some places that

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=3cc7dce9-0fa2-4b65-9afb-e2b0eee2bd72,id=3cc7dce9-0fa2-4b65-9afb-e2b0eee2bd72; trace=3cc7dce9-0fa2-4b65-9afb-e2b0eee2bd72,id=652f3ece-d258-4b7f-b667-820028a91f83; trace=3cc7dce9-0fa2-4b65-9afb-e2b0eee2bd72,id=064e9292-52ce-4d93-a637-a06feaa989e9; trace=3cc7dce9-0fa2-4b65-9afb-e2b0eee2bd72,id=4abd32aa-024b-409a-8153-dc3f8072d091; trace=e74ddd75-6d43-465d-86b0-a0c60f016c7c,id=e74ddd75-6d43-465d-86b0-a0c60f016c7c; trace=e74ddd75-6d43-465d-86b0-a0c60f016c7c,id=4fd8ca74-2a3a-4560-b79a-8b7ce1fe326c; trace=e74ddd75-6d43-465d-86b0-a0c60f016c7c,id=f283d14f-40c7-4733-b081-be3bf70ae9d8; trace=e74ddd75-6d43-465d-86b0-a0c60f016c7c,id=be6bc1bd-df8b-4493-a14c-8e795d3a4d0f; trace=2dcedb84-80c4-4350

Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: Based on the query and context provided, here is a summary of how to find out more about graduate school:

1. Consult with Marist faculty who work in the discipline you wish to pursue. This

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: If you have finished your breadth requirements 

  selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)


Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: Here is a response based on the given query and

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=5ea7f70b-e21d-4b6f-8a18-e54bfd7c7cba,id=e6d232ce-5033-413c-9c62-4dd4756d0231; trace=5ea7f70b-e21d-4b6f-8a18-e54bfd7c7cba,id=8cb31d29-e087-45ca-a636-98b087c83623; trace=c5c1b50d-ad39-4d6d-822b-f172360214e7,id=c5c1b50d-ad39-4d6d-822b-f172360214e7; trace=c5c1b50d-ad39-4d6d-822b-f172360214e7,id=b07641de-300c-4e42-bf4e-a3e92c486f4e; trace=c5c1b50d-ad39-4d6d-822b-f172360214e7,id=ffbc5023-9ee5-426d-be9b-2dc5c2f582bb; trace=c5c1b50d-ad39-4d6d-822b-f172360214e7,id=740d7d06-ba82-4079-96e7-4db60c9f8f1b; trace=943782d0-de03-47ee-8c1d-e4105953e4c2,id=943782d0-de03-47ee-8c1d-e4105953e4c2; trace=943782d0-de03-47ee-8c1d-e4105953e4c2,id=aec09498-f2ea-4c8a-8920-10f2c7bb5618; trace=943782d0-de03-47ee

In [89]:
# Save Pandas DF
samplesOutputDF.to_excel("/Users/christiansarmiento/Library/CloudStorage/OneDrive-MaristCollege/Machine Learning/Data/selfRAG_manual_eval.xlsx", index=False)

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=6754e452-23de-4e77-b8a8-8a9315948f16,id=75ff1732-df66-483c-b477-aa273295dffc; trace=6754e452-23de-4e77-b8a8-8a9315948f16,id=f5e46a6e-6f4d-4b5a-bdf4-afe799025d9a; trace=6754e452-23de-4e77-b8a8-8a9315948f16,id=445441e4-c103-462e-8680-ca4dd0c452bc; trace=695647b0-1fd8-457e-ac3f-f25fca965dbb,id=695647b0-1fd8-457e-ac3f-f25fca965dbb; trace=695647b0-1fd8-457e-ac3f-f25fca965dbb,id=03879d14-4be7-4fde-bc24-c8e654619073; trace=695647b0-1fd8-457e-ac3f-f25fca965dbb,id=ef7f2b4e-cb50-4c67-9b78-b502dbbcef49; trace=695647b0-1fd8-457e-ac3f-f25fca965dbb,id=121f27e6-286f-481c-a7d4-7040a66957b3; trace=e2a59b3d-9269-4bab-a719-38ee2a2cfdb7,id=e2a59b3d-9269-4bab-a719-38ee2a2cfdb7; trace=e2a59b3d-9269-4bab

## Rerun with Anthropic

In [53]:
# Load Anthropic LLM
os.environ["ANTHROPIC_API_KEY"] = anthropicKey()
llm = ChatAnthropic(model='claude-3-haiku-20240307', max_tokens_to_sample=1000)

In [65]:
# LLM

# Prompt templates
class RetrievalResponse(BaseModel):
    response: str = Field(..., title="Determines if retrieval is necessary", description="Output only 'Yes' or 'No'.")
retrieval_prompt = PromptTemplate(
    input_variables=["query"],
    template="Given the query '{query}', determine if retrieval is necessary. Output only 'Yes' or 'No'."
)

class RelevanceResponse(BaseModel):
    response: str = Field(..., title="Determines if context is relevant", description="Output only 'Relevant' or 'Irrelevant'.")
relevance_prompt = PromptTemplate(
    input_variables=["query", "context"],
    template="Given the query '{query}' and the context '{context}', determine if the context is relevant. Output only 'Relevant' or 'Irrelevant'."
)

class GenerationResponse(BaseModel):
    response: str = Field(..., title="Generated response", description="The generated response.")
generation_prompt = PromptTemplate(
    input_variables=["query", "context"],
    template="Given the query '{query}' and the context '{context}', generate a response."
)

class SupportResponse(BaseModel):
    response: str = Field(..., title="Determines if response is supported", description="Output 'Fully supported', 'Partially supported', or 'No support'.")
support_prompt = PromptTemplate(
    input_variables=["response", "context"],
    template="Given the response '{response}' and the context '{context}', determine if the response is supported by the context. Output 'Fully supported', 'Partially supported', or 'No support'."
)

class UtilityResponse(BaseModel):
    response: int = Field(..., title="Utility rating", description="Rate the utility of the response from 1 to 5.")
utility_prompt = PromptTemplate(
    input_variables=["query", "response"],
    template="Given the query '{query}' and the response '{response}', rate the utility of the response from 1 to 5. Please only return a single integer value."
)

## Create LLMChains for each step
retrieval_chain = retrieval_prompt | llm.with_structured_output(RetrievalResponse)
relevance_chain = relevance_prompt | llm.with_structured_output(RelevanceResponse)
generation_chain = generation_prompt | llm.with_structured_output(GenerationResponse)
support_chain = support_prompt | llm.with_structured_output(SupportResponse)
utility_chain = utility_prompt | llm.with_structured_output(UtilityResponse)

In [66]:
# SelfRAG w/ LangChain (modified to include RAGAS evaluation)
evaluationSamples = []
def selfRAGLangChain(query, correctAnswer, vectorstore, top_k=3):

    print(f"\nProcessing query: {query}")

    # Step 1: Determine if retrieval is necessary
    print("Step 1: Determining if retrieval is necessary...")
    input_data = {"query": query}
    retrieval_decision = retrieval_chain.invoke(input_data).response.strip().lower()
    print(f"Retrieval decision: {retrieval_decision}")

    if retrieval_decision == 'yes':

        # Step 2: Retrieve relevant documents
        print("Step 2: Retrieving relevant documents...")
        docs = vectorstore.similarity_search(query, k=top_k)
        contexts = [doc.page_content for doc in docs]
        print(f"Retrieved {len(contexts)} documents")

        # Step 3: Evaluate relevance of retrieved documents
        print("Step 3: Evaluating relevance of retrieved documents...")
        relevant_contexts = []
        for i, context in enumerate(contexts):
            input_data = {"query": query, "context": context}
            relevance = relevance_chain.invoke(input_data).response.strip().lower()
            print(f"Document {i+1} relevance: {relevance}")
            if relevance == 'relevant':
                relevant_contexts.append(context)

        print(f"Number of relevant contexts: {len(relevant_contexts)}")

        # If no relevant contexts found, generate without retrieval
        if not relevant_contexts:

            print("No relevant contexts found. Generating without retrieval...")
            input_data = {"query": query, "context": "No relevant context found."}
            generation = generation_chain.invoke(input_data).response

            evaluationSamples.append({
                "user_input": query,
                "retrieved_contexts": ["no context retrieved"],
                "response": generation,
                "reference": correctAnswer
            })

            return generation

        # Step 4: Generate response using relevant contexts
        print("Step 4: Generating responses using relevant contexts...")
        responses = []
        for i, context in enumerate(relevant_contexts):
            print(f"Generating response for context {i+1}...")
            input_data = {"query": query, "context": context}
            response = generation_chain.invoke(input_data).response

            # Step 5: Assess support
            print(f"Step 5: Assessing support for response {i+1}...")
            input_data = {"response": response, "context": context}
            support = support_chain.invoke(input_data).response.strip().lower()
            print(f"Support assessment: {support}")

            # Step 6: Evaluate utility
            print(f"Step 6: Evaluating utility for response {i+1}...")
            input_data = {"query": query, "response": response}
            utility = int(utility_chain.invoke(input_data).response)
            print(f"Utility score: {utility}")

            responses.append((response, support, utility, context))

        # Select the best response based on support and utility
        print("Selecting the best response...")
        best_response = max(responses, key=lambda x: (x[1] == 'fully supported', x[2]))
        print(f"Best response support: {best_response[1]}, utility: {best_response[2]}")

        evaluationSamples.append({
                "user_input": query,
                "retrieved_contexts": [best_response[3]],
                "response": best_response[0],
                "reference": correctAnswer
            })
        
        return best_response[0]
    
    else:

        # Generate without retrieval
        print("Generating without retrieval...")
        input_data = {"query": query, "context": "No retrieval necessary."}
        generation = generation_chain.invoke(input_data).response

        evaluationSamples.append({
                "user_input": query,
                "retrieved_contexts": ["no context retrieved"],
                "response": generation,
                "reference": correctAnswer
            })
        
        return generation

In [67]:
# Function to evaluate our RAG pipeline when given ground truth
async def pipelineEvaluation(dataset, metrics):

    # Run through our runs
    results = []
    for run in dataset:

        # Save our inputs/outputs
        inputQuery = run["user_input"]
        groundTruthAnswer = run["reference"]
        contexts = run["retrieved_contexts"]
        response = run["response"]

        # Create a SingleTurnSample object
        sample = SingleTurnSample(
            user_input=inputQuery,
            response=response,
            reference=groundTruthAnswer,
            retrieved_contexts=contexts 
        )

        # Evaluate metrics
        runResults = {"question": inputQuery}
        for metric in metrics:

            # Get the score for the given metric
            try:

                score = await metric.single_turn_ascore(sample)
                runResults[type(metric).__name__] = score

            except Exception as e:
                # Catch errors for debugging
                runResults[type(metric).__name__] = f"Error: {str(e)}"
        
        # Save metric results
        results.append(runResults)
    
    # Calculate mean and standard deviation for each metric
    metricsStats = {}
    for metric in metrics:
        metricName = type(metric).__name__
        scores = [result[metricName] for result in results if isinstance(result[metricName], (int, float))]
        
        # Only calculate stats if there are valid scores
        if scores:
            metricsStats[metricName] = {
                "mean": np.mean(scores),
                "std_dev": np.std(scores),
            }
            
        else:
            metricsStats[metricName] = {
                "mean": "No valid scores",
                "std_dev": "No valid scores",
            }
    
    return results, metricsStats

In [57]:
# Load Data
csvPath = "/Users/christiansarmiento/Library/CloudStorage/OneDrive-MaristCollege/Machine Learning/Data/Cleaned_QA.csv"
maristQA = pd.read_csv(csvPath, header=None)

# To use RecursiveCharacterTextSplitter, we need a list of dictionaries
maristContext = [Document(page_content=text) for text in maristQA[1].tolist()]

# Split Documents into Chunks
textSplitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200, add_start_index=True)
texts = textSplitter.split_documents(maristContext)

# Store Documents in Vector DB (Chroma)
vectorDB = Chroma.from_documents(documents=texts, embedding=OpenAIEmbeddings())

In [60]:
# Load metrics
evalMetrics = [LLMContextRecall(llm=LangchainLLMWrapper(llm)), 
               FactualCorrectness(llm=LangchainLLMWrapper(llm)), 
               Faithfulness(llm=LangchainLLMWrapper(llm)), 
               SemanticSimilarity(embeddings=LangchainEmbeddingsWrapper(OpenAIEmbeddings()))]

In [63]:
# Sample records from our dataset
maristTestSample = maristQA.sample(50, replace=False)
maristTestSample.head()

Unnamed: 0,0,1
261,What is the language center?,Modern Languages and Cultures DepartmentWeiss ...
199,Is there a discount for classes for military p...,Military & Veteran AdmissionMilitary & Veteran...
292,Various internship opportunities for school of...,"InternshipsFind an InternshipIn recent years, ..."
70,Studying philosophy.,Philosophy & Religious Studies DepartmentPhilo...
50,How many credits is the Clinical Mental Health...,Master of Arts in Clinical Mental Health Couns...


In [68]:
# Run our chain with each question and evaluate
for row in maristTestSample.iterrows():
    selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)
    print(f"Generation: {selfRAGResponse}")


Processing query: What is the language center?
Step 1: Determining if retrieval is necessary...
Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=02a7656b-41e1-465e-bd2b-c23f90589e07,id=02a7656b-41e1-465e-bd2b-c23f90589e07; trace=02a7656b-41e1-465e-bd2b-c23f90589e07,id=ac77ecc0-219b-4f62-b485-2f65d504d5ba; trace=02a7656b-41e1-465e-bd2b-c23f90589e07,id=1dba69bc-8f4c-4b01-8b95-24015ad7b014


Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: The NSF REU (National Science Foundation's Research Experiences for Undergraduates) is a summer research program that provides undergraduate students with the opportunity to gain hands-on research experience in STEM (Sci

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=02a7656b-41e1-465e-bd2b-c23f90589e07,id=40681dbd-cee8-417d-a62d-4d5f5d586919; trace=4a484bb2-a45d-4ae7-bdec-60466518cf94,id=4a484bb2-a45d-4ae7-bdec-60466518cf94; trace=4a484bb2-a45d-4ae7-bdec-60466518cf94,id=27074bc9-8e00-42d6-aa1d-535e89fa88fc; trace=4a484bb2-a45d-4ae7-bdec-60466518cf94,id=87065d02-da9f-48b3-97fa-3d5a7de7313b; trace=4a484bb2-a45d-4ae7-bdec-60466518cf94,id=5d1a5cb7-81ff-43f5-8a03-30a5d297d3e1; trace=1c3b283a-01bd-45fa-aadd-8ed12982223b,id=1c3b283a-01bd-45fa-aadd-8ed12982223b; trace=1c3b283a-01bd-45fa-aadd-8ed12982223b,id=89e35ede-b953-4620-a38d-362deee36da6; trace=1c3b283a-01bd-45fa-aadd-8ed12982223b,id=d0771146-e202-4112-a2f2-05c8ce6bb4d2; trace=1c3b283a-01bd-45fa

Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: Based on the context provided, it seems the query "Head of film photography Marist" is likely referring to an event or exhibition held at Marist College in February, as part of the "Years of Figurative Photography" slide talk. The context mentions several photography-related exhibits and shows that the artist appears to have participated in during that time period, including at the Marist College Art Dept Gallery in February. So the "Head of film photography Marist" is likely a reference to the artist's involvement or role in the photography exhibit or talk at Marist College that month.

Processing query: What is the mission of the IRB?
Step 1: Determining if retrieval is necessary...
Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevanc

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=dce0b9c7-43b8-4d45-b62b-32d2f89f812f,id=dce0b9c7-43b8-4d45-b62b-32d2f89f812f; trace=dce0b9c7-43b8-4d45-b62b-32d2f89f812f,id=10700f35-3774-4d67-8b40-e63715f1c515; trace=dce0b9c7-43b8-4d45-b62b-32d2f89f812f,id=49e3f6d0-8077-4dd0-9014-e110b99d74f1; trace=dce0b9c7-43b8-4d45-b62b-32d2f89f812f,id=cfd2f6b6-f059-426b-a3b2-ca792d47f86a; trace=5f2b5c2e-6a55-4044-aac1-39c78e9c4e76,id=5f2b5c2e-6a55-4044-aac1-39c78e9c4e76; trace=5f2b5c2e-6a55-4044-aac1-39c78e9c4e76,id=ee28888e-0f2d-48ad-b8e6-591e0af7479c; trace=5f2b5c2e-6a55-4044-aac1-39c78e9c4e76,id=6cdd8a58-2ad5-4ee1-80f7-3e725544a78f; trace=5f2b5c2e-6a55-4044-aac1-39c78e9c4e76,id=99dd7216-0c5d-4f3a-89a2-2dfa08aeb630; trace=17531ca9-b558-47ed

Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: Based on the provided context, here are some key details about the cost of study abroad programs:

- The State provides up to $5,000 for a semester or shorter-term (minimum 21 days) study abroad program. 
- An additional $3,000 is available for students targeting a "critical need" language.
- The program prioritizes students facing financial challenges and underrepresented populations.
- The Critical La

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=063a14c5-1108-4f2b-b0ff-8a34ce445883,id=063a14c5-1108-4f2b-b0ff-8a34ce445883; trace=063a14c5-1108-4f2b-b0ff-8a34ce445883,id=4679756e-c5c4-46c3-b3e3-de88628f7763; trace=063a14c5-1108-4f2b-b0ff-8a34ce445883,id=3fb2cc9a-084f-45ae-8a90-23e3080d46c7; trace=063a14c5-1108-4f2b-b0ff-8a34ce445883,id=72b91d74-98af-475b-9e8c-7990bf6bd965; trace=ca13cb27-f7b3-4b93-89c9-e1c6d90fdc16,id=ca13cb27-f7b3-4b93-89c9-e1c6d90fdc16; trace=ca13cb27-f7b3-4b93-89c9-e1c6d90fdc16,id=67ca39bd-6a5e-498c-976d-f36d7f40b6b2; trace=ca13cb27-f7b3-4b93-89c9-e1c6d90fdc16,id=441fb9ee-bb23-408f-b1c6-0e73b17f19e1; trace=ca13cb27-f7b3-4b93-89c9-e1c6d90fdc16,id=12155ae8-7f54-4e73-a1b4-c7e2e7b13250; trace=db2bad36-f04f-499d

Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: The purpose of the Central Gate at Marist College is to serve as both a functional and ceremonial entrance to the campus. Situated at the midpoint of the west campus, the Central Gate provides an iconic and visually striking entrance for the college.

Some key points about the Central Gate:

- It is not normally open to regular traffic, except for emergency vehicles, but is opened for special occasions 

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=f688562d-7633-4e04-aa91-7a03d59c055d,id=b0eddbe8-f097-4796-9c5a-6c7c3caa3908; trace=f688562d-7633-4e04-aa91-7a03d59c055d,id=2dae49e6-6e97-48be-8e7d-1b2f1d3e3c11; trace=ac940096-fb95-4454-a0b4-efb6c772bb8b,id=ac940096-fb95-4454-a0b4-efb6c772bb8b; trace=ac940096-fb95-4454-a0b4-efb6c772bb8b,id=9dee0e77-6703-4e3e-8087-c76b63247317; trace=ac940096-fb95-4454-a0b4-efb6c772bb8b,id=05eb8f4e-c9ac-491a-86a8-f3dc42d45c06; trace=ac940096-fb95-4454-a0b4-efb6c772bb8b,id=042ad11f-7365-4699-96df-f77b67bf3676; trace=0f1d8c19-81ed-4791-afac-432e7a7fb8c0,id=0f1d8c19-81ed-4791-afac-432e7a7fb8c0; trace=0f1d8c19-81ed-4791-afac-432e7a7fb8c0,id=9c94e507-4f3e-4aa4-b3f0-02be310d1cf9; trace=0f1d8c19-81ed-4791

Support assessment: fully supported
Step 6: Evaluating utility for response 1...


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=ddbd77d1-0314-4d10-8aad-9e9c455fa8ae,id=ddbd77d1-0314-4d10-8aad-9e9c455fa8ae; trace=ddbd77d1-0314-4d10-8aad-9e9c455fa8ae,id=285b2184-259b-42a8-947b-f955bc2c1e0b; trace=ddbd77d1-0314-4d10-8aad-9e9c455fa8ae,id=0c7adb1e-4d6b-4031-9ec4-3a70c37b8bcb; trace=ddbd77d1-0314-4d10-8aad-9e9c455fa8ae,id=14e4acef-db5d-4d11-b5b2-f5729316da85; trace=6dce8c17-07f0-4474-8bf6-36b4ae0ba88b,id=6dce8c17-07f0-4474-8bf6-36b4ae0ba88b; trace=6dce8c17-07f0-4474-8bf6-36b4ae0ba88b,id=a071047a-1eae-4b45-bf3c-c50041ac438b; trace=6dce8c17-07f0-4474-8bf6-36b4ae0ba88b,id=4fcc04df-20dd-4bf2-b654-2f9fe0ce33ef; trace=6dce8c17-07f0-4474-8bf6-36b4ae0ba88b,id=aad79a8b-0fc1-41a5-861b-0dc794bdd9cf; trace=798da648-cb17-4d88

Utility score: 5
Generating response for context 2...


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=742a9ca6-d8bb-4e88-a8bb-164e1ab32436,id=742a9ca6-d8bb-4e88-a8bb-164e1ab32436; trace=742a9ca6-d8bb-4e88-a8bb-164e1ab32436,id=a93aa811-0ef6-4fa7-9fd4-f4d62da9923e; trace=742a9ca6-d8bb-4e88-a8bb-164e1ab32436,id=3cb8f294-94da-4373-afde-9beae45833ef; trace=742a9ca6-d8bb-4e88-a8bb-164e1ab32436,id=1b7d8713-6bea-4d50-8446-f760d74445c7; trace=7988747c-dbe3-403b-87b4-09940ffa56fd,id=7988747c-dbe3-403b-87b4-09940ffa56fd; trace=7988747c-dbe3-403b-87b4-09940ffa56fd,id=0017a830-16b3-47a6-9174-4436ee0d31eb; trace=7988747c-dbe3-403b-87b4-09940ffa56fd,id=ce62bcc1-bcef-417d-90ec-9bbabb953f2e; trace=7988747c-dbe3-403b-87b4-09940ffa56fd,id=9dad06f7-6d83-470e-8555-4c439c1d183d; trace=680216ef-80e8-45b8

Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: Based on the provided context, it seems that Lori Beth's primary research interests and areas of focus are:

1. Metaphysics - Particularly the realist/anti-realist debate regarding the existence of fictional objects like characters. She has defended a "brute fact" view of character identity, but personally favors an anti-realist position, such as Kendall Walton's pretense theory.

2. Philosophy of language - Her work on the metaphysics of fictional objects likely intersects with philosophy of language questions about reference, meaning, and representation.

3. Aesthetics - As in

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=9d8b1700-6adb-49d9-af49-00fbee816975,id=9d8b1700-6adb-49d9-af49-00fbee816975; trace=9d8b1700-6adb-49d9-af49-00fbee816975,id=43e30aea-f5c6-466d-a97e-42082355a0e2; trace=9d8b1700-6adb-49d9-af49-00fbee816975,id=c773fb32-3ea1-410a-841a-178ad2da53ac; trace=9d8b1700-6adb-49d9-af49-00fbee816975,id=76c4fd9c-c4d6-455c-8fb1-31460d6c736e; trace=5fb9ede7-c33e-4dbb-8ed8-06a0a4446e71,id=5fb9ede7-c33e-4dbb-8ed8-06a0a4446e71; trace=5fb9ede7-c33e-4dbb-8ed8-06a0a4446e71,id=86499fb4-91e0-4c7d-8489-52bb08f9e4aa; trace=5fb9ede7-c33e-4dbb-8ed8-06a0a4446e71,id=081a7691-8ce6-49b6-ac02-419488315f16; trace=5fb9ede7-c33e-4dbb-8ed8-06a0a4446e71,id=e42b66b8-a052-43b3-90a7-b7010018a8a1; trace=13b897cf-597b-4452

Step 5: Assessing support for response 1...
Support assessment: fully supported
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: fully supported
Step 6: Evaluating utility for response 2...
Utility score: 5
Generating response for context 3...
Step 5: Assessing support for response 3...
Support assessment: fully supported
Step 6: Evaluating utility for response 3...
Utility score: 5
Selecting the best response...
Best response support: fully supported, utility: 5
Generation: Here is how you can learn more about being a student at Marist College:

As a Marist student, you'll have the opportunity to make lifelong friendships with your peers. You'll also have regular contact with the staff and faculty, both formally and informally. Marist has designed its programs and services to promote self-discovery, leadership development, healthy behaviors, social responsibility, and over

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=983b2ef9-6172-43ba-bc3e-754bc53bcc96,id=983b2ef9-6172-43ba-bc3e-754bc53bcc96; trace=983b2ef9-6172-43ba-bc3e-754bc53bcc96,id=6a2eecef-b9e7-45c4-9131-c64bfe0e8689; trace=983b2ef9-6172-43ba-bc3e-754bc53bcc96,id=c0551a1c-e769-4a9c-86a8-4fbcb9f41907; trace=983b2ef9-6172-43ba-bc3e-754bc53bcc96,id=1e0dd0aa-e619-427f-b797-9935fdf43128; trace=607f8198-e829-43e6-bcba-8a7446ab0b35,id=607f8198-e829-43e6-bcba-8a7446ab0b35; trace=607f8198-e829-43e6-bcba-8a7446ab0b35,id=009e118a-2ddc-42f7-8aab-a8274b0f45df; trace=607f8198-e829-43e6-bcba-8a7446ab0b35,id=8ee76947-d52c-4dad-b8fd-68eaa08780ba; trace=607f8198-e829-43e6-bcba-8a7446ab0b35,id=95145d0c-656d-45da-8538-ba62858ff6e2; trace=a7f5d6de-4875-43f6

In [69]:
# Evaluation for LLMContextRecall
await computeEvaluationMetrics(evaluationSamples, [evalMetrics[0]])

Starting Evaluation...


Prompt fix_output_format failed to parse output: The output parser failed to parse the output including retries.
Prompt fix_output_format failed to parse output: The output parser failed to parse the output including retries.
Prompt fix_output_format failed to parse output: The output parser failed to parse the output including retries.
Prompt context_recall_classification_prompt failed to parse output: The output parser failed to parse the output including retries.
Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=8087132e-c6b5-42af-889a-3a2909e38a46,id=8087132e-c6b5-42af-889a-3a2909e38a46; trace=8087132e-c6b5-42af-889a-3a2909e38a46,id=2daad174-9743-4c65-b4e8-0a82289d813f; trace=8087132e-c6b5-42af-889a-3a2909e38a46,id=f0373f0a-5a

Evaluation Finished!
{'question': 'What is the language center?', 'LLMContextRecall': 'Error: The LLM generation was not completed. Please increase try increasing the max_tokens and try again.'}
{'question': 'Is there a discount for classes for military personnel and their families?', 'LLMContextRecall': 0.6}
{'question': 'Various internship opportunities for school of science students?', 'LLMContextRecall': 'Error: The LLM generation was not completed. Please increase try increasing the max_tokens and try again.'}
{'question': 'Studying philosophy.', 'LLMContextRecall': 'Error: The LLM generation was not completed. Please increase try increasing the max_tokens and try again.'}
{'question': 'How many credits is the Clinical Mental Health Counseling program?', 'LLMContextRecall': 1.0}
{'question': 'Does Marist offer a tuition discount for veterans and active duty personnel?', 'LLMContextRecall': 1.0}
{'question': 'What is NSF REU?', 'LLMContextRecall': 1.0}
{'question': 'Who is David B.

In [70]:
# Evaluation for FactualCorrectness
await computeEvaluationMetrics(evaluationSamples, [evalMetrics[1]])

Starting Evaluation...


Failed to batch ingest runs: langsmith.utils.LangSmithConnectionError: Connection error caused failure to POST https://api.smith.langchain.com/runs/batch in LangSmith API. Please confirm your internet connection. SSLError(MaxRetryError("HTTPSConnectionPool(host='api.smith.langchain.com', port=443): Max retries exceeded with url: /runs/batch (Caused by SSLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:2406)')))"))
Content-Length: 960643
API Key: lsv2_********************************************2e
post: trace=f66211bf-5281-4b15-8b4a-f2c4c0f78f7d,id=f66211bf-5281-4b15-8b4a-f2c4c0f78f7d; trace=f66211bf-5281-4b15-8b4a-f2c4c0f78f7d,id=dbda136e-9dfa-4d9c-a9de-7d779523c884; trace=f66211bf-5281-4b15-8b4a-f2c4c0f78f7d,id=96f0363e-58bc-449a-b936-b160cb6ab188; trace=f66211bf-5281-4b15-8b4a-f2c4c0f78f7d,id=f6daac5e-7543-4524-b7a4-05fc18119f5d; trace=da553e28-937f-4cda-bbc1-81d6eddc8e46,id=da553e28-937f-4cda-bbc1-81d6eddc8e46; trace=da553e28-937f-4cda-bbc1-81d6eddc8e46,id=704478

Evaluation Finished!
{'question': 'What is the language center?', 'FactualCorrectness': 'Error: The LLM generation was not completed. Please increase try increasing the max_tokens and try again.'}
{'question': 'Is there a discount for classes for military personnel and their families?', 'FactualCorrectness': 'Error: The LLM generation was not completed. Please increase try increasing the max_tokens and try again.'}
{'question': 'Various internship opportunities for school of science students?', 'FactualCorrectness': 'Error: The LLM generation was not completed. Please increase try increasing the max_tokens and try again.'}
{'question': 'Studying philosophy.', 'FactualCorrectness': 0.0}
{'question': 'How many credits is the Clinical Mental Health Counseling program?', 'FactualCorrectness': 'Error: The output parser failed to parse the output including retries.'}
{'question': 'Does Marist offer a tuition discount for veterans and active duty personnel?', 'FactualCorrectness': 'Error: The

In [71]:
# Evaluation for Faithfulness
await computeEvaluationMetrics(evaluationSamples, [evalMetrics[2]])

Starting Evaluation...


Prompt fix_output_format failed to parse output: The output parser failed to parse the output including retries.
Prompt fix_output_format failed to parse output: The output parser failed to parse the output including retries.
Prompt fix_output_format failed to parse output: The output parser failed to parse the output including retries.
Prompt n_l_i_statement_prompt failed to parse output: The output parser failed to parse the output including retries.
Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=9af98d39-f641-4761-bc2d-3e90cb71ddc8,id=9af98d39-f641-4761-bc2d-3e90cb71ddc8; trace=9af98d39-f641-4761-bc2d-3e90cb71ddc8,id=4b87a524-4465-4118-b287-728db3a1920c; trace=9af98d39-f641-4761-bc2d-3e90cb71ddc8,id=d457850c-92ef-4c98-9763-0

Evaluation Finished!
{'question': 'What is the language center?', 'Faithfulness': 1.0}
{'question': 'Is there a discount for classes for military personnel and their families?', 'Faithfulness': 0.6}
{'question': 'Various internship opportunities for school of science students?', 'Faithfulness': 'Error: The LLM generation was not completed. Please increase try increasing the max_tokens and try again.'}
{'question': 'Studying philosophy.', 'Faithfulness': 1.0}
{'question': 'How many credits is the Clinical Mental Health Counseling program?', 'Faithfulness': 1.0}
{'question': 'Does Marist offer a tuition discount for veterans and active duty personnel?', 'Faithfulness': 'Error: The output parser failed to parse the output including retries.'}
{'question': 'What is NSF REU?', 'Faithfulness': 'Error: The output parser failed to parse the output including retries.'}
{'question': 'Who is David B. Woolner?', 'Faithfulness': 'Error: The LLM generation was not completed. Please increase try incr

In [72]:
# Evaluation for SemanticSimilarity
await computeEvaluationMetrics(evaluationSamples, [evalMetrics[3]])

Starting Evaluation...


Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=f53e9ef8-d1d3-403e-8e93-aa392ad1b1c6,id=36884e95-cfc0-440e-9e26-7ff1796adfa4; trace=f53e9ef8-d1d3-403e-8e93-aa392ad1b1c6,id=ec2c9cbd-38c8-4c6a-85c4-3c10c0d68d17; trace=f53e9ef8-d1d3-403e-8e93-aa392ad1b1c6,id=f32aa385-eb03-48b8-976a-382dcf6ab17f; trace=f53e9ef8-d1d3-403e-8e93-aa392ad1b1c6,id=90965527-8542-497e-89ed-374a2a5a83ad; trace=f53e9ef8-d1d3-403e-8e93-aa392ad1b1c6,id=df9b9efd-ffd4-4a6b-826e-cd91d4dff3a6; trace=f53e9ef8-d1d3-403e-8e93-aa392ad1b1c6,id=4dd4bb77-568a-40c8-ab41-b443c356d981; trace=72b07f25-9c6d-4089-adbe-20506b477e40,id=72b07f25-9c6d-4089-adbe-20506b477e40; trace=72b07f25-9c6d-4089-adbe-20506b477e40,id=a1d7e05e-0356-40ad-9f5e-b2150090b96d; trace=72b07f25-9c6d-4089

Evaluation Finished!
{'question': 'What is the language center?', 'SemanticSimilarity': 0.8496717388864766}
{'question': 'Is there a discount for classes for military personnel and their families?', 'SemanticSimilarity': 0.8102957195228551}
{'question': 'Various internship opportunities for school of science students?', 'SemanticSimilarity': 0.8921758387364631}
{'question': 'Studying philosophy.', 'SemanticSimilarity': 0.8123792777919205}
{'question': 'How many credits is the Clinical Mental Health Counseling program?', 'SemanticSimilarity': 0.8935188687311393}
{'question': 'Does Marist offer a tuition discount for veterans and active duty personnel?', 'SemanticSimilarity': 0.9277481133736738}
{'question': 'What is NSF REU?', 'SemanticSimilarity': 0.8232849475374435}
{'question': 'Who is David B. Woolner?', 'SemanticSimilarity': 0.80455747993826}
{'question': 'What role does Dr. Robyn L. Rosen hold?', 'SemanticSimilarity': 0.9284267143121968}
{'question': 'How many credits are required

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=3ac3a168-ad5f-42dc-94e0-d5014bb199f7,id=3ac3a168-ad5f-42dc-94e0-d5014bb199f7; trace=3ac3a168-ad5f-42dc-94e0-d5014bb199f7,id=bfc1696a-51e6-4f9a-acc7-dd8591ee056c; trace=3ac3a168-ad5f-42dc-94e0-d5014bb199f7,id=5fc9e5d9-5a11-4afe-92ea-932bafc93614; trace=3ac3a168-ad5f-42dc-94e0-d5014bb199f7,id=d71aae7b-c31e-4fa6-a089-e0f94c8d5751; trace=3ac3a168-ad5f-42dc-94e0-d5014bb199f7,id=4a71d2bc-4dca-44cc-9596-ca9c69ff1832; trace=3ac3a168-ad5f-42dc-94e0-d5014bb199f7,id=f7c4d163-bd74-45c8-af3d-862b24903900; trace=3ac3a168-ad5f-42dc-94e0-d5014bb199f7,id=a91fd55f-892a-4d9a-b3c3-f17d4fab3a5e; trace=3ac3a168-ad5f-42dc-94e0-d5014bb199f7,id=430f6b9d-1382-473d-9a33-b48a587b6f2f; trace=3ac3a168-ad5f-42dc

In [97]:
csvPath = "/Users/christiansarmiento/Library/CloudStorage/OneDrive-MaristCollege/Machine Learning/Data/Cleaned_QA.csv"
data = pd.read_csv(csvPath, header=None)

sample = data.sample(1)
sample

Unnamed: 0,0,1
474,Who is the chair of the Modern Languages and C...,Contact InformationAcademic SchoolOfficeEmailP...


In [100]:
print(sample[1].iloc[0])

Contact InformationAcademic SchoolOfficeEmailPhoneDr. Patricia Ferrer-MedinaChair, Modern Languages and Cultures; Assistant Professor of Spanish; Coordinator of French StudiesBioEducationResearch Interests / Areas of FocusMain: Early Modern and Colonial Latin American Literature; Theories of Race, Gender, Cultural Difference; Theories of Subjectivity; Postcolonial Theory; Ecological Criticisms.Secondary: The Caribbean, Anthropological and Arqueological studies of the Caribbean, Amerindian Issues,\u00a0Medieval Literature, Golden Age Literature,\u00a0LGBTQ Issues.Selected Publications\u201cNuevas incursiones cr\u00edticas a los estudios coloniales caribe\u00f1os: la cr\u00edtica ecol\u00f3gica\u201d\u00a0(\u201cNew Critical Trends in Colonial Caribbean Studies: Ecological Criticism\u201d).\u00a0Cuadernos del CILHA,\u00a017 (2012):\u00a090-111.Print.\u201cEcology, Difference and Utopia in the Portrayal of the Gypsy in Cervantes\u2019\u00a0La gitanilla\u00a0(1613)\u201d in\u00a0Cervantes 

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=560e35d0-0cf8-4fdc-9437-7d92411efe2e,id=ed6e1c6c-548f-4e01-9be5-9f5a012db22c; trace=560e35d0-0cf8-4fdc-9437-7d92411efe2e,id=508e5786-c0cf-492f-adfc-c0fe127127b6; trace=560e35d0-0cf8-4fdc-9437-7d92411efe2e,id=548cc5fb-8db1-4db5-8371-5cb4eca156cf; trace=c3316d1f-ba90-4b9d-871a-5cc611547d18,id=c3316d1f-ba90-4b9d-871a-5cc611547d18; trace=c3316d1f-ba90-4b9d-871a-5cc611547d18,id=028b41b8-c1e2-48f9-ab01-538917a6a67a; trace=c3316d1f-ba90-4b9d-871a-5cc611547d18,id=6eae6ceb-8f33-420c-b80e-7b0c4d0510d0; trace=c3316d1f-ba90-4b9d-871a-5cc611547d18,id=26eee498-aa87-4eb5-b571-4c573ac75164; trace=f5cf540f-f95d-4db0-8046-226340b4ab84,id=f5cf540f-f95d-4db0-8046-226340b4ab84; trace=f5cf540f-f95d-4db0

In [98]:
# Run our chain with each question and evaluate
for row in sample.iterrows():
    selfRAGResponse = selfRAGLangChain(row[1][0], row[1][1], vectorDB, top_k=3)
    print(f"Generation: {selfRAGResponse}")


Processing query: Who is the chair of the Modern Languages and Cultures Department?
Step 1: Determining if retrieval is necessary...
Retrieval decision: yes
Step 2: Retrieving relevant documents...
Retrieved 3 documents
Step 3: Evaluating relevance of retrieved documents...
Document 1 relevance: relevant
Document 2 relevance: relevant
Document 3 relevance: relevant
Number of relevant contexts: 3
Step 4: Generating responses using relevant contexts...
Generating response for context 1...
Step 5: Assessing support for response 1...
Support assessment: based on the provided context, the chair of the modern languages and cultures department appears to be irma blanco casey, who is listed as an assistant professor of modern languages.
Step 6: Evaluating utility for response 1...
Utility score: 5
Generating response for context 2...
Step 5: Assessing support for response 2...
Support assessment: based on the given context, the chair of the modern languages and cultures department is likely i

Failed to batch ingest runs: langsmith.utils.LangSmithRateLimitError: Rate limit exceeded for https://api.smith.langchain.com/runs/batch. HTTPError('429 Client Error: Too Many Requests for url: https://api.smith.langchain.com/runs/batch', '{"detail":"Monthly unique traces usage limit exceeded"}')
post: trace=7e38c688-6f13-40cc-9ef2-01c74ebc0575,id=26c5dea1-852b-41ea-af60-163c17e02a24; trace=befe4fa9-49e1-4602-9754-be152add925d,id=befe4fa9-49e1-4602-9754-be152add925d; trace=befe4fa9-49e1-4602-9754-be152add925d,id=9393ba39-a1a1-40d1-a3e9-2085345b89e8; trace=befe4fa9-49e1-4602-9754-be152add925d,id=22c0e5a3-6313-41e2-9284-03c9231f5c9c; trace=befe4fa9-49e1-4602-9754-be152add925d,id=1a96723d-8b7c-491e-800f-f1fe91908aaf; trace=4dacb9f3-c59c-4af6-877e-c77f09053160,id=4dacb9f3-c59c-4af6-877e-c77f09053160; trace=4dacb9f3-c59c-4af6-877e-c77f09053160,id=71203f6e-43fc-4654-ab0b-01f665538c7d; trace=4dacb9f3-c59c-4af6-877e-c77f09053160,id=2afa537e-677d-4d19-81e9-45950e9631e8; trace=4dacb9f3-c59c-4af6