In [None]:
! pip install langchain_community tiktoken langchain-openai langchainhub chromadb langchain

In [2]:
import os

# Enable LangChain tracing and point the client at the hosted endpoint.
os.environ.update(
    {
        "LANGCHAIN_TRACING_V2": "true",
        "LANGCHAIN_ENDPOINT": "https://api.smith.langchain.com",
    }
)

`(3) API Keys`

In [5]:
import getpass
import os


def _ensure_secret(name: str) -> None:
    """Prompt for the secret `name` unless the environment already provides it.

    Values are never written into the notebook source; they are read from the
    existing environment or typed interactively without echo.
    """
    if not os.environ.get(name):
        os.environ[name] = getpass.getpass(f"{name}: ")


# os.environ only accepts string values: assigning None raises TypeError, so
# each key is taken from the environment or requested from the user instead.
for _secret_name in ("LANGCHAIN_API_KEY", "TAVILY_API_KEY", "OPENAI_API_KEY"):
    _ensure_secret(_secret_name)

In [None]:
#### INDEXING ####
import bs4
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

# Load blog: only elements carrying one of the listed CSS classes are parsed.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    },
)
blog_docs = loader.load()

# Split: the splitter is built with the tiktoken-based constructor
# (chunk_size=300, chunk_overlap=50).
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=300, chunk_overlap=50
)
splits = text_splitter.split_documents(blog_docs)

# Index: embed the chunks into a Chroma store and expose it as a retriever.
vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())
retriever = vectorstore.as_retriever()

In [None]:
!pip install -qU langchain-community faiss-cpu
!pip install rank_bm25

In [None]:
#### INDEXING ####
# Builds a hybrid retriever: keyword-based BM25 combined with a FAISS vector
# index over the same chunks of the blog post.

# Load blog
import bs4
from langchain_community.document_loaders import WebBaseLoader
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
blog_docs = loader.load()

# Split
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=300,
    chunk_overlap=50)

# Make splits
splits = text_splitter.split_documents(blog_docs)
print(type(splits[-1]))

# Index
from langchain_openai import OpenAIEmbeddings
# FAISS was previously imported twice from the same module; one import suffices.
from langchain_community.vectorstores import Chroma, FAISS

from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain.storage import InMemoryByteStore

# store = InMemoryByteStore()

# Dense retriever backed by a FAISS index of the chunk embeddings.
faiss_index = FAISS.from_documents(splits, embedding=OpenAIEmbeddings())
faiss_retriever = faiss_index.as_retriever()

# Sparse retriever built from the same chunks.
bm25_retriever = BM25Retriever.from_documents(splits)

# Combine both retrievers; the weights follow the order of `retrievers`
# (0.4 for BM25, 0.6 for FAISS).
ensemble_retriever = EnsembleRetriever(retrievers=[bm25_retriever, faiss_retriever],
                                       weights=[0.4, 0.6])

query = "What is task decomposition for LLM agents?"
# `invoke` is the supported Runnable entry point; `get_relevant_documents`
# is deprecated, and the rest of the notebook already calls `retriever.invoke`.
relevant_documents = ensemble_retriever.invoke(query)

# Output the results
for doc in relevant_documents:
    print(type(doc))

In [14]:
# # Prompt
# template = """Here are the items that you need to grade based on the question, rubric and answer given. The items are formatted in the form 'Question #, question, rubric, answer':

# \n --- \n {question} \n --- \n

# Here is any available background question + answer pairs:

# \n --- \n {q_a_pairs} \n --- \n

# Here is additional context relevant to the question:

# \n --- \n {context} \n --- \n

# You are an agent that primarily uses the above context and any background question + answer pairs to grade the answer for the provided rubric item. \n
# The rubric item is provided to you where the points provided corresponds to if the rubric item is true in the student answer. That means the points in the rubric item, no matter if positive or negative, are given only if the rubric item is TRUE in the student answer. If the points is negative, and the rubric item is not satisfied, then give a score of 0. Your final output should be in the format "score: reasoning" and make sure the reasoning is succinct and to the point. The reasoning should also be focused on the current rubric item only, and it should be directed to the student in the proper tense. \n
# First, only use the rubric item to give the score, but if you are not confident, you can also use the above context and any background question + answer pairs to help grade the answer for the provided rubric item, but remember that the rubric item is your first and most reliable source of information. Think step by step and grade: \n {question}
# """

# decomposition_prompt = ChatPromptTemplate.from_template(template)

In [15]:
from operator import itemgetter
from langchain_core.output_parsers import StrOutputParser

def format_qa_pair(question, answer):
    """Render one rubric item and its grade/feedback as a two-line text block.

    Leading and trailing whitespace of the combined text is stripped.
    """
    rendered = f"Rubric Item: {question}\nGrade and Feedback: {answer}\n\n"
    return rendered.strip()

# llm
# ChatOpenAI was not imported by this point in the notebook (the first import
# sits in a later cell), so a fresh "Restart & Run All" failed here with
# NameError. Import it where it is first used.
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)

# q_a_pairs = ""
# answers = ""
# for q in questions:
#     # print(type(q))
#     rag_chain = (
#     {"context": itemgetter("question") | retriever,
#      "question": itemgetter("question"),
#      "q_a_pairs": itemgetter("q_a_pairs")}
#     | decomposition_prompt
#     | llm
#     | StrOutputParser())

#     answer = rag_chain.invoke({"question":q,"q_a_pairs":q_a_pairs})
#     q_a_pair = format_qa_pair(q,answer)
#     q_a_pairs = q_a_pairs + "\n---\n"+  q_a_pair
#     answers = answers + "\n---\n"+  answer

In [None]:
!pip install langchain-core langgraph

In [18]:
from langchain_core.messages import (
    BaseMessage,
    HumanMessage,
    ToolMessage,
)
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

from langgraph.graph import END, StateGraph, START

def create_detector_agent(llm, system_message: str):
    """Build the AI-content detector chain: prompt -> llm -> JSON parser.

    Parameters:
    - llm: chat model the prompt is piped into.
    - system_message: extra instructions appended to the fixed system prompt.

    Returns:
    - A runnable that takes a dict with a "messages" list and yields the
      parsed JSON output of the model (the prompt asks for the keys 'score'
      and 'lines').
    """
    # Imported here so the function does not depend on a later cell having
    # been executed first.
    from langchain_core.output_parsers import JsonOutputParser

    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                "You are assuming the role of an AI-content detector. The messages in the conversation state will contain the question and student answer in the format 'question:answer', and you need to determine whether the answer contains AI-generated content. Provide the score as a JSON with exactly two keys: 'score' and 'lines'. The score should be a value between 0.0 and 100.0, where the higher the score is, the higher the percentage of AI-generated content exists in the student answer. The value for the 'lines' key should only cite the parts of the student answer where you can guarantee there is AI-content in the student answer, so it only contain content EXACTLY in the student answer and nothing else, I REPEAT nothing else. Make sure the content is all regarding what is written by the student. The lines output should be only taken from the student answer. Do not write anything other than that. If the answer is empty, output 0.1, and if there is no miniscule relation between the answer and question, output 0.0. There should be no preamble or explanation."
                " \n{system_message}",
            ),
            # Without this placeholder the "messages" passed by the caller were
            # dropped and the model only ever saw the system prompt, never the
            # student's question/answer. The grader and reviewer prompts in this
            # notebook already include it.
            MessagesPlaceholder(variable_name="messages"),
        ]
    )
    prompt = prompt.partial(system_message=system_message)
    return prompt | llm | JsonOutputParser()


def create_grader_agent(llm, system_message: str):
    """Build the grader chain: prompt -> llm.

    Parameters:
    - llm: chat model the prompt is piped into.
    - system_message: extra instructions appended to the fixed system prompt.

    Returns:
    - A runnable that takes a dict with a "messages" list (the rubric item and,
      on later iterations, the previous review) and returns the model message.
    """
    # The system prompt is written as adjacent string literals: the original
    # single literal was broken across two physical lines, which is not valid
    # for a quoted (non-triple-quoted) string.
    # The marker named in the prompt is 'FINAL POINTS:' because that is what the
    # reviewer prompt in this notebook instructs the reviewer to emit; the
    # former 'FINAL GRADE:' marker was never produced by any agent.
    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                "You are assuming the role of a student answer grader. You will be given a review of your grading, unless this is the first iteration of grading the answer. If the review exists, and if it starts with 'FINAL POINTS:', then it thinks your grading for that specific rubric item is correct, else it has some improvements that you can take into account. If you think the review improvement advice is not correct, do not follow it, but keep in mind, the reviewer is trying to help, and take its advice seriously. Here are the items that you need to grade based on the question, rubric and answer given. The rubric items are formatted in the form 'Question #, question, rubric, answer'. You will be given this item, plus the previous rubric items+grading scores, and also context related to the rubric item. \n --- \n  You are an agent that primarily uses the rubric item to grade the answer for the provided rubric item. \n The rubric item is provided to you where the points provided corresponds to if the rubric item is true in the student answer. That means the points in the rubric item, no matter if positive or negative, are given only if the rubric item is TRUE in the student answer. If the points is negative, and the rubric item is not satisfied, then give a score of 0. Your final output should be in the format 'score: reasoning' and make sure the reasoning is succinct and to the point. The reasoning should also be focused on the current rubric item only, and it should be directed to the student in the proper tense. \n First, only use the rubric item to give the score, but if you are not confident, you can also use the above context and any background question + answer pairs to help grade the answer for the provided rubric item, but remember that the rubric item is your first and most reliable source of information. If you are giving the student the points, then don't tell what is wrong with it. "
                "Just explain why the student did or did not get the points, don't give unneccesary information, so it is concise. Always use the rubric as final call. Think step by step and grade the student answer using the rubric and review as advice. The rubric is the final decision. Go with the rubric."
                " \n{system_message}",
            ),
            MessagesPlaceholder(variable_name="messages"),
        ]
    )
    prompt = prompt.partial(system_message=system_message)
    return prompt | llm

def create_reviewer_agent(llm, system_message: str):
    """Build the reviewer chain (prompt -> llm) that checks the grader's output.

    `system_message` is appended to the fixed system prompt; the returned
    runnable expects a dict with a "messages" list.
    """
    system_template = (
        "Your role is to review the points and reasoning given by the grader, and ensure that all information is correct and factual. The information in the reasoning should primarily be built from the rubric, and the grader's score and reasoning respectively.  \n --- \n The rubric items are formatted in the form 'Question #, question, rubric, answer, grade'. You will be given this item, and also context related to the rubric item from the database we have. \n --- \n Read the reasoning carefully to make sure no hallucination and distraction is there. If you think there is a mistake in the grading regarding the points given, object. Think step by step and review the grading and reasoning for the rubric item in the messages, and make your review concise. If there is no mistake in the grade of a rubric item, start your review with 'FINAL POINTS:', otherwise start with 'WRONG POINTS:', and you must start with either. The conversation state will contains the grades in the format 'score, reasoning', so if the score is correct, do not output 'WRONG POINTS:'. If you think the grader gave the correct points, just make sure mentions what the rubric expected. The beginning of the review is only two options: 'FINAL POINTS:' if the grade gave the correct points, and 'WRONG POINTS:' if the grade did not give the correct points"
        " \n{system_message}"
    )
    reviewer_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_template),
            MessagesPlaceholder(variable_name="messages"),
        ]
    ).partial(system_message=system_message)
    return reviewer_prompt | llm

In [19]:
import operator
from typing import Annotated, Sequence
from typing_extensions import TypedDict

from langchain_openai import ChatOpenAI


# This defines the object that is passed between each node
# in the graph. We will create different nodes for each agent and tool
class AgentState(TypedDict):
    """Shared graph state exchanged between the Detector, Grader and Reviewer nodes."""

    # Lists returned by nodes are concatenated onto the existing value (operator.add).
    # NOTE(review): the Grader/Reviewer nodes in this notebook append plain strings
    # (model `.content`), not BaseMessage objects -- confirm the intended element type.
    messages: Annotated[Sequence[BaseMessage], operator.add]
    # Name of the node that produced the most recent update.
    sender: str
    # Written by the Detector node and read by `router`; these keys were missing
    # from the schema even though the detector returns them.
    score: float
    lines: list

In [20]:
from langchain.prompts import ChatPromptTemplate

# Prompt that decomposes the rubric into a list of entries, one per rubric item.
# Each entry contains: question #, question, rubric item, student answer.
# Template variables: {question} receives the whole rubric, {answer} the student answers.
# NOTE(review): the prompt text contains the typo "Do nut make up rubric items";
# it is left untouched here because it is part of the runtime string.
template = """You are a helpful assistant that divides the rubric/answer key and the student answers into separate entries. Each entry includes the question number, question, rubric item on what content would reward/deduct points for the answer, and the entire answer. Do not output multiple rubric items at once. \n
The goal is to break down the rubric into a set of rubric items that can be checked in isolation. \n
Divide the rubric into separate items. For example if the question numbers are 1, 2a, 2b, 2c, 3, 4, each question will be divided and then the following rubric items and the student answer will be for the question. Ensure that the number of items for each question corresponds to the number of rubric items where points are rewarded or deducted. Do nut make up rubric items. Follow the following rubric entirely. You are grounded by this rubric, so everything comes from this rubric.  \n
Strictly format the division of the rubric into 'question #: question: rubric item: student answer', and if there are multiple rubric items for each question, then separate each item into separate entries, but maintain the same question number, question and answer. Therefore, each rubric item for the same question should have the same question number, question, and answer.  \n
Make sure the question #, question, and rubric item, and it follows the rubric entirely to a tee. The answer must be grounded as well, and use only the student answers provided to divide them. Each element in the list of rubric items should consist of an non-empty string of a rubric item, and each element should have the question #, question, rubric item and answer in one string. If the student answer is empty, simply add 'N/A' at the end of the rubric item. Make sure there is only one rubric point item per entry, and do not repeat entries. Here is the entire rubric list  {question}. Here are the student answers {answer}\n
Do not have empty rubric items. Do not output the entire rubric at the beginning of this decomposition. I only want sub-rubric items. Output (n rubric items):"""
prompt_decomposition = ChatPromptTemplate.from_template(template)

In [21]:
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser

# Generate the questions and answers in the format we would like -- 'question #: question: rubric item: student answer'
llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)

# Chain: prompt -> model -> plain string -> list of lines (split on "\n").
generate_queries_decomposition = ( prompt_decomposition | llm | StrOutputParser() | (lambda x: x.split("\n")))

# Sample rubric (`question`) and student answers (`answer`) fed to the decomposition chain.
question = "1. What are the main components of an LLM-powered autonomous agent system? Mentions 'rag': 1 point, Mentions 'decomposition': 1 point, If any other component other than 'rag' and 'decomposition', give a score of -1: -1 point \n 2a. What are the main components of an AI-powered autonomous agent system? Mentions beta: 1 point, Mentions cloud: 1 point"
answer = "1. I think they are rag, zeta, cheta, neta, decomposition \n 2a. "
# `questions` holds one string per output line; it is later bound into the grader/reviewer nodes.
questions = generate_queries_decomposition.invoke({"question":question, "answer":answer})

# NOTE(review): `get_questions` is not defined anywhere in this notebook, so this
# line raises NameError on a fresh kernel. It presumably was a prompt template
# defined in a since-deleted cell -- restore it before relying on `qs`.
generate_questions = ( get_questions | llm | StrOutputParser() | (lambda x: x.split("\n")))

# `qs` is passed to the detector node as its `items` argument further below.
qs = generate_questions.invoke({"question":question, "answer":answer})

In [None]:
import functools

from langchain_core.messages import AIMessage
from langchain_core.output_parsers import JsonOutputParser

# Helper function to create a node for the AI detector agent
def detector_node(state, agent, name, items):
    """Run the detector agent on every item and average the reported scores.

    Parameters:
    - state: current graph state (not read by this node).
    - agent: runnable whose `invoke` returns a mapping with 'score' and 'lines'.
    - name: node name, stored as "sender" in the returned update.
    - items: strings to check, each sent to the agent as one human message.

    Returns:
    - dict with the mean "score" (0.0 when `items` is empty), the collected
      "lines" (one entry per item), the "sender" and an empty "messages" list.
    """
    print(name)
    scores = []
    lines = []
    # Iterate over the `items` argument: the previous version ignored it and
    # looped over the notebook-level `questions` list instead.
    for item in items:
        result = agent.invoke(
            {
                "messages": [HumanMessage(content=item)],
                "sender": name,
            }
        )
        scores.append(result["score"])
        lines.append(result["lines"])
    # Guard the average so an empty item list does not divide by zero.
    total_score = sum(scores, 0.0) / len(scores) if scores else 0.0
    return {"score": total_score, "lines": lines, "sender": name, "messages": []}

# Helper function to create a node for both grader and reviewer agents
def agent_node(state, agent, name, questions):
    """Run the grader or reviewer agent once per rubric item.

    Parameters:
    - state: current graph state; only state["messages"] is read.
    - agent: runnable whose `invoke` returns an object with a `.content` attribute.
    - name: "Grader" or "Reviewer"; selects the shape of the returned update.
    - questions: rubric-item strings, processed in order.

    Returns:
    - For "Grader": {"messages": <one answer per item>, "sender": name}.
    - For "Reviewer": {"messages": <each of the last len(answers) state
      messages joined by a space with its review>, "sender": name}.

    Raises:
    - ValueError: if `name` is neither "Grader" nor "Reviewer" (previously the
      function fell through and silently returned None).
    """
    if name not in ("Grader", "Reviewer"):
        raise ValueError(f"Unsupported agent name: {name!r}")

    messages = state["messages"]
    answers = []
    for i, question in enumerate(questions):
        turn = [HumanMessage(content=question)]
        if messages:
            # Pair the item with the i-th of the last len(questions) messages
            # in the state. Both agents use the same indexing, so the two
            # previously duplicated branches are merged.
            turn.append(messages[-len(questions) + i])
        result = agent.invoke({"messages": turn, "sender": name})
        answers.append(result.content)

    if name == "Reviewer":
        # Append each review to the grade it refers to, so the combined text is
        # what the state carries forward.
        reviewed = [
            message + " " + answer
            for message, answer in zip(messages[-len(answers):], answers)
        ]
        return {"messages": reviewed, "sender": name}
    return {"messages": answers, "sender": name}

# LLM used by all three agents -- GPT-4o mini, temperature 0.1
llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0.1)

# AI Detector agent and node
detector_agent = create_detector_agent(
    llm,
    system_message="You should determine whether there is AI-content in the student answers with a score from [0.0 - 100.0], which is the magnitude of AI-content generation. In the lines you output for the AI-generation, make sure those lines are actually in the student answer and no hallucination is there. If you don't think there is AI-generated content, do not add anything to the lines.",
)
# Rebinds `detector_node` from the helper function to a partial with agent, name
# and items pre-filled; the graph then calls it with the state only.
# Re-running this line without re-running the `def` above would wrap the partial again.
detector_node = functools.partial(detector_node, agent=detector_agent, name="Detector", items=qs)

# Grader agent and node
grader_agent = create_grader_agent(
    llm,
    system_message="You should grade the student answers based on the rubric to the best of your ability. Do not go against the rubric information and assume anything on your own. Do not assume typos, go with what is given to you. Treat each rubric item as a condition, and negative points should be rewarded if the condition is satisfied. Do not take semantics of the rubric into account. Rubric is the truth. Scores can only be 0 or the points shown in the rubric item. ",
)
# The decomposed rubric items (`questions`) are bound at construction time.
grader_node = functools.partial(agent_node, agent=grader_agent, name="Grader", questions=questions)

# Reviewer agent and node
review_agent = create_reviewer_agent(
    llm,
    system_message="You should make sure the grader follows the rubric primarily. Do not go against the rubric information and assume anything on your own. If the answer satisfies the rubric, do not give a reason to not give the point. Only follow the current rubric item. Other rubric items should not affect your judgement.Do not assume typos, go with what is given to you. If the points are rewarded, do not mention anything in the explanation, except the fact that it satisfied whatever is on the rubric. For negative rubric points, treat it as a binary option between 0 and the negative value, so if the rubric condition is true, then give it the negative points, else if the rubric requirement is not satisfied, give it 0 if there are negative points. If the points rewarded align, then make sure to start with 'FINAL POINTS:', else start with 'WRONG POINTS:' 'WRONG POINTS:' is given only if the score given by you is not the same as the score given by the grader, do not misuse it."
)
# Same rubric items as the grader, so grades and reviews line up by position.
reviewer_node = functools.partial(agent_node, agent=review_agent, name="Reviewer", questions=questions)
# print(questions)
# print(questions)

In [24]:
from typing import Literal

def router(state):
    """
    Decide whether the graph moves on to the next agent or stops.

    Parameters:
    - state: The current graph state. Reads "sender", plus "score" when the
      sender is the Detector and "messages" when the sender is the Reviewer.

    Returns:
    - END when the Detector reported a score of 80.0 or more, or when the
      Reviewer's latest batch of messages contains no 'WRONG POINTS' marker.
    - "continue" in every other case (including after the Grader), so the
      function never returns None.
    """
    sender = state["sender"]

    if sender == "Detector":
        return END if state["score"] >= 80.0 else "continue"

    if sender == "Reviewer":
        # Only the Reviewer is allowed to end the grading loop. `questions` is
        # the notebook-level list of rubric items; the last len(questions)
        # messages are the reviews produced in this round.
        latest_reviews = state["messages"][-len(questions):]
        if "WRONG POINTS" not in " ".join(latest_reviews):
            return END

    return "continue"

In [25]:
workflow = StateGraph(AgentState)

# Register one node per agent.
for _node_name, _node_fn in (
    ("Detector", detector_node),
    ("Grader", grader_node),
    ("Reviewer", reviewer_node),
):
    workflow.add_node(_node_name, _node_fn)

# Every node is routed by `router`: "continue" leads to the listed successor,
# END stops the graph.
for _source, _successor in (
    ("Detector", "Grader"),
    ("Grader", "Reviewer"),
    ("Reviewer", "Grader"),
):
    workflow.add_conditional_edges(
        _source,
        router,
        {"continue": _successor, END: END},
    )

# The Detector is the entry point.
workflow.add_edge(START, "Detector")
graph = workflow.compile()

In [None]:
from langgraph.errors import GraphRecursionError

# Stream the graph from an empty message history.
events = graph.stream(
    {
        "messages": [
        ],
    },
    # Maximum number of steps to take in the graph
    {"recursion_limit": 10},
)

try:
    for s in events:
        print(s)
        print("----")
except GraphRecursionError as e:
    # Hitting the step budget ends the grader/reviewer loop. Any other
    # exception (missing state key, API failure, ...) is a real error and is
    # left to propagate instead of being printed and swallowed.
    print(e)
    print("final grade")

In [27]:
### Search

from langchain_community.tools.tavily_search import TavilySearchResults

# The result-count field of TavilySearchResults is `max_results`; `k` is not a
# field of the tool, so the intended limit of 3 results was not applied.
web_search_tool = TavilySearchResults(max_results=3)

In [28]:
from langchain.prompts import ChatPromptTemplate

# Decomposition
# Prompt that asks the model for three sub-questions / search queries derived
# from {question}.
# NOTE: this rebinds `template` and `prompt_decomposition`, which the rubric
# decomposition cell earlier in the notebook also defines; cells after this one
# see the sub-question version.
template = """You are a helpful assistant that generates multiple sub-questions related to an input question. \n
The goal is to break down the input into a set of sub-problems / sub-questions that can be answers in isolation. \n
Generate multiple search queries related to: {question} \n
Output (3 queries):"""
prompt_decomposition = ChatPromptTemplate.from_template(template)

In [29]:
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser

# LLM
llm = ChatOpenAI(temperature=0)


def _split_nonblank_lines(text):
    """Split the model output into one query per line, dropping blank lines."""
    return [line.strip() for line in text.split("\n") if line.strip()]


# Chain: prompt -> model -> plain string -> list of sub-questions.
# Blank lines are filtered out so no empty sub-question is sent downstream.
generate_queries_decomposition = (
    prompt_decomposition | llm | StrOutputParser() | _split_nonblank_lines
)

# Run
# Define the question explicitly: on a fresh top-to-bottom run `question` would
# otherwise still hold the rubric string assigned in the grading section.
question = "What are the main components of an LLM-powered autonomous agent system?"
questions = generate_queries_decomposition.invoke({"question": question})

In [None]:
questions

In [None]:
### Retrieval Grader

from langchain.prompts import PromptTemplate
from langchain_community.chat_models import ChatOllama
from langchain_core.output_parsers import JsonOutputParser

# LLM
# local_llm = "llama3.1"
# llm = ChatOllama(model=local_llm, format="json", temperature=0)
llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)

# Prompt asking for a JSON object with a single yes/no 'score' key.
prompt = PromptTemplate(
    input_variables=["question", "document"],
    template="""You are a grader assessing relevance
    of a retrieved document to a user question. If the document contains keywords related to the user question,
    grade it as relevant. It does not need to be a stringent test. The goal is to filter out erroneous retrievals.

    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question.
    Provide the binary score as a JSON with a single key 'score' and no premable or explaination.

    Here is the retrieved document:
    {document}

    Here is the user question:
    {question}
    """,
)

# Chain: prompt -> model -> parsed JSON.
retrieval_grader = prompt | llm | JsonOutputParser()

# Try the grader on the second document retrieved for a sample question.
question = "What are the main components of an LLM-powered autonomous agent system?"
docs = retriever.invoke(question)
doc_txt = docs[1].page_content
_sample_grade = retrieval_grader.invoke({"question": question, "document": doc_txt})
print(f'Is our answer relevant to the question asked: {_sample_grade}')

In [None]:
from langchain import hub

# Pull the "rlm/rag-prompt" prompt from the LangChain hub and print it for inspection.
prompt_rag = hub.pull("rlm/rag-prompt")
print(prompt_rag)

In [33]:
# Answer each sub-question individually

from langchain import hub
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI
from langchain.schema import Document

# RAG prompt
# Pulled again here (the previous cell pulls the same prompt) so this cell does
# not depend on that cell having been run.
prompt_rag = hub.pull("rlm/rag-prompt")

def retrieve_and_rag(question,prompt_rag,sub_question_generator_chain):
    """RAG on each sub-question.

    Parameters:
    - question: the original user question; it is decomposed, used for
      relevance grading and used as the web-search query.
    - prompt_rag: prompt accepting "context" and "question".
    - sub_question_generator_chain: runnable returning a list of sub-questions
      for {"question": question}.

    Uses the notebook-level `retriever`, `retrieval_grader`, `web_search_tool`
    and `llm`.

    Returns:
    - (rag_results, sub_questions): one answer string per sub-question, and
      the sub-questions themselves.
    """
    # Use our decomposition chain to produce the sub-questions
    sub_questions = sub_question_generator_chain.invoke({"question":question})

    # The chain does not change between sub-questions, so build it once.
    rag_chain = prompt_rag | llm | StrOutputParser()

    # Initialize a list to hold RAG chain results
    rag_results = []

    for sub_question in sub_questions:
        # Retrieve documents for each sub-question
        retrieved_docs = retriever.invoke(sub_question)

        # Reset per sub-question: previously `web_search` was only ever assigned
        # in the "not relevant" branch, so it was undefined when every document
        # was relevant and stayed set for all later sub-questions.
        filtered_docs = []
        web_search = False
        for d in retrieved_docs:
            score = retrieval_grader.invoke(
                {"question": question, "document": d.page_content}
            )
            if str(score.get("score", "")).lower() == "yes":
                filtered_docs.append(d)
            else:
                web_search = True

        # Fall back to web search when a document was rejected or nothing
        # relevant is left to answer from.
        if web_search or not filtered_docs:
            docs = web_search_tool.invoke({"query": question})
            web_results = "\n".join([d["content"] for d in docs])
            filtered_docs.append(Document(page_content=web_results))

        # Answer from the graded documents (plus web results). The earlier
        # version passed the unfiltered `retrieved_docs`, discarding the grading.
        answer = rag_chain.invoke({"context": filtered_docs,
                                   "question": sub_question})
        rag_results.append(answer)

    return rag_results,sub_questions

# Run retrieval + RAG for every sub-question (called directly; no RunnableLambda wrapper is used here)
answers, questions = retrieve_and_rag(question, prompt_rag, generate_queries_decomposition)

In [None]:
def format_qa_pairs(questions, answers):
    """Render numbered question/answer pairs as one text block.

    Pairs are numbered from 1 and separated by a blank line; surrounding
    whitespace of the final text is stripped.
    """
    rendered_pairs = [
        f"Question {idx}: {q}\nAnswer {idx}: {a}\n\n"
        for idx, (q, a) in enumerate(zip(questions, answers), start=1)
    ]
    return "".join(rendered_pairs).strip()

# Turn the sub-question answers into the context block for the final prompt.
context = format_qa_pairs(questions, answers)

# Prompt: synthesize one answer to the original question from the Q+A pairs.
template = """Here is a set of Q+A pairs:

{context}

Use these to synthesize an answer to the question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

# Chain: prompt -> model -> plain string.
final_rag_chain = prompt | llm | StrOutputParser()

# Last expression of the cell, so the synthesized answer is displayed.
final_rag_chain.invoke({"context": context, "question": question})