In [20]:
import os
from dotenv import load_dotenv
load_dotenv()


os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")
os.environ["TAVILY_API_KEY"] = os.getenv("TAVILY_API_KEY")
os.environ["HF_TOKEN"] = os.getenv("HF_TOKEN")

In [21]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
#embedding
embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
#index
urls = [
    "https://lilianweng.github.io/posts/2024-04-12-diffusion-video/",
    "https://lilianweng.github.io/posts/2024-07-07-hallucination/"
]
#load
docs = [WebBaseLoader(url).load() for url in urls]
docs_list = [item for sublist in docs for item in sublist]

#split
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size = 500, chunk_overlap=0
)

doc_splits = text_splitter.split_documents(docs_list)
#Add vectorstore
vectorstore = FAISS.from_documents(
    documents=doc_splits,
    embedding=embeddings
)

retriever = vectorstore.as_retriever()

In [22]:
# Retrieval Grader
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq

from pydantic import BaseModel , Field

#Data Model
class GradeDocuments(BaseModel):
    """Binary score for relevance check on retrieved documents"""

    binary_search : str = Field(
        description="Documents are relevant to the question , 'yes' or 'no'"
    )

#llm with function call
llm = ChatGroq(model= "llama-3.3-70b-versatile", temperature=0)
structured_llm_gader = llm.with_structured_output(GradeDocuments)

#prompt

system = """You are a grader assessing relevance of a retrieved document to a user question. \n
            If the document contains keyword(s) or semantic meaning relateed to the question, grade it as relevant. \n
            GIve a binary score 'yes' or 'no' score to indicate wheather the document is relevant to the question."""

grade_prompt = ChatPromptTemplate.from_messages(
    [
        ("system" , system),
        ("human" , "Retrieved document : \n\n {document} \n\n User question: {question}"),
    ]
)
#chain the prompt with the LLM

retrieval_grader = grade_prompt | structured_llm_gader
question = "Hallucinations in LLMs"
docs = retriever.invoke(question)
doc_txt= docs[1].page_content
print(retrieval_grader.invoke({"question" : question , "document": doc_txt}))

binary_search='yes'


In [23]:
from langchain_classic import hub
from langchain_core.output_parsers import StrOutputParser

#prompt 
prompt = hub.pull("rlm/rag-prompt")

#llm
llm = ChatGroq(model= "llama-3.3-70b-versatile", temperature=0)

#post-processing
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

#chain
rag_chain = prompt | llm | StrOutputParser()

generation = rag_chain.invoke({"context" : docs , "question" : question})
print(generation)

Hallucinations in LLMs refer to the model generating unfaithful, fabricated, inconsistent, or nonsensical content. There are two types of hallucination: in-context hallucination, where the model output should be consistent with the source content in context, and extrinsic hallucination, where the model output should be grounded by the pre-training dataset. To avoid hallucination, LLMs need to be factual and acknowledge not knowing the answer when applicable.


In [24]:
## Question Re-Writer

#llm
llm = ChatGroq(model= "llama-3.3-70b-versatile", temperature=0)

#Prompt
system = """You a question re-writer that converts an input question to a better version that is optimized \n
            for web search. Look at the input and try to reson about the underlying semantic intent / meaning."""

re_write_prompt = ChatPromptTemplate.from_messages(
    [
        ("system" , system),
        (
            "human",
            "Here is the initial question : \n\n {question} \n Formulate an improved question.",
        ),
    ]
)

question_rewrite = re_write_prompt | llm | StrOutputParser()
question_rewrite.invoke({"question" : question})

'Here\'s a rewritten version of the question that\'s optimized for web search:\n\n"What are the causes and implications of hallucinations in Large Language Models (LLMs)?"\n\nThis revised question aims to capture the underlying semantic intent of the original question, which is to understand the phenomenon of hallucinations in LLMs. The added keywords ("causes", "implications", and the explicit mention of "Large Language Models") can help retrieve more relevant and accurate search results.\n\nAlternatively, other possible rewritten questions could be:\n\n* "How do hallucinations occur in artificial intelligence language models?"\n* "What are the effects of hallucinations on the performance of Large Language Models?"\n* "Can LLMs be designed to prevent or mitigate hallucinations, and what are the current research directions?"\n\nThese revised questions can help you find more specific and informative results when searching the web.'

In [25]:
## Search
from langchain_community.tools.tavily_search import TavilySearchResults

web_search_tool = TavilySearchResults(k=3)

In [26]:
from typing import List
from typing_extensions import TypedDict

class GraphState(TypedDict):
    """
    Represent the state of our graph.

    Attribute :
        question : question
        generation : LLM generation
        web_search : whether to add search
        documents : list of documents
    """
    question : str
    generation : str
    web_search : str
    documents : List[str]

    

In [27]:
from langchain_classic.schema import Document

def retrieve(state):
    """
    Retrieve documents

    Args:
        state(dict): The current graph state
    
    Returns:
        state (dict): New key added to state, documents, that contains retrieved documents
    """
    print("----Retrieve----")
    question = state["question"]

    #Retrieval
    documents = retriever.invoke(question)
    return {"docments" : documents , "question" : question}


def generate(state):
    """
    Generate answer

    Args:
        state(dict): The current graph state
    
    Returns:
        state (dict): New key added to state, generation, that contains LLM generation
    """
    print("----GENERATE----")
    question = state["question"]
    documents = state["documents"]

    #RAG generation

    generation = rag_chain.invoke({"context" : documents , "question" : question})
    return {"documents" : documents , "question":question , "generation" : generation }


def grade_documents(state):
    """
        Determines whether the retrieved documnets are relevant to the question.
        Args:
            state(dict): The current graph state
    
        Returns:
            state (dict): Updates doumnets key wth only filtered relevant documents
    """
    print("----CHECK DOCUMENT RELEVANCE TO QUESTION----")
    question = state["question"]
    documents = state["documents"]

    #score each doc
    filtered_docs = []
    web_search= "No"
    for d in documents:
        score = retrieval_grader.invoke(
            {"question" : question, "document": d.page_content}
            )

        grade = score.binary_score
        if grade == "yes":
            print("----GRADE : DOCUMENT RELEVANT----")
            filtered_docs.append(d)
        else:
            print("----GRADE : DOCUMENT NOT RELEVANT")
            web_search = "Yes"
            continue

    return {"documents" :filtered_docs , "question" : question , "web_search" : web_search}


def transform_query(state):

    print("----TRANSFORM QUERY----")
    question = state["question"]
    documents = state["documents"]

    better_question = question_rewrite.invoke({"question" : question})
    return {"documents" : documents , "question" : better_question}


def web_search(state):
    print("----WEB SEARCH----")
    question = state["question"]
    documents = state["documents"]

    #web search
    docs = web_search_tool.invoke({"query" : question})
    web_results = "\n".join([d["content"] for d in docs])
    web_results = Document(page_content=web_results)
    documents.append(web_results)
    return {"documents" : documents , "question" : question}


def decide_to_generate(state):
    print("----ACCESS GRADED DOCUMENTS----")
    state["question"]
    web_search = state["web_search"]
    state["documents"]

    if web_search == "yes":
        print(
            "----DECISION : ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, TRANSFORM QUERY----"
        )
        return "transform_query"
    else:
        print("----DECISION : GENERATE----")
        return "generate"






In [28]:
from langgraph.graph import END , START , StateGraph
workflow = StateGraph(GraphState)

#Define nodes
workflow.add_node("retrieve" , retrieve)
workflow.add_node("grade_documents" , grade_documents)
workflow.add_node("generate" , generate)
workflow.add_node("transform_query" , transform_query)
workflow.add_node("web_search_node" , web_search)

#Build graph
workflow.add_edge(START ,"retrieve")
workflow.add_edge("retrieve" , "grade_documents")
workflow.add_conditional_edges(
    "grade_documents",
    decide_to_generate,
    {
        "transform_query" : "transform_query",
        "generate" : "generate",
    },
)
workflow.add_edge("transform_query" , "web_search_node")

workflow.add_edge("web_search_node" , "generate")

app = workflow.compile()

In [29]:
from IPython.display import Image , display
display(Image(app.get_graph().draw_mermaid_png()))


ValueError: Failed to reach https://mermaid.ink API while trying to render your graph. Status code: 400.

To resolve this issue:
1. Check your internet connection and try again
2. Try with higher retry settings: `draw_mermaid_png(..., max_retries=5, retry_delay=2.0)`
3. Use the Pyppeteer rendering method which will render your graph locally in a browser: `draw_mermaid_png(..., draw_method=MermaidDrawMethod.PYPPETEER)`

In [None]:
app.invoke({"question" : "what are the types of agent memory ?"})

----Retrieve----
----CHECK DOCUMENT RELEVANCE TO QUESTION----


KeyError: 'documents'