In [13]:
from langgraph.graph import StateGraph,START,END
from langchain_mistralai import ChatMistralAI,MistralAIEmbeddings
from dotenv import load_dotenv
from typing import Annotated,TypedDict
from langchain_core.messages import HumanMessage,BaseMessage
from langgraph.graph.message import add_messages
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langgraph.prebuilt import ToolNode,tools_condition
from langchain.tools import tool

In [2]:
load_dotenv()

True

In [3]:
llm = ChatMistralAI(model = 'mistral-small-latest')

In [6]:
loader = PyPDFLoader("thebook.pdf")
docs = loader.load()


In [9]:
splitter = RecursiveCharacterTextSplitter(chunk_size = 1000,chunk_overlap = 200)
chunks = splitter.split_documents(docs)

In [11]:
embeddings = MistralAIEmbeddings(model = 'mistral-embed')
vectorstore = FAISS.from_documents(docs,embeddings)

In [12]:
retriver = vectorstore.as_retriever(search_type = 'similarity',search_kwargs={'k':4})

In [14]:
retriver.invoke('what is machine learning')

[Document(id='0bbefc78-eacb-4128-9c95-a2a75337b9c0', metadata={'producer': 'pdfTeX-1.40.10', 'creator': 'LaTeX with hyperref package', 'creationdate': '2010-10-01T15:47:05-07:00', 'author': 'AlexJ.SmolaandVishyS.V.N.Vishwanathan', 'title': 'AnIntroductiontoMachineLearning', 'subject': '', 'keywords': '', 'moddate': '2010-10-01T15:47:05-07:00', 'trapped': '/False', 'ptex.fullbanner': 'This is pdfTeX, Version 3.1415926-1.40.10-2.2 (TeX Live/MacPorts 2009_6) kpathsea version 5.0.0', 'source': 'thebook.pdf', 'total_pages': 234, 'page': 11, 'page_label': '4'}, page_content="4 1 Introduction\nWeb  Images  Maps  News  Shopping  Gmail  more !\n \n    \nSponsored Links\nMachine Learning\nGoogle Sydney needs machine\nlearning experts. Apply today!\nwww.google.com.au/jobs\nSign in\n \n Search   Advanced Search\n  Preferences\n Web    Scholar   Results 1 - 10 of about 10,500,000 for machine learning. (0.06 seconds) \nMachine learning - Wikipedia, the free encyclopedia\nAs a broad subfield of artif

In [15]:
@tool
def rag_pdf(query):
    """
    Retrieve relevant information from the pdf document.
    Use this tool when the user asks factual / conceptual questions
    that might be answered from the stored documents.
    """

    result = retriver.invoke(query)

    context = [doc.page_content for doc in result]
    metadata = [doc.metadata for doc in result]

    return {
        'query':query,
        'context':context,
        'metadata':metadata
    }


In [16]:
tools = [rag_pdf]
llm_with_tools = llm.bind_tools(tools)

In [17]:
class ChatState(TypedDict):

    messages :Annotated[list[BaseMessage],add_messages]
    


In [19]:
def chat_node(state: ChatState):

    messages = state['messages']

    response = llm_with_tools.invoke(messages)

    return {'messages': [response]}

In [20]:
tool_node = ToolNode(tools)

In [24]:
graph = StateGraph(ChatState)

graph.add_node("Chat_node",chat_node)
graph.add_node('tools',tool_node)

graph.add_edge(START,"Chat_node")
graph.add_conditional_edges("Chat_node",tools_condition)
graph.add_edge("tools","Chat_node")


chatbot = graph.compile()

In [25]:
result = chatbot.invoke(
    {
        "messages": [
            HumanMessage(
                content=(
                    "Using the pdf notes, explain how to find the ideal value of K in KNN"
                )
            )
        ]
    }
)

In [29]:
print(result)


{'messages': [HumanMessage(content='Using the pdf notes, explain how to find the ideal value of K in KNN', additional_kwargs={}, response_metadata={}, id='ab9be412-1862-403e-98bc-f8d835d6554b'), AIMessage(content='To find the ideal value of \\( K \\) in the K-Nearest Neighbors (KNN) algorithm, you can follow these general steps:\n\n1. **Understand the Role of \\( K \\)**:\n   - \\( K \\) is the number of nearest neighbors to consider when making a prediction.\n   - A smaller \\( K \\) can lead to a model that is more sensitive to noise and outliers, while a larger \\( K \\) can lead to a smoother decision boundary but may underfit the data.\n\n2. **Cross-Validation**:\n   - Use a technique like k-fold cross-validation to evaluate the performance of the model for different values of \\( K \\).\n   - Split the data into \\( k \\) equal parts, train the model on \\( k-1 \\) parts, and validate on the remaining part. Repeat this process for each part and average the results.\n\n3. **Grid S