In [3]:
from dotenv import load_dotenv
from pprint import pprint
# Load environment variables
load_dotenv()

True

In [5]:
from langchain_groq import ChatGroq

llm = ChatGroq(model="llama-3.2-90b-vision-preview")


In [35]:
from typing import Annotated

from typing_extensions import TypedDict

from langgraph.graph import StateGraph, START, END
from langgraph.graph.message import add_messages

from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.tools import StructuredTool, ToolException
from typing import List, Annotated
from langchain.agents import load_tools
from langchain_experimental.utilities import PythonREPL
from langgraph.prebuilt import ToolNode
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph.message import AnyMessage, add_messages



In [68]:

memory = MemorySaver()
c = {"configurable": {"thread_id": "1"}}

class State(TypedDict):
    messages: Annotated[list[AnyMessage], add_messages]


graph_builder = StateGraph(State)

def chatbot(state: State):
    ans = llm_with_tools.invoke(state["messages"], config=c['configurable'])
    return {"messages": [ans]}


graph_builder.add_node("chatbot", chatbot)
graph_builder.add_edge(START, "chatbot")
graph_builder.add_edge("chatbot", END)
graph = graph_builder.compile(checkpointer=memory)


def stream_graph_updates(user_input: str):
    for event in graph.stream({"messages": [("user", user_input)]}, config=c):
        for value in event:
            pprint(value)


while True:
    user_input = input("User: ")
    if user_input.lower() in ["quit", "exit", "q"]:
        print("Goodbye!")
        break

    stream_graph_updates(user_input)


'chatbot'
Goodbye!


In [61]:
state = graph.get_state(c)  # Get the current state for a given config
state.values

{'messages': [HumanMessage(content='helo', additional_kwargs={}, response_metadata={}, id='0610b76f-4fe9-4e45-85cd-07d7ce6770e9'),
  AIMessage(content='Hello. How can I help you today?', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 37, 'total_tokens': 47, 'completion_time': 0.052339809, 'prompt_time': 0.010652839, 'queue_time': 0.00690091, 'total_time': 0.062992648}, 'model_name': 'llama-3.2-90b-vision-preview', 'system_fingerprint': 'fp_9c2a937c92', 'finish_reason': 'stop', 'logprobs': None}, id='run-59029e57-19aa-4140-8d60-b78ecadfb8c5-0', usage_metadata={'input_tokens': 37, 'output_tokens': 10, 'total_tokens': 47})]}

In [134]:

from langchain_core.callbacks import CallbackManagerForRetrieverRun
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever
from qdrant_client import QdrantClient
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from typing import List

class QdrantRetriever(BaseRetriever):
    client_: QdrantClient
    embedding_model_: GoogleGenerativeAIEmbeddings
    collection_name_: str 
    with_payload_: bool 
    limit_: int  

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun = None
    ) -> List[Document]:
        # Generate query embeddings
        query_embeddings = self.embedding_model_.embed_query(query)

        # Perform a search in Qdrant using the client
        search_result = self.client_.query_points(  # Use self.client_
            collection_name=self.collection_name_,  # Use self.collection_name_
            query=query_embeddings,
            with_payload=self.with_payload_,  # Use self.with_payload_
            limit=self.limit_,  # Use self.limit_
        )

        # Extract documents from search results
        documents = []
        if hasattr(search_result, 'points'):
            for point in search_result.points:
                document = Document(
                    page_content=point.payload.get("text", ""),
                    metadata={"pdf_id": point.payload.get("pdf_id", ""), "score": point.score}
                )
                documents.append(document)

        return documents

# Instantiate QdrantRetriever with required parameters
Qretriever = QdrantRetriever(
    client_=client,
    collection_name_=COLLECTION_NAME,
    embedding_model_=EMBEDDING_MODEL,
    limit_=3,
    with_payload_=True
)

from langchain.tools.retriever import create_retriever_tool

retriever_tool = create_retriever_tool(
    Qretriever,
    "qdrant_retriever",
    "Search and return information about query provided by the user from the vectore store.",
)



# Load tools
arxiv_search_tool = load_tools(["arxiv"])[0]  # Assuming it returns a list, get the first tool

# Tavily search
tavily_search_tool = TavilySearchResults(max_results=2)

# Web scraper class
class WebScraper:
    def __init__(self, urls: List[str]):
        self.urls = urls

    def scrape_webpages(self) -> str:
        """Scrape the provided web pages for detailed information."""
        try:
            loader = WebBaseLoader(self.urls)
            docs = loader.load()
            return "\n\n".join(
                [
                    f'<Document name="{doc.metadata.get("title", "")}">\n{doc.page_content}\n</Document>'
                    for doc in docs
                ]
            )
        except ToolException as e:
            return self._handle_error(e)

    def _handle_error(self, error: ToolException) -> str:
        return f"The following errors occurred during tool execution: `{error.args[0]}`"

web_scraper_tool = StructuredTool.from_function(
    func=WebScraper.scrape_webpages,
    handle_tool_error=WebScraper._handle_error
)

# Python REPL tool
repl = PythonREPL()

def python_repl(
    code: Annotated[str, "The Python code to execute to generate visualization."],):
    """Execute Python code."""
    try:
        result = repl.run(code)
    except ToolException as e:
        return f"Failed to execute. Error: {repr(e)}"
    return f"Successfully executed:\n```python\n{code}\n```\nStdout: {result}"

repl_tool = StructuredTool.from_function(
    func=python_repl,
)

# Create the tool node
tools = [tavily_search_tool, web_scraper_tool, repl_tool, arxiv_search_tool, retriever_tool]
tool_node = ToolNode(tools=tools)



In [150]:
llm_with_tools = llm.bind_tools(tools)

# llm_with_tools.invoke('what is the encoder in attention is all yu need').tool_calls
# (tool_node.invoke({"messages": [llm_with_tools.invoke('what is the encoder in attention is all yu need')]}))['messages'][-1].content

In [None]:
# llm_with_tools.invoke('similar papers to Attention is all you need paper, and GAN').tool_calls

In [13]:
prompt = """For the following research task, create a structured plan to solve the problem step-by-step. For each plan, specify \
which external tool along with the tool input to gather or process evidence, which you can then store in \
a variable #E that can be referenced by later steps. Use the format (Plan, #E1, Plan, #E2, Plan, ...).

Available tools:
(1) arxiv_search_tool[input]: Searches for relevant academic papers and information on research topics from Arxiv. Useful for retrieving scholarly articles, studies, or theories. Input should be a search query or research question.
(2) tavily_search_tool[input]: A search tool useful for web queries related to technical or general information from the internet. Best for gathering supplementary information and broader web search results. Input should be a specific question or topic.
(3) web_scraper_tool[input]: Extracts detailed information from specific web pages. Ideal when you need in-depth content from particular sources. Input should be a list of URLs.
(4) repl_tool[input]: Executes Python code, including for calculations and visualizations. Use when computational processing or plotting is required. Input should be Python code.

Example:
Task: Conduct a literature review on the recent advancements in GAN architectures and summarize the key findings.

Plan: Search for recent research papers related to advancements in GAN architectures using arxiv_search_tool. #E1 = arxiv_search_tool["advancements in GAN architectures"]
Plan: Extract information from the results in #E1 on the key themes and architectures mentioned. #E2 = LLM[Summarize the main themes from #E1]
Plan: Use tavily_search_tool to find additional insights and popular opinions on recent GAN innovations. #E3 = tavily_search_tool["recent GAN innovations"]
Plan: Cross-reference findings from #E2 with details from #E3 for consistency and additional insights. #E4 = LLM[Summarize #E2 and #E3 with any contrasting points]

Begin! 
Provide detailed, logical plans for each step in the task. Each Plan should be followed by only one #E.

Task: {task}"""


In [70]:
from typing import Annotated, Literal, Sequence
from typing_extensions import TypedDict

from langchain import hub
from langchain_core.messages import BaseMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate

from pydantic import BaseModel, Field

from typing import Annotated, Sequence
from typing_extensions import TypedDict

from langchain_core.messages import BaseMessage

from langgraph.graph.message import add_messages


from langgraph.prebuilt import tools_condition

from langgraph.graph.message import AnyMessage, add_messages


class State(TypedDict):
    messages: Annotated[list[BaseMessage], add_messages]
    
    plan_string: str
    steps: List
    results: dict
    current_task: str

In [123]:
from qdrant_ops import connect_to_qdrant

client = connect_to_qdrant()

from langchain_google_genai import GoogleGenerativeAIEmbeddings
# Embedding model
EMBEDDING_MODEL = GoogleGenerativeAIEmbeddings(model='models/text-embedding-004')

# Collection name
COLLECTION_NAME = 'aireas-local'

Started Qdrant client.
Collection 'aireas-local' already exists.


In [124]:
query = 'abstract of Magvit'

query_embeddings = EMBEDDING_MODEL.embed_query(query)

# Query points from Qdrant
search_result = client.query_points(
    collection_name=COLLECTION_NAME,
    query=query_embeddings,
    with_payload=True,
    limit=3,
)

# Extracting necessary details
results = []
if hasattr(search_result, 'points'):
    for point in search_result.points:  # Access the points attribute
        results.append({
            "id": point.id,
            "score": point.score,
            "pdf_id": point.payload.get('pdf_id'),  # Use get to avoid KeyError
            "text": point.payload.get('text'),
        })

In [125]:
from langchain.tools.retriever import create_retriever_tool

from qdrant_client import QdrantClient

from langchain_core.retrievers import BaseRetriever
from typing import List

from langchain_core.callbacks import CallbackManagerForRetrieverRun
from langchain_core.documents import Document
