In [97]:
import os
from dotenv import load_dotenv
from langchain_core.messages import BaseMessage, HumanMessage
from langchain.tools.tavily_search import TavilySearchResults
from langchain.agents import create_react_agent
from langgraph.graph import MessagesState, END
from langchain_groq import ChatGroq
from langchain.prompts import ChatPromptTemplate,PromptTemplate
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import WebBaseLoader
from langchain.tools.retriever import create_retriever_tool
from langchain.agents.agent import AgentExecutor
from langchain.tools import  tool



In [79]:
from langchain_huggingface import HuggingFaceEmbeddings
embeddings=HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")


GROQ_API_KEY=os.getenv("GROQ_API_KEY")
os.environ["GROQ_API_KEY"]= GROQ_API_KEY

# llm = ChatAnthropic(model="claude-3-5-sonnet-latest")
llm=ChatGroq(model="llama3-70b-8192")

In [80]:
llm.invoke("hi")

AIMessage(content="Hi! It's nice to meet you. Is there something I can help you with or would you like to chat?", additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 25, 'prompt_tokens': 11, 'total_tokens': 36, 'completion_time': 0.071428571, 'prompt_time': 0.000127359, 'queue_time': 0.0547244, 'total_time': 0.07155593}, 'model_name': 'llama3-70b-8192', 'system_fingerprint': 'fp_dd4ae1c591', 'finish_reason': 'stop', 'logprobs': None}, id='run-079605c2-4ecc-401c-ab8a-8b6d77c3a40c-0', usage_metadata={'input_tokens': 11, 'output_tokens': 25, 'total_tokens': 36})

In [104]:
@tool
def search_tool(query: str) -> str:
    """A simple search tool that returns a fixed response."""
    tavily_search = TavilySearchResults()
    results = tavily_search.invoke(query)
    return results
    

In [82]:
print(search_tool("what is the capital of France?"))

[{'title': 'List of capitals of France - Wikipedia', 'url': 'https://en.wikipedia.org/wiki/List_of_capitals_of_France', 'content': 'Find sources:\xa0"List of capitals of France"\xa0–\xa0news\xa0· newspapers\xa0· books\xa0· scholar\xa0· JSTOR (July 2012) (Learn how and when to remove this message)\nThis is a chronological list of capitals of France. The capital of France has been Paris since its liberation in 1944.[1]\nChronology[edit]\nTournai (before 486), current-day Belgium\nSoissons (486–936)\nLaon (936–987)\nParis (987–1419), the residence of the Kings of France, although they were consecrated at Reims. [...] Bordeaux (September 1914), the government was relocated from Paris to Bordeaux very briefly during World War I, when it was feared that Paris would soon fall into German hands. These fears were alleviated after the German Army was pushed back at the First Battle of the Marne.\nTours (10–13 June 1940), the city served as the temporary capital of France during World War II afte

In [103]:
import yfinance
@tool
def get_stock_price(ticker:str):
    """A simple stock price tool that returns a fixed response."""
    stock = yfinance.Ticker(ticker)
    stock_price = stock.history()
    return stock_price


In [84]:
print(get_stock_price("AAPL"))

                                 Open        High         Low       Close  \
Date                                                                        
2025-03-12 00:00:00-04:00  220.139999  221.750000  214.910004  216.979996   
2025-03-13 00:00:00-04:00  215.949997  216.839996  208.419998  209.679993   
2025-03-14 00:00:00-04:00  211.250000  213.949997  209.580002  213.490005   
2025-03-17 00:00:00-04:00  213.309998  215.220001  209.970001  214.000000   
2025-03-18 00:00:00-04:00  214.160004  215.149994  211.490005  212.690002   
2025-03-19 00:00:00-04:00  214.220001  218.759995  213.750000  215.240005   
2025-03-20 00:00:00-04:00  213.990005  217.490005  212.220001  214.100006   
2025-03-21 00:00:00-04:00  211.559998  218.839996  211.279999  218.270004   
2025-03-24 00:00:00-04:00  221.000000  221.479996  218.580002  220.729996   
2025-03-25 00:00:00-04:00  220.770004  224.100006  220.080002  223.750000   
2025-03-26 00:00:00-04:00  223.509995  225.020004  220.470001  221.529999   

In [102]:
@tool
def summarizer(state:MessagesState):
    """A simple summarizer that returns a good and user friendly response."""
    messages=state['messages']
    print("---msg---",messages)
    last_message=messages[-1]
    print("---last---",last_message)
    prompt=ChatPromptTemplate.from_template(
            """You are an intelligent, helpful, and professional assistant. Your task is to respond to the user's query in a thoughtful, clear, and well-structured manner. Make sure the final response is:

            Accurate and directly addresses the user's question or request.

            Written in a friendly yet professional tone.

            Easy to understand, avoiding jargon unless necessary (and explain any complex terms).

            Well-organized, with logical flow and, if needed, use bullet points, headings, or numbered steps for clarity.

            Always think deeply about the user’s needs, context, and intent. Aim to provide a high-quality final answer that the user can trust and rely on.
            Query:{question}
            """)
    chain = prompt | llm
#     response=chain.invoke({"question":last_message})
    response=chain.invoke({"question":messages})
    return response

In [86]:
res=summarizer({"messages": "what is the capital of France?"})

---msg--- what is the capital of France?
---last--- ?


In [87]:
res

AIMessage(content='Bonjour!\n\nThe capital of France is Paris (French pronunciation: \u200b[paʁi]). Located in the north-central part of the country, Paris is not only the capital but also the most populous city in France, with a rich history dating back to the 3rd century. Known as the "City of Light" (La Ville Lumière), Paris is famous for its stunning architecture, art museums, fashion, and romantic atmosphere.\n\nSome interesting facts about Paris:\n\n• The Eiffel Tower, built for the 1889 World\'s Fair, is one of the most iconic landmarks in Paris and a symbol of French culture.\n• The Louvre Museum, home to the Mona Lisa, is one of the world\'s largest and most famous museums.\n• Paris is often called the culinary capital of the world, with its exquisite cuisine, wine, and bakeries.\n\nI hope this answers your question! Do you have any other queries about France or Paris?', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 193, 'prompt_tokens': 157, 't

In [98]:

@tool
def retriever_call(state: dict):
    """
    A retriever function that accepts multiple URLs, scrapes them using WebBaseLoader,
    and saves the cleaned data into a vector database.
    """
    messages = state['messages']

    # Extract last message (assuming it includes multiple URLs)
    last_message = messages[-1]

    # Optional: handle string with multiple URLs separated by comma, newline, or space
    if isinstance(last_message, str):
        urls = [url.strip() for url in last_message.replace(',', '\n').splitlines() if url.strip()]
    else:
        urls = last_message  # if already a list

    prompt = ChatPromptTemplate.from_template(
        """You are a smart assistant integrated with LangChain tools. The user has provided multiple URLs. 
For each URL, you need to:

1. Use `WebBaseLoader` to load the content from the web page.
2. Clean and extract meaningful text from the loaded content (e.g., remove navigation, ads, etc.).
3. Save all the cleaned documents into a vector store using a suitable embedding model (like `OpenAIEmbeddings` or `HuggingFaceEmbeddings`).
4. Ensure that the documents are indexed correctly for retrieval later.

Here are the URLs to process:
{urls}

Once done, confirm that the content from all URLs has been scraped and saved successfully to the vector database.
"""
    )
    
    chain = prompt | llm
    response = chain.invoke({"urls": "\n".join(urls)})
    return response


In [101]:
@tool
def vectordb_generate(urls: list):
    """
    A retriever function that accepts multiple URLs, scrapes them using WebBaseLoader,
    splits the content, and saves it into a vector database.
    """
    print("---urls---", urls)
    print("------VectorDB------")

    # Load and flatten documents
    docs = []
    for url in urls:
        loaded_docs = WebBaseLoader(url).load()
        docs.extend(loaded_docs)  # Flatten the nested list

    print(f"---Loaded {len(docs)} documents---")

    # Split documents
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    texts = text_splitter.split_documents(docs)

    # Save to Chroma vector store
    vectordb = Chroma.from_documents(texts, embeddings, persist_directory="stock_test_chroma")
    return vectordb


In [90]:
vector=vectordb_generate(["https://www.bbc.com/news/world-us-canada-67012345","http://langchain-ai.github.io/langgraph/tutorials/multi_agent/multi-agent-collaboration"])

---urls--- ['https://www.bbc.com/news/world-us-canada-67012345', 'http://langchain-ai.github.io/langgraph/tutorials/multi_agent/multi-agent-collaboration']
------VectorDB------
---Loaded 2 documents---


In [91]:
retriever=vector.as_retriever()

In [92]:
retriever.invoke("Any new about the canada?")

[Document(metadata={'language': 'en-GB', 'source': 'https://www.bbc.com/news/world-us-canada-67012345', 'title': 'BBC'}, page_content='BBCSkip to contentBritish Broadcasting CorporationHomeNewsSportBusinessInnovationCultureArtsTravelEarthAudioVideoLiveHomeNewsIsrael-Gaza WarWar in UkraineUS & CanadaUKUK PoliticsEnglandN. IrelandN. Ireland PoliticsScotlandScotland PoliticsWalesWales PoliticsAfricaAsiaChinaIndiaAustraliaEuropeLatin AmericaMiddle EastIn PicturesBBC InDepthBBC VerifySportBusinessExecutive LoungeTechnology of BusinessFuture of BusinessInnovationTechnologyScience & HealthArtificial IntelligenceAI v the MindCultureFilm & TVMusicArt & DesignStyleBooksEntertainment NewsArtsArts in MotionTravelDestinationsAfricaAntarcticaAsiaAustralia and PacificCaribbean & BermudaCentral AmericaEuropeMiddle EastNorth AmericaSouth AmericaWorld’s TableCulture & ExperiencesAdventuresThe SpeciaListEarthNatural WondersWeather & ScienceClimate SolutionsSustainable BusinessGreen LivingAudioPodcastsRad

In [93]:
retriever_tool=create_retriever_tool(
    retriever=retriever,
    name="retriever_tool",
    description="A tool to get Data about the query from the vetor database use it for any query related to the data in the vector database",
    # return_only_outputs=True,
)

In [136]:
from langchain.tools import Tool

from langchain.schema import BaseMessage

def safe_message_trimmer(messages: list, max_len: int = 4000) -> list:
    """Trims list of messages to fit within token limits (approx by character length)."""
    def get_text(msg):
        return msg.content if isinstance(msg, BaseMessage) else str(msg)

    total_tokens = sum(len(get_text(m)) for m in messages)
    while total_tokens > max_len and len(messages) > 1:
        messages.pop(0)
        total_tokens = sum(len(get_text(m)) for m in messages)
    return messages

trim_tool = Tool(
    name="safe_message_trimmer",
    func=lambda msgs: safe_message_trimmer(msgs),
    description="Trims chat history to avoid exceeding token limits. Use this when messages are too long."
)


In [137]:
REACT_PROMPT = PromptTemplate.from_template("""
# ReAct Agent Prompt

You are an intelligent AI assistant with access to various tools to help answer user questions accurately. Your goal is to provide helpful, accurate, and concise responses.

## Tools Available
{tools}
{tool_names}
You have the following tools at your disposal:

1. `retriever_call`: Retrieves information from a vector database of documents
   - Input: A query string related to the documents in the knowledge base
   - Output: Relevant document chunks from the knowledge base

2. `search_tool`: Searches the web for recent or specific information
   - Input: A search query
   - Output: Search results including snippets and URLs

3. `get_stock_price`: Fetches current or historical stock price information
   - Input: Stock ticker symbol and optional date
   - Output: Stock price data

4. `summarizer`: Summarizes long texts or documents
   - Input: Text content to summarize
   - Output: Concise summary of the input text

5. `retriever_tool`: Alternative retriever to access different document collections
   - Input: A specific query for specialized document retrieval
   - Output: Relevant document chunks from specialized collections

6. `safe_message_trimmer`: Trims chat history to stay within token limits  
   - Input: A list of messages  
   - Output: A shortened list that fits within the model’s max context length
                                            

## How to Use Tools

For each user question, follow this process:

1. **Thought**: First, analyze what the user is asking and determine which tool(s) might be helpful.
2. **Action**: Select a tool to use and provide the appropriate input.
3. **Observation**: Review the information returned by the tool.
4. **Thought**: Reflect on whether the information answers the question or if additional tools/queries are needed.
5. Repeat steps 2-4 as necessary until you have enough information.
6. **Answer**: Provide a final answer that directly addresses the user's question, citing sources when appropriate.

## Guidelines

- Think step-by-step - don't jump to conclusions or use tools without reasoning first
- Use retrieval tools before generating information from memory when factual accuracy is important
- When using search or retrieval tools, craft specific queries that target the exact information needed
- Only use tools when necessary - if you can confidently answer without tools, do so
- Always verify information from tools before presenting it as fact
- Keep your final answer concise but complete
- When using information from retrieval or search tools, cite the source

Question: {input}
{agent_scratchpad}
""")

In [138]:
tools = [
    retriever_call,
    search_tool,
    get_stock_price,
    summarizer,
    retriever_tool,
    trim_tool  # ✅ Our new safe input tool
]

llm_with_tools = llm.bind_tools(tools)

tools


[StructuredTool(name='retriever_call', description='A retriever function that accepts multiple URLs, scrapes them using WebBaseLoader,\nand saves the cleaned data into a vector database.', args_schema=<class 'langchain_core.utils.pydantic.retriever_call'>, func=<function retriever_call at 0x7973037a2050>),
 StructuredTool(name='search_tool', description='A simple search tool that returns a fixed response.', args_schema=<class 'langchain_core.utils.pydantic.search_tool'>, func=<function search_tool at 0x7973037a3130>),
 StructuredTool(name='get_stock_price', description='A simple stock price tool that returns a fixed response.', args_schema=<class 'langchain_core.utils.pydantic.get_stock_price'>, func=<function get_stock_price at 0x7973037a2560>),
 StructuredTool(name='summarizer', description='A simple summarizer that returns a good and user friendly response.', args_schema=<class 'langchain_core.utils.pydantic.summarizer'>, func=<function summarizer at 0x7973037a0e50>),
 Tool(name='re

In [142]:
agent = create_react_agent(
    llm_with_tools,
    tools=tools,
    prompt=REACT_PROMPT,
)

agent_executor = AgentExecutor.from_agent_and_tools(
    agent=agent,
    tools=tools,
    verbose=True,
    return_intermediate_steps=False  # ✅ This matters
)


In [146]:
def super_trim_messages(messages, max_chars=3500):
    trimmed = []
    total = 0
    for msg in reversed(messages):
        content = msg.content if hasattr(msg, "content") else str(msg)
        if total + len(content) <= max_chars:
            trimmed.insert(0, msg)
            total += len(content)
    return trimmed


In [147]:
# messages= [
#             HumanMessage(
#                 content="Can you get me the latest news about the Canada?"
#             ),
#         ],
# messages = safe_message_trimmer(messages)



# agent_executor.invoke(
#     {
#         "input": "Can you get me the latest news about the Canada?",
#         "messages": messages,
#     }
# )

messages = [
    HumanMessage(content="Can you get me the latest news about the Canada?")
]

messages = super_trim_messages(messages, max_chars=3500)

response = agent_executor.invoke({
    "input": messages[-1].content  # Just send the latest trimmed input
})




[1m> Entering new AgentExecutor chain...[0m


BadRequestError: Error code: 400 - {'error': {'message': 'Please reduce the length of the messages or completion.', 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}