In [1]:
import os
from dotenv import load_dotenv, get_key
load_dotenv()

True

In [2]:
#pip install duckduckgo-search langchain langchain-google-genai
#pip install wikipedia
#pip install duckduckgo-search
#pip install arxiv
#pip install youtube

In [3]:
from langchain_google_genai import ChatGoogleGenerativeAI
llm = ChatGoogleGenerativeAI(
    model="gemini-pro",
    google_api_key=os.getenv('GEMINI_API_KEY'),
    temperature=0.1
)

In [87]:
from langchain.agents import Tool
from langchain.tools import WikipediaQueryRun
from langchain.utilities import WikipediaAPIWrapper
from langchain.tools import DuckDuckGoSearchRun
from langchain.tools import YouTubeSearchTool, ArxivQueryRun

ddgs = DuckDuckGoSearchRun()
wikipedia = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())
youtube = YouTubeSearchTool()
arxiv = ArxivQueryRun()

tools = []

tools.append(
   Tool(
       name="DuckDuckGo Search",
       func=ddgs.run,
       description="Useful to browse information from the Internet.",
   )
)

tools.append(
   Tool(
       name="Wikipedia Search",
       func=wikipedia.run,
       description="Useful when users request about well known things.",
   )
)

tools.append(
   Tool(
       name="Youtube Search",
       func=youtube.run,
       description="Useful for when the user explicitly asks you to look on Youtube or Videos.",
   )
)

tools.append(
   Tool(
       name="arxiv Search",
       func=arxiv.run,
       description="Useful for when the user ask for publications, paper, journal or academic research works.",
   )
)

print(tools)

[Tool(name='DuckDuckGo Search', description='Useful to browse information from the Internet.', func=<bound method BaseTool.run of DuckDuckGoSearchRun(api_wrapper=DuckDuckGoSearchAPIWrapper(region='wt-wt', safesearch='moderate', time='y', max_results=5, backend='api', source='text'))>), Tool(name='Wikipedia Search', description='Useful when users request about well known things.', func=<bound method BaseTool.run of WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper(wiki_client=<module 'wikipedia' from '/Users/hissain/anaconda3/lib/python3.11/site-packages/wikipedia/__init__.py'>, top_k_results=3, lang='en', load_all_available_meta=False, doc_content_chars_max=4000))>), Tool(name='Youtube Search', description='Useful for when the user explicitly asks you to look on Youtube or Videos.', func=<bound method BaseTool.run of YouTubeSearchTool()>), Tool(name='arxiv Search', description='Useful for when the user ask for publications, paper, journal or academic research works.', func=<bound metho

In [88]:
from langchain_core.prompts import PromptTemplate
from langchain_core.tools import StructuredTool
from readability import Document as ReadabilityDocument
from bs4 import BeautifulSoup
import requests
import os

def split_text(text: str, max_length: int) -> list[str]:
    """Split the text into manageable chunks."""
    sentences = text.split(". ")
    chunks = []
    current_chunk = []
    current_length = 0

    for sentence in sentences:
        sentence_length = len(sentence) + 2
        if current_length + sentence_length > max_length:
            chunks.append(". ".join(current_chunk) + ".")
            current_chunk = []
            current_length = 0
        current_chunk.append(sentence)
        current_length += sentence_length

    if current_chunk:
        chunks.append(". ".join(current_chunk) + ".")

    return chunks


def extract_article(url: str) -> str:
    """Extract the main article content from a given URL."""
    response = requests.get(url)
    if response.status_code != 200:
        print(response.status_code)
        return None

    soup = BeautifulSoup(response.text, "html.parser")
    paragraphs = soup.find_all('p')
    article_text = "\n".join([para.get_text(separator="\n") for para in paragraphs])
    print(article_text)
    chunks = split_text(article_text, 3000)
    
#     doc = ReadabilityDocument(response.text)
#     raw_html = doc.content()
#     print(raw_html)
#     soup = BeautifulSoup(raw_html, "html.parser")
#     plain_text = soup.get_text(separator="\n")
#     chunks = split_text(plain_text, 4000)
    prompt_extract = (
        "You are an assistant tasked with extracting the main article text from the provided content. "
        "The input may include noise like HTML tags or unrelated content. "
        "Do not add any extra response except the extracted text. "
        "If article text does not make sense or you cannot extract text from the article, simply return an empty string. "
        "Now, please clean the text and return the readable article content in a single paragraph format.\n\n"
        "Content: {content}"
    )

    prompt = PromptTemplate.from_template(prompt_extract)
    chain = prompt | llm
    processed_chunks = []
    for chunk in chunks:
        result = chain.invoke({"content": chunk})
        processed_chunks.append(result.content)

    return "\n".join(processed_chunks)

extractor_tool = Tool(
    name="Article Extractor",
    description="Extract the main article content from a given URL when asked.",
    func=extract_article
)

tools.append(extractor_tool)

print(tools)

[Tool(name='DuckDuckGo Search', description='Useful to browse information from the Internet.', func=<bound method BaseTool.run of DuckDuckGoSearchRun(api_wrapper=DuckDuckGoSearchAPIWrapper(region='wt-wt', safesearch='moderate', time='y', max_results=5, backend='api', source='text'))>), Tool(name='Wikipedia Search', description='Useful when users request about well known things.', func=<bound method BaseTool.run of WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper(wiki_client=<module 'wikipedia' from '/Users/hissain/anaconda3/lib/python3.11/site-packages/wikipedia/__init__.py'>, top_k_results=3, lang='en', load_all_available_meta=False, doc_content_chars_max=4000))>), Tool(name='Youtube Search', description='Useful for when the user explicitly asks you to look on Youtube or Videos.', func=<bound method BaseTool.run of YouTubeSearchTool()>), Tool(name='arxiv Search', description='Useful for when the user ask for publications, paper, journal or academic research works.', func=<bound metho

In [89]:
from langchain.chains.summarize import load_summarize_chain
from langchain.docstore.document import Document

def mapreduce_summarize_tool(input_text: str) -> str:
    document = Document(page_content=input_text)
    summarize_chain = load_summarize_chain(llm, chain_type="map_reduce")
    summary = summarize_chain.run([document])
    return summary

summarize_tool = Tool(
    name="summarizing tool",
    description="When asked for summarizing, summarizes the articles or documents using MapReduce.",
    func=mapreduce_summarize_tool,
)

tools.append(summarize_tool)

print(tools)

[Tool(name='DuckDuckGo Search', description='Useful to browse information from the Internet.', func=<bound method BaseTool.run of DuckDuckGoSearchRun(api_wrapper=DuckDuckGoSearchAPIWrapper(region='wt-wt', safesearch='moderate', time='y', max_results=5, backend='api', source='text'))>), Tool(name='Wikipedia Search', description='Useful when users request about well known things.', func=<bound method BaseTool.run of WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper(wiki_client=<module 'wikipedia' from '/Users/hissain/anaconda3/lib/python3.11/site-packages/wikipedia/__init__.py'>, top_k_results=3, lang='en', load_all_available_meta=False, doc_content_chars_max=4000))>), Tool(name='Youtube Search', description='Useful for when the user explicitly asks you to look on Youtube or Videos.', func=<bound method BaseTool.run of YouTubeSearchTool()>), Tool(name='arxiv Search', description='Useful for when the user ask for publications, paper, journal or academic research works.', func=<bound metho

In [90]:
from langchain.agents import initialize_agent, AgentType

agent = initialize_agent(
    tools,
    llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    handle_parsing_errors=True,
    verbose=True,
)

In [11]:
response = agent.run("What is the latest news on wearable technology?")
print(response)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI should search the web for the latest news on wearable technology.
Action: DuckDuckGo Search
Action Input: latest news on wearable technology[0m

  ddgs_gen = ddgs.text(



Observation: [36;1m[1;3mWearable technology · Why are doctors wary of wearables? · Improved privacy for children's wristband scheme · Fitbit denies update made devices unusable · Google's ... Find the latest Wearables news from WIRED. See related science and technology articles, photos, slideshows and videos. The latest news and comment on wearable technology. ... Tech · Global development · Obituaries. Wearable technology. The latest news and comment on wearable ... Latest Headlines · Generating Electricity from Device On Clothes · Temp-Controlled Clothing: Squid-Inspired Fabric · Smart Fabric Converts Body Heat Into ... Wearables News · OnePlus Buds Ace 2 With 12.4mm Drivers, Up to 43 Hours Total Battery Life Launched: Price, Features · Boat Enigma Daze, Enigma Gem Smartwatches ...[0m
Thought:[32;1m[1;3mThe latest news on wearable technology includes the release of new products, such as the OnePlus Buds Ace 2 and the Boat Enigma Daze and Enigma Gem smartwatches. There are also 

In [14]:
response = agent.run("What is the latest publication on Beamforming?")
print(response)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI should search for the latest publication on Beamforming on arxiv.
Action: arxiv Search
Action Input: What is the latest publication on Beamforming?[0m
Observation: [36;1m[1;3mPublished: 2017-09-09
Title: Optimization of Massive Full-Dimensional MIMO for Positioning and Communication
Authors: Seongah Jeong, Osvaldo Simeone, Joonhyuk Kang
Summary: Massive Full-Dimensional multiple-input multiple-output (FD-MIMO) base
stations (BSs) have the potential to bring multiplexing and coverage gains by
means of three-dimensional (3D) beamforming. Key technical challenges for their
deployment include the presence of limited-resolution front ends and the
acquisition of channel state information (CSI) at the BSs. This paper
investigates the use of FD-MIMO BSs to provide simultaneously high-rate data
communication and mobile 3D positioning in the downlink. The analysis
concentrates on the problem of beamforming design by accounting for

In [9]:
response = agent.run("Summarise the article, https://www.bbc.com/news/articles/crk01exykx4o")
print(response)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI should use the Article Extractor to get the main content of the article
Action: Article Extractor
Action Input: https://www.bbc.com/news/articles/crk01exykx4o[0m
Observation: [33;1m[1;3m
For the Turas family, this Christmas marks their first together, with little Leon's greatest gift being his father's presence. However, many Ukrainians remain in desperate anticipation of news about their loved ones. In Kyiv, relatives and activists gather for a Christmas demonstration, pleading for the release of Ukrainian prisoners. They endure the freezing cold, lining a main street as motorists express solidarity with deafening honks. Tetiana, whose son Artem has been captive for nearly three years, hopes for a Christmas miracle and envisions their reunion. Liliya Ivashchyk, a ballet dancer, holds a placard for her boyfriend Bohdan, captured by Russian forces in 2022. Despite sending daily messages, she has had no contact with him. B

In [10]:
response = agent.run("https://www.bbc.com/news/articles/crk01exykx4o")
print(response)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI should extract the main article content from the given URL
Action: Article Extractor
Action Input: https://www.bbc.com/news/articles/crk01exykx4o[0m
Observation: [33;1m[1;3mA Ukrainian official anticipates a New Year prisoner exchange with Russia "any day," despite the possibility of last-minute cancellations. Negotiations have been challenging due to Russia's recent battlefield gains. Ukraine holds over 8,000 prisoners of war, and Russia has made significant advances, raising concerns about increased captures. Ukrainian marine Andriy Turas, captured in Mariupol in 2022, recounts the torture and abuse he endured in Russian captivity. His wife, Lena, also captured, was released after two weeks but remains psychologically scarred. Andriy was released in September and met his two-year-old son, Leon, for the first time. International organizations corroborate their accounts, alleging widespread torture and ill-treatment of U

In [22]:
response = agent.run(("Can you search any publication which claims like,", 
                     "'The delay-and-sum (DAS) algorithm is a simple and computationally efficient beamform- ing algorithm.'?", 
                     "If you found any, show citation the publication them with IEEE citation convention."))
print(response)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI should search for publications that claim the delay-and-sum (DAS) algorithm is a simple and computationally efficient beamforming algorithm.
Action: arxiv Search
Action Input: "The delay-and-sum (DAS) algorithm is a simple and computationally efficient beamforming algorithm."[0m
Observation: [36;1m[1;3mPublished: 2018-01-18
Title: Medical Photoacoustic Beamforming Using Minimum Variance-Based Delay Multiply and Sum
Authors: Moein Mozaffarzadeh, Ali Mahloojifar, Mahdi Orooji
Summary: Delay-and-Sum (DAS) beamformer is the most common beamforming algorithm in
Photoacoustic imaging (PAI) due to its simple implementation and real time
imaging. However, it provides poor resolution and high levels of sidelobe. A
new algorithm named Delay-Multiply-and-Sum (DMAS) was introduced. Using DMAS
leads to lower levels of sidelobe compared to DAS, but resolution is not
satisfying yet. In this paper, a novel beamformer is introduced based

In [91]:
response = agent.run("Summarize the content of https://www.bbc.com/news/live/c4glr85l2ldt")
print(response)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mI should use the Article Extractor to get the main content of the article
Action: Article Extractor
Action Input: https://www.bbc.com/news/live/c4glr85l2ldt[0mA plane crash at Muan International Airport in South Korea has killed 179 people. Two people - both flight crew members - have survived what 
could be the country's worst aviation accident
The Boeing 737-800 flight - operated by airline Jeju Air - had 181 people on board including six flight staff and was arriving from Bangkok, Thailand
Families of the victims are 
congregating at the airport
 as one man - whose nephew and two great-nephews were on the flight - says 
"I can't believe the entire family has disappeared, my heart aches so much"
Footage from the scene shows the plane coming off the runway and crashing into a wall, before bursting into flames
Every few minutes, we hear wailing echo through the airport terminal where relatives of those killed have gathered, 


Observation: [33;1m[1;3mA Boeing 737-800 operated by Jeju Air crashed at Muan International Airport in South Korea, killing 179 people. The plane, carrying 181 passengers and six crew members from Bangkok, Thailand, crashed into a wall and burst into flames after veering off the runway. Two flight crew members survived the crash, while all 175 passengers and four crew members perished. Families of the victims gathered at the airport, expressing grief and disbelief. The cause of the crash is still under investigation, with the airline's management team stating that maintenance issues were not a factor.
Maeng Gi-su, whose nephew and two grandnephews were on the flight, expressed his disbelief and heartbreak over the loss of his family. The government has declared a seven-day national mourning period, with flags lowered and civil servants wearing black ribbons. The identification process is ongoing, with officials citing the presence of underage passengers without IDs as a challenge. C