Notebook for working on custom tools for agents

In [1]:
import requests
from bs4 import BeautifulSoup

In [2]:
resp = requests.get("https://www.bbc.com/")

In [3]:
soup = BeautifulSoup(resp.text, "html.parser")


In [12]:
headings = ["h1", "h2", "h3", "h4", "h5", "h6"]
anchors = soup.find_all("a")

pairs = []

for a in anchors:
    if any(a.find(heading) for heading in headings):
        pairs.append((a.text.strip(), a.get("href")))



In [15]:
for pair in pairs:
    print(f"{pair[0][:100]} - {pair[1]}")

LIVEIsrael attacks Fordo nuclear site, as Trump floats 'regime change' after US strikes - https://www.bbc.com/news/live/cn7ze4vmk2pt
Strait of Hormuz: What happens if Iran shuts global oil corridor? - /news/articles/c78n6p09pzno
An unprecedented moment - but what the US and Iran do next could be even more momentous - /news/articles/cy0w94yj68xo
Decoy flights and seven B-2 stealth bombers - how US says it hit Iran's nuclear sites - /news/articles/cew0x7159edo
US strikes on Iran trigger protests internationally - /news/videos/c4g2wxwel5qo
Bride shot dead in attack on French wedding partyA suspected attacker is also killed in an apparent  - /news/articles/cn0q8pypxwxo
First celestial image unveiled from revolutionary telescopeThe telescope should detect killer astero - /news/articles/cj3rmjjgx6xo
Bride shot dead in attack on French wedding partyA suspected attacker is also killed in an apparent  - /news/articles/cn0q8pypxwxo
First celestial image unveiled from revolutionary telescopeThe t

In [28]:
def fetch_news(homepage_url: str) -> list:

    """
    Fetches news items from the given homepage URL.
    Args:
        homepage_url (str): The URL of the news homepage to scrape.
    Returns:
        list: A list of dictionaries containing news titles and URLs.
        {"title": "News Title", "url": "https://example.com/news-item"}
    """

    resp = requests.get(homepage_url)
    soup = BeautifulSoup(resp.text, "html.parser")
    headings = ["h1", "h2", "h3", "h4", "h5", "h6"]
    news_items = []
    for item in soup.find_all("a"):
        if any(item.find(heading) for heading in headings):
            title = item.text.strip()
            url = item.get("href")
            if url and not url.startswith("http"):
                url = homepage_url + url
            news_items.append({"title": title, "url": url})
    for hd in headings:
        for item in soup.find_all(hd):
            if item.text.strip():
                title = item.text.strip()
                url = item.find("a").get("href") if item.find("a") else None
                if not url:
                    continue
                if url and not url.startswith("http"):
                    url = homepage_url + url
                news_items.append({"title": title, "url": url})
    return news_items

In [32]:
fetch_news("https://www.bbc.com/")

[{'title': "LIVEIsrael attacks Fordo nuclear site, as Trump floats 'regime change' after US strikes",
  'url': 'https://www.bbc.com/news/live/cn7ze4vmk2pt'},
 {'title': 'What is the Strait of Hormuz and why does it matter?',
  'url': 'https://www.bbc.com//news/articles/c78n6p09pzno'},
 {'title': 'LIVEBBC Verify Live: What satellite images tell us about damage in Iran',
  'url': 'https://www.bbc.com//news/live/c75rw1d3vwnt'},
 {'title': 'An unprecedented moment - but what the US and Iran do next could be even more momentous',
  'url': 'https://www.bbc.com//news/articles/cy0w94yj68xo'},
 {'title': 'How successful have the US strikes on Iran been?',
  'url': 'https://www.bbc.com//news/videos/cq53l9dvggjo'},
 {'title': 'Bride shot dead in attack on French wedding partyA suspected attacker is also killed in an apparent exchange of fire in a village near Avignon.11 hrs agoEurope',
  'url': 'https://www.bbc.com//news/articles/cn0q8pypxwxo'},
 {'title': 'First celestial image unveiled from rev

In [42]:
resp  = requests.get("https://www.bbc.com/search?q=israel iran war")

In [43]:
soup = BeautifulSoup(resp.text, "html.parser")

In [44]:
results = soup.find_all("div", {"data-testid": "newport-card"})

In [54]:
for r in results:
    anchor = r.find("a", href=True)
    link = anchor["href"]
    if not link.startswith("http"):
        link = "https://www.bbc.co.uk" + link

    title_tag = r.find("h2")
    title = title_tag.get_text(strip=True) if title_tag else None

    description_tag = r.find("div", class_="sc-cdecfb63-3")
    description = description_tag.get_text(strip=True) if description_tag else None

    time_tag = r.find("span", attrs={"data-testid": "card-metadata-lastupdated"})
    time = time_tag.get_text(strip=True) if time_tag else None
    print(f"Title: {title}\nLink: {link}\nDescription: {description}\nTime: {time}\n")


Title: Iran warns Trump against joining war with Israel
Link: https://www.bbc.co.uk/audio/play/p0lk00rz
Description: Iran's supreme leader Ayatollah Ali Khamenei says Iran will never surrender
Time: 5 days ago

Title: Israel-Iran war leads to spike in energy prices
Link: https://www.bbc.co.uk/audio/play/m002dks2
Description: Oil prices jump as tensions rise in the Middle East.
Time: 7 days ago

Title: The US has joined the Iran-Israel war. What happens now?
Link: https://www.bbc.co.uk/news/videos/c5ypw09gdzpo
Description: The BBC's world news correspondent Joe Inwood answers three key questions after the US strikes in Iran.
Time: 22 hrs ago

Title: Americanswers… On 5 Live! Can Donald Trump stop the war between Israel and Iran?
Link: https://www.bbc.co.uk/audio/play/p0ljn2p3
Description: And which world leaders will the US President take advice from?
Time: 7 days ago

Title: 'Nowhere feels safe': Iranians on life under Israeli attacks
Link: https://www.bbc.co.uk/news/articles/c8xgxdr01

In [56]:
def search_bbc(query: str) -> list:
    """
    Searches BBC for news articles related to the given query.
    Args:
        query (str): The search term to look for.
    Returns:
        list: A list of dictionaries containing article titles, URLs, descriptions, and publication times.
        {
            "title": "Article Title",
            "url": "https://www.bbc.com/article-url",
            "description": "Brief description of the article.",
            "time": "Publication time"
        }
    """
    search_url = f"https://www.bbc.com/search?q={query}"
    resp = requests.get(search_url)
    soup = BeautifulSoup(resp.text, "html.parser")
    results = soup.find_all("div", {"data-testid": "newport-card"})
    
    articles = []
    for r in results:
        anchor = r.find("a", href=True)
        link = anchor["href"]
        if not link.startswith("http"):
            link = "https://www.bbc.co.uk" + link

        title_tag = r.find("h2")
        title = title_tag.get_text(strip=True) if title_tag else None

        description_tag = r.find("div", class_="sc-cdecfb63-3")
        description = description_tag.get_text(strip=True) if description_tag else None

        time_tag = r.find("span", attrs={"data-testid": "card-metadata-lastupdated"})
        time = time_tag.get_text(strip=True) if time_tag else None
        
        articles.append({
            "title": title,
            "url": link,
            "description": description,
            "time": time
        })
    
    return articles

In [57]:
search_bbc("FIFA Pakistan")

[{'title': 'African legends bring hope to Somali football',
  'url': 'https://www.bbc.co.uk/sport/football/articles/cdxvw2yvqgno',
  'description': "Somalians hope hosting Emmanuel Adebayor, Samuel Eto'o and Jay Jay Okocha in a legends game can help bring international football back to the country after nearly 40 years.",
  'time': '30 May 2025'},
 {'title': "Human rights groups warn of 'surge' in migrant worker deaths in Saudi Arabia",
  'url': 'https://www.bbc.co.uk/news/articles/cgj86wqe0dlo',
  'description': 'Workers are already dying from preventable workplace accidents as the country prepares to host the 2034 World Cup, reports say.',
  'time': '14 May 2025'},
 {'title': 'Congo-Brazzaville suspended from international football',
  'url': 'https://www.bbc.co.uk/sport/football/articles/cx2513r30v6o',
  'description': "Football's world governing body suspends Congo-Brazzaville from internationals because of third-party interference in the affairs of the country's FA.",
  'time': '7