In [1]:
import os
from dotenv import load_dotenv
from tavily import TavilyClient

# Load environment variables via python-dotenv before reading any keys
load_dotenv()

# Tavily API key as found in the process environment (None when it is not set)
tavily_api_key = os.environ.get("TAVILY_API_KEY")


In [2]:
# Build the Tavily client from the API key stored in the environment
client = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))

# Run a search and ask Tavily to include an answer in the response
result = client.search(
    "What is in Nvidia's new Blackwell GPU?",
    include_answer=True,
)

# Show the answer as the cell output
result["answer"]


'The Nvidia Blackwell GPU features fifth-generation Tensor Cores, NVFP4 precision format, and high bandwidth HBM3E memory. It delivers up to 15 petaFLOPS performance. Blackwell is designed for generative AI and large-scale AI models.'

In [None]:
# Inputs for the plain DuckDuckGo search

city = "New Delhi"

# Assemble the query one line at a time; the empty first and last entries
# reproduce the leading and trailing newline of the multi-line prompt, and
# each line keeps its four-space indent.
query = "\n".join([
    "",
    f"    what is the current weather in {city}?",
    "    Should I travel there today?",
    '    "weather.com"',
    "",
])

In [11]:
import requests
from bs4 import BeautifulSoup
from ddgs import DDGS
import re

# Module-level DuckDuckGo search client; search() below calls ddg.text() on it
ddg = DDGS()

def search(query, max_results=3):
    """Return the result URLs DuckDuckGo gives for ``query``.

    Args:
        query: Search text passed to ``ddg.text``.
        max_results: Upper bound on results requested from ``ddg.text``.

    Returns:
        A list with the ``"href"`` value of every result. If anything goes
        wrong while searching or reading the results, a fixed list of two
        weather.com URLs is returned instead, so later cells still have
        something to work with. Those fallback URLs are hard-coded and do
        not depend on ``query``.
    """
    try:
        hits = ddg.text(query, max_results=max_results)
        return [hit["href"] for hit in hits]
    except Exception as e:
        # Best-effort fallback: keep the notebook running, but say *why* the
        # live search failed so stale links are not mistaken for real results.
        print(f"returning previous results due to exception reaching ddg: {e!r}")
        return [  # cover case where DDG rate limits due to high deeplearning.ai volume
            "https://weather.com/weather/today/l/USCA0987:1:US",
            "https://weather.com/weather/hourbyhour/l/54f9d8baac32496f6b5497b4bf7a277c3e2e6cc5625de69680e6169e7e38e9a8",
        ]

# Search for the weather query and list every link that came back
weather_results = search(query)
for link in weather_results:
    print(link)

https://weather.com/weather/tenday/l/c3e96d6cc4965fc54f88296b54449571c4107c73b9638c16aafc83575b4ddf2e
https://weather.com/weather/today/l/Block+3+Delhi+India?canonicalCityId=3b20c1938ff88202bb5f59e875f671c4
https://weather.com/en-IN/weather/today/l/b37bb211aec6a79795616f3fa4cf1c2bbf424a23a45001f5756fcadff0fde73f?par=samsung_widget_INS&cm_ven=L1_current_weather&theme=samsungLight


In [9]:
def scrape_weather_info(url, timeout=10):
    """Download ``url`` and parse the response body as HTML.

    Args:
        url: Address of the page to fetch. A falsy value (``None`` or an
            empty string) returns early without any network access.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.
            Without one, a server that never answers would block the cell
            indefinitely.

    Returns:
        A ``BeautifulSoup`` document when the server answers with status 200.
        Otherwise a plain ``str`` message:
        ``"Weather information could not be found."`` for a falsy ``url`` or
        ``"Failed to retrieve the webpage."`` for any other status code.
        Callers must check for the string case before using soup methods.

    Exceptions raised by ``requests.get`` itself are not caught here.
    """
    if not url:
        return "Weather information could not be found."

    # fetch data, sending a browser-like User-Agent header
    headers = {'User-Agent': 'Mozilla/5.0'}
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        return "Failed to retrieve the webpage."

    # parse result
    return BeautifulSoup(response.text, 'html.parser')


In [19]:
# Pick the first link DuckDuckGo returned
url = weather_results[0]

# Scrape that first website
soup = scrape_weather_info(url)

print(f"Website: {url}\n\n")
# Only the first 5000 characters of the <body> markup are shown to keep the output short
print(
    str(soup.body)[:5000]
)

Website: https://weather.com/weather/tenday/l/c3e96d6cc4965fc54f88296b54449571c4107c73b9638c16aafc83575b4ddf2e


<body><div class="appWrapper DaybreakLargeScreen LargeScreen lightTheme twcTheme DaybreakLargeScreen--appWrapper--ZkDop" id="appWrapper"><div class="region-meta"><div class="removeIfEmpty" id="WxuHtmlHead-meta-"></div></div><div class="regionHeaderWrapper DaybreakLargeScreen--stickyHeader--0Pgu8"><div class="regionHeaderInnerWrapper"><div class="adsSectionOuterWrapper"><div class="adsSectionInnerWrapper"><div class="stickyAdPlaceholder"></div><div class="js-branded-background-ads" id="labBG"></div><div class="js-branded-background-ads" id="wx-hero-content"></div><div class="region-stickyAds regionStickyAds"><div class="removeIfEmpty" id="WxuAd-stickyAds-50b69813-b340-4d89-a22d-016d4b682491"><div class="adWrapper BaseAd--adWrapper--ANZ1O BaseAd--hide--hCG8L"><div class="adLabel BaseAd--adLabel--JGSp6">Advertisement</div><div class="ad_module BaseAd--ad_module--ajh9S subs-unde

In [15]:
# Collect the text of every heading and paragraph on the page, one entry per line
weather_data = "\n".join(
    element.get_text(" ", strip=True)
    for element in soup.find_all(['h1', 'h2', 'h3', 'p'])
)

# Collapse each run of whitespace (including the newlines just added) into a single space
weather_data = re.sub(r'\s+', ' ', weather_data)

print(f"Website: {url}\n\n")
print(weather_data)

Website: https://weather.com/weather/tenday/l/c3e96d6cc4965fc54f88296b54449571c4107c73b9638c16aafc83575b4ddf2e


Recent Locations Weather Forecasts Radar & Maps News & Media Products & Account Lifestyle Specialty Forecasts 10 Day Weather - New Delhi, Delhi, India Tonight Night Cloudy skies early with isolated thunderstorms developing late. Hazy. Low around 80F. Winds light and variable. Chance of rain 30%. Heads-up Enjoy Fall-Like Weather While It Lasts Thu 28 Day Scattered showers and thunderstorms. Hazy. High 89F. Winds ESE at 5 to 10 mph. Chance of rain 50%. Night Scattered thunderstorms. Hazy. Low near 80F. Winds light and variable. Chance of rain 50%. Fri 29 Day Thunderstorms likely in the morning. Then a chance of scattered thunderstorms in the afternoon. Hazy. High 87F. Winds ESE at 5 to 10 mph. Chance of rain 70%. Night Cloudy skies early with thunderstorms developing later at night. Hazy. Low 79F. Winds E at 5 to 10 mph. Chance of rain 70%. Sat 30 Day Thunderstorms likely in t

In [16]:
# As seen above, the scraping route takes a lot of HTML parsing.
# Now try the same query through Tavily's agentic search.

# Run the search, requesting a single result
result = client.search(
    query,
    max_results=1,
)

# Take the content field of the first result and print it
data = result["results"][0]["content"]

print(data)

{'location': {'name': 'New Delhi', 'region': 'Delhi', 'country': 'India', 'lat': 28.6, 'lon': 77.2, 'tz_id': 'Asia/Kolkata', 'localtime_epoch': 1756304028, 'localtime': '2025-08-27 19:43'}, 'current': {'last_updated_epoch': 1756303200, 'last_updated': '2025-08-27 19:30', 'temp_c': 28.0, 'temp_f': 82.4, 'is_day': 0, 'condition': {'text': 'Mist', 'icon': '//cdn.weatherapi.com/weather/64x64/night/143.png', 'code': 1030}, 'wind_mph': 6.3, 'wind_kph': 10.1, 'wind_degree': 106, 'wind_dir': 'ESE', 'pressure_mb': 1002.0, 'pressure_in': 29.59, 'precip_mm': 0.0, 'precip_in': 0.0, 'humidity': 89, 'cloud': 75, 'feelslike_c': 29.6, 'feelslike_f': 85.3, 'windchill_c': 32.1, 'windchill_f': 89.7, 'heatindex_c': 37.1, 'heatindex_f': 98.8, 'dewpoint_c': 23.0, 'dewpoint_f': 73.4, 'vis_km': 3.2, 'vis_miles': 1.0, 'uv': 0.0, 'gust_mph': 8.7, 'gust_kph': 14.1}}


In [17]:
# Format the search result as JSON for agents
import ast
import json
from pygments import highlight, lexers, formatters

# `data` may be strict JSON or the repr of a Python dict (single-quoted
# strings). Try JSON first, then fall back to a literal-only parse.
# Swapping quote characters by hand would corrupt any value that contains
# an apostrophe and cannot handle Python literals such as True or None.
try:
    parsed_json = json.loads(data)
except json.JSONDecodeError:
    parsed_json = ast.literal_eval(data)

# Pretty-print the JSON with terminal syntax highlighting
formatted_json = json.dumps(parsed_json, indent=4)
colorful_json = highlight(formatted_json,
                          lexers.JsonLexer(),
                          formatters.TerminalFormatter())

print(colorful_json)


{[37m[39;49;00m
[37m    [39;49;00m[94m"location"[39;49;00m:[37m [39;49;00m{[37m[39;49;00m
[37m        [39;49;00m[94m"name"[39;49;00m:[37m [39;49;00m[33m"New Delhi"[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"region"[39;49;00m:[37m [39;49;00m[33m"Delhi"[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"country"[39;49;00m:[37m [39;49;00m[33m"India"[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"lat"[39;49;00m:[37m [39;49;00m[34m28.6[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"lon"[39;49;00m:[37m [39;49;00m[34m77.2[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"tz_id"[39;49;00m:[37m [39;49;00m[33m"Asia/Kolkata"[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"localtime_epoch"[39;49;00m:[37m [39;49;00m[34m1756304028[39;49;00m,[37m[39;49;00m
[37m        [39;49;00m[94m"localtime"[39;49;00m:[37m [39;49;00m[33m"2025-08-27 19:43"[39;49;00m[37m[39;49;00m
[37m    [39;49;00m},