# Lesson 3: Agentic Search

In [1]:
# libraries
from dotenv import load_dotenv
import os
from tavily import TavilyClient

# load environment variables from .env file
_ = load_dotenv()

# connect
client = TavilyClient(api_key=os.environ.get("TAVILY_API_KEY"))

In [2]:
# run search
result = client.search("what are basic facts about Broadcom",
                       include_answer=True)

# print the answer
result["answer"]


"Broadcom Inc. is a global technology leader specializing in semiconductor, enterprise software, and security solutions. Established in 1961 as HP Associates, it later became Broadcom through a spinoff from Hewlett-Packard in 1999. In 2005, Avago Technologies was formed after acquiring the Semiconductor Products Group of Agilent Technologies. Broadcom completed the acquisition of Symantec's enterprise security business in 2019. The company has been involved in investigations for anti-competitive practices by authorities like the European Union. As of 2022, Broadcom offers enterprise software and operates the enterprise security business under the Symantec brand."

## Regular search

In [12]:
# choose location (try to change to your own city!)

company = "Broadcom"

query = f"""
    what are basic facts about {company}?
"""

In [15]:
import requests
from bs4 import BeautifulSoup
from duckduckgo_search import DDGS
import re

ddg = DDGS()

def search(query, max_results=5):
    try:
        results = ddg.text(query, max_results=max_results)
        return [i["href"] for i in results]
    except Exception as e:
        print(f"returning previous results due to exception reaching ddg.")
        results = [ # cover case where DDG rate limits due to high deeplearning.ai volume
            "https://weather.com/weather/today/l/USCA0987:1:US",
            "https://weather.com/weather/hourbyhour/l/54f9d8baac32496f6b5497b4bf7a277c3e2e6cc5625de69680e6169e7e38e9a8",
        ]
        return results  


for i in search(query):
    print(i)

https://www.broadcom.com/company/about-us
https://facts.net/general/16-facts-about-broadcom/
https://en.wikipedia.org/wiki/Broadcom
https://www.statista.com/topics/9932/broadcom/
https://www.broadcom.com/company/about-us/company-history


In [16]:
def scrape_info(url):
    """Scrape content from the given URL"""
    if not url:
        return "Weather information could not be found."
    
    # fetch data
    headers = {'User-Agent': 'Mozilla/5.0'}
    response = requests.get(url, headers=headers)
    if response.status_code != 200:
        return "Failed to retrieve the webpage."

    # parse result
    soup = BeautifulSoup(response.text, 'html.parser')
    return soup


In [26]:
# use DuckDuckGo to find websites and take the first result
for url in search(query):

    soup = scrape_info(url)

    print(f"Website: {url}\n\n")
    try:
        meta_description = soup.find('meta', attrs={'name': 'description'})
    
        # Check if meta description exists
        if meta_description:
            meta_description = meta_description.get('content')
    except Exception as exc:
        print(exc)
        meta_description=""
        
    if meta_description:
        print(f"Description: {meta_description}\n\n")
        
    data = []
    for tag in soup.find_all(['h1', 'h2', 'h3', 'p']):
        text = tag.get_text(" ", strip=True)
        data.append(text)
    
    output = " ".join(data)
        
    print(output[:1000]) # limit long outputs  
    print("----\n\n\n")
    


Website: https://www.broadcom.com/company/about-us


Description: Broadcom Inc. is a global technology leader that designs, develops and supplies a broad range of semiconductor, enterprise software and security solutions. 



----



Website: https://facts.net/general/16-facts-about-broadcom/


Description: Discover 16 intriguing facts about Broadcom, a leading technology company that is shaping the future with its innovative solutions and cutting-edge products. Explore the company's history, achievements, and impact on various industries.


16 Facts About Broadcom Written by Doll Hutcherson Modified & Updated: 02 Jun 2024 Reviewed by Sherman Smith Broadcom is a global technology company that is known for its extensive range of semiconductor and infrastructure software solutions. With a presence in over 30 countries, Broadcom is a key player in the field of wireless communication, wired infrastructure, enterprise storage, industrial automation, and more. The company has made significan

## Agentic Search

In [27]:
# run search
result = client.search(query, max_results=1)

# print first result
data = result["results"][0]["content"]

print(data)

In October 2019, the European Union issued an interim antitrust order against Broadcom concerning anticompetitive business practices which allegedly violate European Union competition law.[10]
History[edit]
Origin in Hewlett-Packard[edit]
The company that would later become Broadcom Inc. was established in 1961 as HP Associates, a semiconductor products division of Hewlett-Packard.[11]
The division separated from Hewlett-Packard as part of the Agilent Technologies spinoff in 1999.[7][12]
Formation of Avago Technologies[edit]
KKR and Silver Lake Partners acquired the Semiconductor Products Group of Agilent Technologies in 2005 for $2.6 billion and formed Avago Technologies.[7] Avago Technologies agreed to sell its I/O solutions unit to PMC-Sierra for $42.5 million in October 2005.[13]
In August 2008, the company filed an initial public offering of $400 million.[14][15] And on 4 November 2019, Broadcom announced that it had completed the acquisition of the business, as well as the Symant

In [28]:
# didn't get json for this search

import json
from pygments import highlight, lexers, formatters

# parse JSON
parsed_json = json.loads(data.replace("'", '"'))

# pretty print JSON with syntax highlighting
formatted_json = json.dumps(parsed_json, indent=4)
colorful_json = highlight(formatted_json,
                          lexers.JsonLexer(),
                          formatters.TerminalFormatter())

print(colorful_json)


JSONDecodeError: Expecting value: line 1 column 1 (char 0)

<img src="./google_sample.png" width="800" height="600">