In [None]:
# imports
# If these fail, please check you're running from an 'activated' environment with (llms) in the command prompt

import os
import requests
import json
from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI

In [17]:
# Initialize and constants

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:
    print("API key looks good so far")
else:
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")
    
# MODEL = 'gpt-4o-mini'
# openai = OpenAI()

MODEL = "llama3.2"
openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')


There might be a problem with your API key? Please visit the troubleshooting notebook!


In [3]:
# A class to represent a Webpage

# Some websites need you to use proper headers when fetching them:
headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class Website:
    """
    A utility class to represent a Website that we have scraped, now with links
    """

    def __init__(self, url):
        self.url = url
        response = requests.get(url, headers=headers)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')
        self.title = soup.title.string if soup.title else "No title found"
        if soup.body:
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""
        links = [link.get('href') for link in soup.find_all('a')]
        self.links = [link for link in links if link]

    def get_contents(self):
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

In [4]:
ed = Website("https://edwarddonner.com")
ed.links

['https://edwarddonner.com/',
 'https://edwarddonner.com/connect-four/',
 'https://edwarddonner.com/outsmart/',
 'https://edwarddonner.com/about-me-and-about-nebula/',
 'https://edwarddonner.com/posts/',
 'https://edwarddonner.com/',
 'https://news.ycombinator.com',
 'https://nebula.io/?utm_source=ed&utm_medium=referral',
 'https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html',
 'https://patents.google.com/patent/US20210049536A1/',
 'https://www.linkedin.com/in/eddonner/',
 'https://edwarddonner.com/2025/05/28/connecting-my-courses-become-an-llm-expert-and-leader/',
 'https://edwarddonner.com/2025/05/28/connecting-my-courses-become-an-llm-expert-and-leader/',
 'https://edwarddonner.com/2025/05/18/2025-ai-executive-briefing/',
 'https://edwarddonner.com/2025/05/18/2025-ai-executive-briefing/',
 'https://edwarddonner.com/2025/04/21/the-complete-agentic-ai-engineering-course/',
 'https://edwarddonner.com/2025/04/21/the-

# Agent1 : links retriever

In [13]:
link_system_prompt = "You are provided with a list of links found on a webpage. \
You are able to decide which of the links would be most relevant to include in a brochure about the company, \
such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
link_system_prompt += "You should respond in JSON as in this example:"
link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page": "url": "https://another.full.url/careers"}
    ]
}
"""

In [15]:
def get_links_user_prompt(website):
    user_prompt = f"Here is the list of links on the website of {website.url} - "
    user_prompt += "please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. \
Do not include Terms of Service, Privacy, email links.\n"
    user_prompt += "Links (some might be relative links):\n"
    user_prompt += "\n".join(website.links)
    return user_prompt

In [30]:
def agent1_get_links(url):
    website = Website(url)
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(website)}
        ],
        response_format={
            "type": "json_object"}
    )
    result = response.choices[0].message.content
    return json.loads(result)

# Agent2 : create brochure

In [31]:
def get_all_details(url):
    result = "Landing page:\n"
    result += Website(url).get_contents()
    links = agent1_get_links(url)
    print("Found links:", links)
    for link in links["links"]:
        result += f"\n\n{link['type']}\n"
        try:
            result += Website(link["url"]).get_contents()
        except Exception as e:
            result += f"Impossibile recuperare la pagina {link['url']}: {e}\n"
    return result

In [32]:
# system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
# and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
# Include details of company culture, customers and careers/jobs if you have the information."

# Or uncomment the lines below for a more humorous brochure - this demonstrates how easy it is to incorporate 'tone':

system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
Include details of company culture, customers and careers/jobs if you have the information."


def get_brochure_user_prompt(company_name, url):
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += f"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    user_prompt += get_all_details(url)
    user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters
    return user_prompt

In [33]:
def agent2_create_brochure(company_name, url):
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
          ],
    )
    result = response.choices[0].message.content
    display(Markdown(result))

In [34]:
agent2_create_brochure("HuggingFace", "https://huggingface.co")

Found links: {'links': [{'type': 'About page', 'url': '/'}, {'type': 'Careers/Jobs page', 'url': '/join', 'replace the `<` and `>` with `/` for proper url format so https://huggingface.co/ becomes / about this is done as below: https://huggingface.co/ ': ''}, {'type': 'Company overview page', 'url': '/brand'}, {'type': 'Features or Model explanations page', 'url': '/models'}, {'type': 'Dataset page', 'url': '/datasets'}]}


**Welcome to Hugging Face: The AI Community That's About to Change Everything**

[Image of a hugging face with a superhero cape]

At Hugging Face, we're on a mission to power the future of machine learning. We're not just building AI models, we're building a community that's passionate about making ML more accessible and fun.

**Our Mission**

We believe that everyone should be able to build and deploy their own machine learning models. That's why we've created a platform where you can collaborative on unlimited public models, datasets, and applications. Whether you're a researcher, entrepreneur, or just someone who loves data, we want to help you get started with ML.

**Our Features**

* **1 Million+ Models**: Access to thousands of pre-trained models from top organizations like Google, Meta, and Amazon.
* **Infinite Datasets**: Browse 250k+ datasets for any ML task imaginable.
* **Spaces**: Host and collaborate on unlimited public models, datasets, and applications.
* **Compute**: Deploy on optimized inference endpoints or update your Spaces applications to a GPU in a few clicks.

**Our Community**

We're not just building tech, we're building a community of passionate people who share our vision for the future of AI. With over 50,000 organizations using Hugging Face, we've got the perfect blend of expertise and enthusiasm to make ML happen.

**Join the Fun!**

Ready to join the journey? Check out our [blog](/Learn), where we share insights on the latest trends in ML and how you can get started. Or, dive into our vast repository of [codebases](/Hub) and start building your first model today!

[Button: Sign Up for Hugging Face]

**Transforming Data into Insights**

"AI is not about finding answers; it's about asking better questions." - Peter Norvig

At Hugging Face, we believe that machine learning should be accessible to everyone. Join our mission to empower the ML community and get ready to ask those better questions.

[Social media links: Twitter | LinkedIn | GitHub ]

In [36]:
# Agent3: translate brochure to Italian

def agent3_translate_brochure_to_italian(company_name, url):
    # First, generate the brochure in English
    response_en = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
        ],
    )
    brochure_en = response_en.choices[0].message.content

    # Now, translate the brochure to Italian
    translation_prompt = (
        "Translate the following markdown brochure into Italian. "
        "Keep the markdown formatting:\n\n"
        f"{brochure_en}"
    )
    response_it = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": "You are a professional Italian translator."},
            {"role": "user", "content": translation_prompt}
        ],
    )
    brochure_it = response_it.choices[0].message.content
    display(Markdown(brochure_it))

In [37]:
agent3_translate_brochure_to_italian("HuggingFace", "https://huggingface.co")

Found links: {'links': [{'type': 'About page', 'url': 'https://huggingface.co'}, {'type': 'Company page', 'url': 'https://brand.huggingface.co'}, {'type': 'Changelog', 'url': 'https://changelog.huggingface.co'}, {'type': 'News/Blog', 'url': 'https://blog.huggingface.co'}, {'type': 'GitHub repository', 'url': 'https://github.com/huggingface'}, {'type': 'Discord community', 'url': 'https://join discord.huggingface.co'}, {'type': 'Twitter profile', 'url': 'https://twitter.com/huggingface'}, {'type': 'LinkedIn company page', 'url': 'https://www.linkedin.com/company/huggingface/'}, {'type': 'Enterprise information', 'url': 'https://huggingface.co/enterprise/'}, {'type': 'Pricing', 'url': 'https://huggingface.co/pricing'}]}


**Benvenuti all'Hugging Face: dove l'intelligenza artificiale incontra la comunità**
==============================================

[Immagine della copertina: un gruppo di persone di diverse etnie vicine a una tavola, circondate da laptop e libri sulla programmazione]

All'Hugging Face lavoriamo per costruire il futuro dell'intelligenza artificiale. La nostra piattaforma è lo spazio preferito degli appassionati della machine learning, dei ricercatori e degli sviluppatori.

**La Storia**
---------------

Abbiamo iniziato con un semplice idea: creare una piattaforma collaborativa dove le persone da tutto il mondo potessero condividere modelli, dataset e applicazioni. Passando allo stato attuale, abbiamo costruito una comunità di oltre 50.000 organizzazioni ciascuna che contribuisce a accelerare l'innovazione dell'intelligenza artificiale.

**Cosa Facciamo**
-----------------

*   Gestiamo la più grande collezione di modelli di machine learning del mondo, con oltre 1 milione di modelli disponibili per esplorare e distribuire.
*   La nostra piattaforma fornisce molte opportunità all'interno della comunità ad apprendere dagli altri, condividere conoscenze e esporre il proprio lavoro a livello mondiale.
*   Quale che sia la vostra carriera come ricercatore, sviluppatore o imprenditore, abbiamo i tool e le risorse per aiutarvi a sbloccare l'appieno del potenziale dell'intelligenza artificiale.

**Unisciti alla Nostra Comunità**
---------------------------

Esplora la nostra viva comunità di appassionati dell'intelligenza artificiale, dei ricercatori e degli sviluppatori. Esplora il nostro archivio di modelli., contribuisci a progetti open-source o semplicemente connetevi con persone che amano l'intelligenza artificiale come voi.

**I Vostri Benefici**

*   **Accelerare le vostre Progetti**: Con accesso ai migliori modelli, dataset e tool, potrete costruire applicazioni migliori in modo più rapido.
*   **Mostra i Vostri Lavori**: Sviluppereste i progetti sul nostro piattoforma per guadagnare visibilità con un pubblico di 50.000+ organizzazioni.
*   **Cercate Di Staying AHead of the Curve**: Rimanete aggiornati sulle ultime tendenze nell'intelligenza artificiale, paper di ricerca e notizie industariali dalla nostra comunità.

**I Vostri Benefici per le Ispettive**
--------------------------------

*   **Sfruttare Potenzialità dell'Intelligenza Artificiale**: Raggiungete il potenziale raccolto della Hugging Face collective sapersa accelerare pipeline innovativo dei vostri affari.
*   **Competere con i Migliori**: Il nostro piattoforma fornisce un'opportunità esclusiva per collaborarsi con maggiori ricercatori e sviluppatori all'avanguardia dell'intelligenza artificiale.
*   **Vostro Futuro Seguro**: Guadagnate benefici dai nostri livelli di sicurezza aziendale, controllo accesso e supporto dedicato.

**Iniziate Adesso!**
---------------------

Preparati per questo viaggio emozionante? Accedi al nostro piattaforma oggi e uniscete alle migliaia delle organisations che stanno già ad empowering i loro affari con Hugging Face.