In [11]:
# imports
# If these fail, please check you're running from an 'activated' environment with (llms) in the command prompt

import json
import os
from typing import List
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from IPython.display import Markdown, display, update_display
from openai import OpenAI

In [12]:
# Initialize and constants

# Load settings from a .env file, then read the OpenAI key from the environment.
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Shape check only: this looks at the key's prefix and length, it does not call the API.
if not api_key or not api_key.startswith('sk-proj-') or len(api_key) <= 10:
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")
else:
    print("API key looks good so far")

# Model name and client shared by every request made later in the notebook.
MODEL = 'gpt-4o-mini'
openai = OpenAI()

API key looks good so far


In [13]:
# A class to represent a Webpage

# Some websites need you to use proper headers when fetching them:
headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class Website:
    """
    A utility class to represent a Website that we have scraped, now with links

    Attributes set by the constructor:
        url:   the address that was requested
        body:  the raw response content returned by requests
        title: text of the <title> tag, or "No title found"
        text:  text of <body> with script/style/img/input elements removed
        links: every non-empty href found on an <a> tag, exactly as written
               in the page (so entries may be relative paths)
    """

    def __init__(self, url, timeout=30):
        """
        Download `url` and extract its title, body text and links.

        Args:
            url: address of the page to fetch.
            timeout: seconds to wait for the server. requests applies no
                timeout unless one is passed, so a stalled server would
                otherwise block the notebook indefinitely.

        Raises:
            requests.RequestException: whatever requests.get raises
                (connection failure, timeout, invalid URL) is propagated.
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')
        # .string is None when <title> is empty or wraps nested tags; fall back
        # to the placeholder instead of letting "None" leak into prompts.
        title_text = soup.title.string if soup.title else None
        self.title = title_text or "No title found"
        if soup.body:
            # Drop elements that add no readable text before extracting it
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""
        hrefs = (anchor.get('href') for anchor in soup.find_all('a'))
        self.links = [href for href in hrefs if href]

    def get_contents(self):
        """Return the title and body text formatted as one prompt-ready string."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

In [14]:
# Scrape the page (a live HTTP request) and show the href values collected from its <a> tags.
# The bare expression on the last line is what makes the notebook display the list.
ed = Website("https://edwarddonner.com")
ed.links

['https://edwarddonner.com/',
 'https://edwarddonner.com/connect-four/',
 'https://edwarddonner.com/outsmart/',
 'https://edwarddonner.com/about-me-and-about-nebula/',
 'https://edwarddonner.com/posts/',
 'https://edwarddonner.com/',
 'https://news.ycombinator.com',
 'https://nebula.io/?utm_source=ed&utm_medium=referral',
 'https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html',
 'https://patents.google.com/patent/US20210049536A1/',
 'https://www.linkedin.com/in/eddonner/',
 'https://edwarddonner.com/2025/05/28/connecting-my-courses-become-an-llm-expert-and-leader/',
 'https://edwarddonner.com/2025/05/28/connecting-my-courses-become-an-llm-expert-and-leader/',
 'https://edwarddonner.com/2025/05/18/2025-ai-executive-briefing/',
 'https://edwarddonner.com/2025/05/18/2025-ai-executive-briefing/',
 'https://edwarddonner.com/2025/04/21/the-complete-agentic-ai-engineering-course/',
 'https://edwarddonner.com/2025/04/21/the-

In [15]:
# System prompt for the link-selection call: describes the task and shows the
# JSON shape the reply should take. Adjacent string literals are concatenated.
link_system_prompt = (
    "You are provided with a list of links found on a webpage. "
    "You are able to decide which of the links would be most relevant to include in a brochure about the company, "
    "such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
    "You should respond in JSON as in this example:"
    """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""
)

In [16]:
# Sanity-check the assembled system prompt before it is used in a request
print(link_system_prompt)

You are provided with a list of links found on a webpage. You are able to decide which of the links would be most relevant to include in a brochure about the company, such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in this example:
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}



In [17]:
def get_links_user_prompt(website):
    """
    Build the user message that asks the model to pick brochure-worthy links.

    Args:
        website: object exposing `url` (str) and `links` (list of str).

    Returns:
        The instruction text followed by every link, one per line.
    """
    instructions = (
        f"Here is the list of links on the website of {website.url} - "
        "please decide which of these are relevant web links for a brochure about the company, "
        "respond with the full https URL in JSON format. "
        "Do not include Terms of Service, Privacy, email links.\n"
    )
    heading = "Links (some might be relative links):\n"
    return instructions + heading + "\n".join(website.links)

In [18]:
# Preview the user prompt built from the links scraped into `ed` above
print(get_links_user_prompt(ed))

Here is the list of links on the website of https://edwarddonner.com - please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. Do not include Terms of Service, Privacy, email links.
Links (some might be relative links):
https://edwarddonner.com/
https://edwarddonner.com/connect-four/
https://edwarddonner.com/outsmart/
https://edwarddonner.com/about-me-and-about-nebula/
https://edwarddonner.com/posts/
https://edwarddonner.com/
https://news.ycombinator.com
https://nebula.io/?utm_source=ed&utm_medium=referral
https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html
https://patents.google.com/patent/US20210049536A1/
https://www.linkedin.com/in/eddonner/
https://edwarddonner.com/2025/05/28/connecting-my-courses-become-an-llm-expert-and-leader/
https://edwarddonner.com/2025/05/28/connecting-my-courses-become-an-llm-expert-and-leader/
https://edwarddo

In [19]:
def get_links(url):
    """
    Ask the model which links on the page at `url` belong in a company brochure.

    Scrapes the page, sends its links to the chat model with JSON output
    requested, and returns the reply decoded with json.loads.
    """
    page = Website(url)
    conversation = [
        {"role": "system", "content": link_system_prompt},
        {"role": "user", "content": get_links_user_prompt(page)},
    ]
    completion = openai.chat.completions.create(
        model=MODEL,
        messages=conversation,
        response_format={"type": "json_object"},
    )
    return json.loads(completion.choices[0].message.content)

In [20]:
# Anthropic has made their site harder to scrape, so I'm using HuggingFace..
# Inspect the raw hrefs first: many are relative paths (e.g. '/models'), which is
# why the link-selection prompt asks the model to answer with full https URLs.

huggingface = Website("https://huggingface.co")
huggingface.links

['/',
 '/models',
 '/datasets',
 '/spaces',
 '/docs',
 '/enterprise',
 '/pricing',
 '/login',
 '/join',
 'inference/get-started',
 '/spaces',
 '/models',
 '/zai-org/GLM-4.5',
 '/tencent/HunyuanWorld-1',
 '/Qwen/Qwen3-Coder-480B-A35B-Instruct',
 '/Chain-GPT/Solidity-LLM',
 '/Qwen/Qwen3-30B-A3B-Instruct-2507',
 '/models',
 '/spaces/enzostvs/deepsite',
 '/spaces/zumjoy/Multi-Style_Video-to-Anime_Generator',
 '/spaces/Qwen/Qwen3-Coder-WebDev',
 '/spaces/smola/higgs_audio_v2',
 '/spaces/black-forest-labs/FLUX.1-Kontext-Dev',
 '/spaces',
 '/datasets/fka/awesome-chatgpt-prompts',
 '/datasets/NousResearch/Hermes-3-Dataset',
 '/datasets/MegaScience/MegaScience',
 '/datasets/interstellarninja/hermes_reasoning_tool_use',
 '/datasets/nvidia/Nemotron-Post-Training-Dataset-v1',
 '/datasets',
 '/join',
 '/pricing#endpoints',
 '/pricing#spaces',
 '/pricing',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/allenai',
 '/facebook',
 '/ama

In [21]:
# Scrapes the page and makes one chat-completion request; the parsed JSON reply is displayed
get_links("https://huggingface.co")

{'links': [{'type': 'about page', 'url': 'https://huggingface.co/huggingface'},
  {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'},
  {'type': 'company page',
   'url': 'https://www.linkedin.com/company/huggingface/'}]}

In [22]:
def get_all_details(url):
    """
    Collect the text of a landing page plus the brochure-relevant pages it links to.

    Args:
        url: address of the company's landing page.

    Returns:
        One string: the landing-page contents followed by a labelled section
        for every selected page that could be fetched.

    Raises:
        requests.RequestException: only if the landing page itself cannot be
            fetched; failures on linked pages are reported and skipped.
    """
    result = "Landing page:\n"
    result += Website(url).get_contents()
    links = get_links(url)
    print("Found links:", links)
    # The reply is model-generated JSON, so the "links" key may be absent or null
    for link in links.get("links") or []:
        target = link.get("url")
        if not target:
            continue
        # The scraped hrefs can be relative ('/about'); resolve them against the
        # landing page in case the model echoed one back unchanged.
        page_url = urljoin(url, target)
        try:
            page = Website(page_url)
        except requests.RequestException as error:
            # One unreachable or non-HTTP link (e.g. mailto:) should not abort the brochure
            print(f"Skipping {page_url}: {error}")
            continue
        result += f"\n\n{link.get('type', 'page')}\n"
        result += page.get_contents()
    return result

In [23]:
# Fetches the landing page and every page the model selected, then prints the combined text
print(get_all_details("https://huggingface.co"))

Found links: {'links': [{'type': 'about page', 'url': 'https://huggingface.co/huggingface'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'company page', 'url': 'https://huggingface.co/enterprise'}, {'type': 'pricing page', 'url': 'https://huggingface.co/pricing'}, {'type': 'blog', 'url': 'https://huggingface.co/blog'}, {'type': 'community forum', 'url': 'https://discuss.huggingface.co'}, {'type': 'GitHub page', 'url': 'https://github.com/huggingface'}, {'type': 'Twitter page', 'url': 'https://twitter.com/huggingface'}, {'type': 'LinkedIn page', 'url': 'https://www.linkedin.com/company/huggingface/'}]}
Landing page:
Webpage Title:
Hugging Face – The AI community building the future.
Webpage Contents:
Hugging Face
Models
Datasets
Spaces
Community
Docs
Enterprise
Pricing
Log In
Sign Up
NEW
Get started with Inference in seconds 🚀
Reachy Mini: The Open Robot for AI Builders
Welcome Cohere on the Hub 🔥
The AI community building the future.
The platfor

In [24]:
# System prompt for the brochure-writing call.
# Adjacent string literals are concatenated; each piece ends with an explicit space so
# sentences cannot run together (a backslash continuation previously produced
# "Respond in markdown.Include details ...").
system_prompt = (
    "You are an assistant that analyzes the contents of several relevant pages from a company website "
    "and creates a short brochure about the company for prospective customers, investors and recruits. "
    "Respond in markdown. "
    "Include details of company culture, customers and careers/jobs if you have the information."
)

# Or uncomment the lines below for a more humorous brochure - this demonstrates how easy it is to incorporate 'tone':

# system_prompt = (
#     "You are an assistant that analyzes the contents of several relevant pages from a company website "
#     "and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. "
#     "Respond in markdown. "
#     "Include details of company culture, customers and careers/jobs if you have the information."
# )


In [25]:
def get_brochure_user_prompt(company_name, url, max_chars=5_000):
    """
    Build the user message for the brochure request.

    Args:
        company_name: name inserted into the opening line of the prompt.
        url: landing page handed to get_all_details for scraping.
        max_chars: upper bound on the length of the returned prompt, counting
            the instruction lines as well as the scraped text. Pass None to
            disable truncation.

    Returns:
        The instruction lines followed by the scraped page contents, cut to
        at most `max_chars` characters.
    """
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += "Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    user_prompt += get_all_details(url)
    # Slicing with None returns the whole string, so None means "no limit"
    return user_prompt[:max_chars]

In [26]:
# Preview the full user prompt; building it scrapes the site and makes the link-selection model call
get_brochure_user_prompt("HuggingFace", "https://huggingface.co")

Found links: {'links': [{'type': 'about page', 'url': 'https://huggingface.co/huggingface'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'company page', 'url': 'https://huggingface.co/enterprise'}, {'type': 'pricing page', 'url': 'https://huggingface.co/pricing'}, {'type': 'blog', 'url': 'https://huggingface.co/blog'}, {'type': 'documentation', 'url': 'https://huggingface.co/docs'}]}


'You are looking at a company called: HuggingFace\nHere are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\nLanding page:\nWebpage Title:\nHugging Face – The AI community building the future.\nWebpage Contents:\nHugging Face\nModels\nDatasets\nSpaces\nCommunity\nDocs\nEnterprise\nPricing\nLog In\nSign Up\nNEW\nGet started with Inference in seconds 🚀\nReachy Mini: The Open Robot for AI Builders\nWelcome Cohere on the Hub 🔥\nThe AI community building the future.\nThe platform where the machine learning community collaborates on models, datasets, and applications.\nExplore AI Apps\nor\nBrowse 1M+ models\nTrending on\nthis week\nModels\nzai-org/GLM-4.5\nUpdated\n3 days ago\n•\n6.13k\n•\n784\ntencent/HunyuanWorld-1\nUpdated\nabout 4 hours ago\n•\n7.99k\n•\n457\nQwen/Qwen3-Coder-480B-A35B-Instruct\nUpdated\n7 days ago\n•\n20.1k\n•\n934\nChain-GPT/Solidity-LLM\nUpdated\n8 days ago\n•\n1.6k\n•\n504\nQwen/Qwe

In [27]:
def create_brochure(company_name, url):
    """
    Generate a brochure for the company in a single request and render it.

    The reply is displayed as Markdown in the notebook; nothing is returned.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": get_brochure_user_prompt(company_name, url)},
    ]
    completion = openai.chat.completions.create(model=MODEL, messages=conversation)
    brochure_markdown = completion.choices[0].message.content
    display(Markdown(brochure_markdown))

In [28]:
# Non-streaming version: the brochure is rendered only after the whole reply has arrived
create_brochure("HuggingFace", "https://huggingface.co")

Found links: {'links': [{'type': 'about page', 'url': 'https://huggingface.co/huggingface'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'blog page', 'url': 'https://huggingface.co/blog'}, {'type': 'enterprise page', 'url': 'https://huggingface.co/enterprise'}, {'type': 'pricing page', 'url': 'https://huggingface.co/pricing'}, {'type': 'community page', 'url': 'https://discuss.huggingface.co'}, {'type': 'GitHub page', 'url': 'https://github.com/huggingface'}, {'type': 'LinkedIn page', 'url': 'https://www.linkedin.com/company/huggingface/'}, {'type': 'Twitter page', 'url': 'https://twitter.com/huggingface'}]}


# Hugging Face Brochure

## Who We Are
**Hugging Face** is at the forefront of the AI community, dedicated to building the future through machine learning. We provide a platform where individuals and organizations can collaborate on a range of models, datasets, and applications, demonstrating an unwavering commitment to the advancement of AI technology.

## Our Offerings
### Models
At Hugging Face, users can explore a vast library of over **1M+ models** spanning various AI applications, including text, image, video, and audio processing. Our platform hosts trending models that are constantly updated to showcase the best in machine learning developments.

### Datasets
We offer access to **250k+ datasets**, providing researchers and developers with the resources they need to push the boundaries of machine learning. These datasets are crucial for training models and improving AI applications.

### Spaces
Our platform features **400k+ applications**, including tools for web development and creative media, enabling users to generate applications, manipulate content, and explore the capabilities of machine learning in versatile environments.

### Enterprise Solutions
For larger organizations, we provide **enterprise-grade security** and dedicated support, equipping teams with the necessary tools to build and deploy AI responsibly and efficiently. 

## Community Engagement
Hugging Face thrives on community collaboration. We are proud to support **over 50,000 organizations** including notable names like Amazon, Google, Microsoft, and Grammarly, reinforcing our position as a trusted partner in the AI ecosystem. Our community-driven approach encourages sharing and collaboration, helping talents from various backgrounds contribute to AI development.

## Company Culture
At Hugging Face, we cultivate a vibrant and inclusive company culture. Our team is passionate about AI technology and strives to maintain an open-source ethos, championing the belief that great innovations come from collaboration. We provide opportunities for our employees to grow, share, and explore new ideas, ensuring that everyone has a voice in shaping the future.

## Careers at Hugging Face
Join us on our mission to democratize AI! We are consistently looking for talented individuals across various domains to join our team. At Hugging Face, you can take part in groundbreaking projects that impact thousands of users and organizations worldwide.

If you are driven by innovation, creativity, and collaboration, we invite you to become a part of our thriving family. Explore job openings on our [Careers Page](https://huggingface.co/jobs).

## Connect with Us
Stay updated with our latest developments and be part of the conversation! Follow us on social platforms:

- **GitHub**
- **Twitter**
- **LinkedIn**
- **Discord**

Join Hugging Face in forging the future of AI. Let’s build something extraordinary together!

In [29]:
def _strip_markdown_fence(text):
    """
    Remove a ```markdown / ```md / ``` fence that wraps the whole reply.

    Only a fence at the very start (and its closing ``` at the very end) is
    removed. Text that opens with another kind of block, such as ```python,
    is returned untouched, and so are fences and the word "markdown" inside
    the brochure body.
    """
    candidate = text.lstrip()
    if not candidate.startswith("```"):
        return text
    fence_line, _, remainder = candidate.partition("\n")
    if fence_line[3:].strip().lower() not in ("", "markdown", "md"):
        # Either a real code block, or the fence line is still arriving
        return text
    trimmed = remainder.rstrip()
    if trimmed.endswith("```"):
        remainder = trimmed[:-3].rstrip("\n")
    return remainder


def stream_brochure(company_name, url):
    """
    Generate a brochure and render it incrementally as the reply streams in.

    Args:
        company_name: company name passed to get_brochure_user_prompt.
        url: landing page passed to get_brochure_user_prompt.

    The Markdown display is refreshed after every chunk; nothing is returned.
    """
    stream = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
          ],
        stream=True
    )

    response = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        # Some chunks carry no choices at all; indexing [0] on them would raise
        if not chunk.choices:
            continue
        response += chunk.choices[0].delta.content or ''
        # Keep the raw text accumulated and clean only what is shown, so a fence
        # split across chunks is still recognised once it is complete.
        update_display(Markdown(_strip_markdown_fence(response)), display_id=display_handle.display_id)

In [30]:
# Streaming version: the rendered brochure below is updated in place as chunks arrive
stream_brochure("HuggingFace", "https://huggingface.co")

Found links: {'links': [{'type': 'about page', 'url': 'https://huggingface.co/huggingface'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'enterprise page', 'url': 'https://huggingface.co/enterprise'}, {'type': 'pricing page', 'url': 'https://huggingface.co/pricing'}, {'type': 'blog page', 'url': 'https://huggingface.co/blog'}, {'type': 'discussion page', 'url': 'https://discuss.huggingface.co'}, {'type': 'GitHub page', 'url': 'https://github.com/huggingface'}, {'type': 'Twitter page', 'url': 'https://twitter.com/huggingface'}, {'type': 'LinkedIn page', 'url': 'https://www.linkedin.com/company/huggingface/'}]}



# Hugging Face: The AI Community Building the Future

## About Us
Hugging Face is a pioneering platform at the forefront of the AI and machine learning community, dedicated to fostering collaboration and innovation among its users. With a mission to democratize access to machine learning, Hugging Face offers an open-source environment for users to create, share, and enhance AI models, datasets, and applications.

## What We Offer
- **Models**: Explore over 1 million machine learning models across various modalities including text, image, video, and audio.
- **Datasets**: Access more than 250,000 datasets tailored for machine learning tasks.
- **Spaces**: Create and deploy your own applications with ease.

### Why Choose Hugging Face?
- **Collaboration**: Join a thriving community where over 50,000 organizations, including tech giants like Google, Microsoft, and Amazon, collaborate on AI projects.
- **Cutting-Edge Tools**: Utilize state-of-the-art tools and libraries, including Transformers, Diffusers, and Tokenizers that are optimized for both research and production environments.
- **Enterprise Solutions**: Benefit from our paid compute services starting at just $0.60/hour for GPU usage, along with dedicated enterprise-grade support.

## Company Culture
At Hugging Face, we celebrate diversity, encourage experimental and innovative thinking, and prioritize responsibility and ethical considerations in AI development. Our open-source philosophy empowers community members to contribute actively, creating a supportive and energizing workplace rooted in collaboration.

## Careers at Hugging Face
We are always on the lookout for passionate individuals to join our team! Whether you're a developer, researcher, or business professional, Hugging Face offers a range of exciting career opportunities in an inclusive environment that values creativity and growth.

### Current Open Positions
- Software Engineers
- Data Scientists
- Community Managers
- Marketing Professionals

Join us in shaping the future of AI and be a part of a movement that is transforming industries worldwide!

## Connect With Us
- [Website](https://huggingface.co)
- [Twitter](https://twitter.com/huggingface)
- [LinkedIn](https://www.linkedin.com/company/huggingface)

---

*Join the AI revolution with Hugging Face – where the machine learning community collaborates to create impactful technology!*



In [31]:
# Try changing the system prompt to the humorous version when you make the Brochure for Hugging Face:
# (stream_brochure looks up system_prompt at call time, so re-run the cell that assigns
# system_prompt before running this one)

stream_brochure("HuggingFace", "https://huggingface.co")

Found links: {'links': [{'type': 'about page', 'url': 'https://huggingface.co/huggingface'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'company page', 'url': 'https://huggingface.co/enterprise'}, {'type': 'blog page', 'url': 'https://huggingface.co/blog'}, {'type': 'community page', 'url': 'https://discuss.huggingface.co'}, {'type': 'models page', 'url': 'https://huggingface.co/models'}, {'type': 'datasets page', 'url': 'https://huggingface.co/datasets'}, {'type': 'spaces page', 'url': 'https://huggingface.co/spaces'}]}


# Hugging Face Brochure

## Welcome to Hugging Face
**Tagline**: The AI community building the future.

At Hugging Face, we strive to foster innovation in the artificial intelligence and machine learning domains. Our platform populates the ecosystem with tools, models, datasets, and a vibrant community, enabling users to create breakthrough applications. 

## 🌐 Our Offerings
- **Models**: Access and collaborate on over **1 million+ models** across various domains.
- **Datasets**: Browse and utilize **250k+ datasets** for a wide array of machine learning tasks.
- **Spaces**: Discover over **400k applications** that utilize our models and datasets, including tools for video transformation, audio, and coding.
- **Enterprise Solutions**: Tailored offerings including optimized inference endpoints and enterprise-grade security for over **50,000 organizations**.

## 🎉 Company Culture
Hugging Face is deeply committed to open-source principles and community collaboration. Our team thrives on innovation, knowledge-sharing, and inclusivity. We believe that by "creating, discovering, and collaborating on ML better," we can collectively accelerate advancements in the AI space.

### Core Values
- **Community-Driven**: Engaging with users and contributors to improve tools and resources collaboratively.
- **Open-Source Commitment**: Pushing the boundaries of machine learning through transparency and accessibility.
- **Learning & Growth**: Encouraging the continuous learning journey for all members through shared resources, documentation, and support.

## 👥 Our Customers
We're proud to serve a variety of entities:
- Tech giants like **Microsoft**, **Google**, **Amazon**, and **Meta** utilize our platform.
- Organizations from diverse industries harness our tools to develop innovative AI solutions, ensuring their methodologies align with cutting-edge research.

## 💼 Careers at Hugging Face
Join us in building the future of AI! We are on the lookout for passionate and talented individuals willing to contribute to our vision. 

### Opportunities
- **Open Positions**: Explore roles across various teams, from engineering and data science to community management and product development.
- **Culture of Learning**: Enjoy a workplace that promotes continuous professional development and encourages collaboration.

### Why Work With Us?
- Be a part of an impactful community dedicated to pushing AI technology forward.
- Work in an environment that values diversity, creativity, and collaborative problem-solving.
- Flexible working conditions that support a healthy work-life balance.

## Connect with Us
Join the AI revolution! Sign up on our platform [here](https://huggingface.co) and explore the vast resources we offer. Follow us on our social media accounts or reach out to us at our community forums for support, updates, and collaboration opportunities.

---

Whether you are looking to improve your AI capabilities, find your next big opportunity, or contribute to a collaborative community, Hugging Face is your partner in progress. Let's build the future together! 🌟