In [1]:
import os
import json
from dotenv import load_dotenv
from IPython.display import Markdown, display, update_display
from scraper import fetch_website_links, fetch_website_contents
from openai import OpenAI


In [2]:
# Initialize and constants

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:
    print("API key looks good so far")
else:
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")
    
MODEL = 'gpt-5-nano'
openai = OpenAI()

API key looks good so far


In [3]:
link_system_prompt = """
You are provided with a list of links found on a webpage.
You are able to decide which of the links would be most relevant to include in a brochure about the company,
such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in this example:

{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [4]:
def get_links_user_prompt(url):
    user_prompt = f"""
Here is the list of links on the website {url} -
Please decide which of these are relevant web links for a brochure about the company, 
respond with the full https URL in JSON format.
Do not include Terms of Service, Privacy, email links.

Links (some might be relative links):

"""
    links = fetch_website_links(url)
    user_prompt += "\n".join(links)
    return user_prompt

In [5]:
print(get_links_user_prompt("https://edwarddonner.com"))


Here is the list of links on the website https://edwarddonner.com -
Please decide which of these are relevant web links for a brochure about the company, 
respond with the full https URL in JSON format.
Do not include Terms of Service, Privacy, email links.

Links (some might be relative links):

https://edwarddonner.com/
https://edwarddonner.com/curriculum/
https://edwarddonner.com/proficient/
https://edwarddonner.com/connect-four/
https://edwarddonner.com/outsmart/
https://edwarddonner.com/about-me-and-about-nebula/
https://edwarddonner.com/posts/
https://edwarddonner.com/
https://news.ycombinator.com
https://nebula.io/?utm_source=ed&utm_medium=referral
https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html
https://edwarddonner.com/curriculum/
https://edwarddonner.com/2026/01/04/ai-builder-with-n8n-create-agents-and-voice-agents/
https://edwarddonner.com/2026/01/04/ai-builder-with-n8n-create-agents-and-voice-agents/

In [6]:
def select_relevant_links(url):
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(url)}
        ],
        response_format={"type": "json_object"}
    )
    result = response.choices[0].message.content
    links = json.loads(result)
    return links
    

In [7]:
select_relevant_links("https://edwarddonner.com")

{'links': [{'type': 'home page', 'url': 'https://edwarddonner.com/'},
  {'type': 'about page',
   'url': 'https://edwarddonner.com/about-me-and-about-nebula/'},
  {'type': 'curriculum page', 'url': 'https://edwarddonner.com/curriculum/'},
  {'type': 'proficient page', 'url': 'https://edwarddonner.com/proficient/'},
  {'type': 'connect-four page',
   'url': 'https://edwarddonner.com/connect-four/'},
  {'type': 'outsmart page', 'url': 'https://edwarddonner.com/outsmart/'},
  {'type': 'blog page', 'url': 'https://edwarddonner.com/posts/'},
  {'type': 'external product page',
   'url': 'https://nebula.io/?utm_source=ed&utm_medium=referral'},
  {'type': 'linkedin', 'url': 'https://www.linkedin.com/in/eddonner/'},
  {'type': 'twitter', 'url': 'https://twitter.com/edwarddonner'},
  {'type': 'facebook', 'url': 'https://www.facebook.com/edward.donner.52'}]}

In [8]:
def select_relevant_links(url):
    print(f"Selecting relevant links for {url} by calling {MODEL}")
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(url)}
        ],
        response_format={"type": "json_object"}
    )
    result = response.choices[0].message.content
    links = json.loads(result)
    print(f"Found {len(links['links'])} relevant links")
    return links

In [9]:
select_relevant_links("https://edwarddonner.com")

Selecting relevant links for https://edwarddonner.com by calling gpt-5-nano
Found 6 relevant links


{'links': [{'type': 'company homepage', 'url': 'https://edwarddonner.com/'},
  {'type': 'about page',
   'url': 'https://edwarddonner.com/about-me-and-about-nebula/'},
  {'type': 'blog page', 'url': 'https://edwarddonner.com/posts/'},
  {'type': 'LinkedIn', 'url': 'https://www.linkedin.com/in/eddonner/'},
  {'type': 'Twitter', 'url': 'https://twitter.com/edwarddonner'},
  {'type': 'Facebook', 'url': 'https://www.facebook.com/edward.donner.52'}]}

In [10]:
def fetch_page_and_all_relevant_links(url):
    contents = fetch_website_contents(url)
    relevant_links = select_relevant_links(url)
    result = f"## Landing Page:\n\n{contents}\n## Relevant Links:\n"
    for link in relevant_links['links']:
        result += f"\n\n### Link: {link['type']}\n"
        result += fetch_website_contents(link["url"])
    return result

In [11]:
print(fetch_page_and_all_relevant_links("https://huggingface.co"))

Selecting relevant links for https://huggingface.co by calling gpt-5-nano
Found 10 relevant links


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


## Landing Page:

Hugging Face â€“ The AI community building the future.

Hugging Face
Models
Datasets
Spaces
Community
Docs
Enterprise
Pricing
Log In
Sign Up
The AI community building the future.
The platform where the machine learning community collaborates on models, datasets, and applications.
Explore AI Apps
or
Browse 2M+ models
Trending on
this week
Models
nvidia/personaplex-7b-v1
Updated
6 days ago
â€¢
43.9k
â€¢
1.36k
moonshotai/Kimi-K2.5
Updated
about 20 hours ago
â€¢
11k
â€¢
879
microsoft/VibeVoice-ASR
Updated
1 day ago
â€¢
76.7k
â€¢
694
Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice
Updated
6 days ago
â€¢
139k
â€¢
693
Tongyi-MAI/Z-Image
Updated
about 8 hours ago
â€¢
567
Browse 2M+ models
Spaces
Running
on
Zero
Featured
931
Qwen3-TTS Demo
ðŸŽ™
931
Transform text into natural-sounding speech with custom voices
Running
on
Zero
MCP
1.86k
Z Image Turbo
ðŸ–¼
1.86k
Generate stunning AI images from text descriptions in seconds
Running
on
Zero
Featured
1.22k
Qwen Image Multiple Angles 3D Camera

In [12]:
brochure_system_prompt = """
You are an assistant that analyzes the contents of several relevant pages from a company website
and creates a short brochure about the company for prospective customers, investors and recruits.
Respond in markdown without code blocks.
Include details of company culture, customers and careers/jobs if you have the information.
"""

# Or uncomment the lines below for a more humorous brochure - this demonstrates how easy it is to incorporate 'tone':

# brochure_system_prompt = """
# You are an assistant that analyzes the contents of several relevant pages from a company website
# and creates a short, humorous, entertaining, witty brochure about the company for prospective customers, investors and recruits.
# Respond in markdown without code blocks.
# Include details of company culture, customers and careers/jobs if you have the information.
# """


In [13]:
def get_brochure_user_prompt(company_name, url):
    user_prompt = f"""
You are looking at a company called: {company_name}
Here are the contents of its landing page and other relevant pages;
use this information to build a short brochure of the company in markdown without code blocks.\n\n
"""
    user_prompt += fetch_page_and_all_relevant_links(url)
    user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters
    return user_prompt

In [14]:
get_brochure_user_prompt("HuggingFace", "https://huggingface.co")

Selecting relevant links for https://huggingface.co by calling gpt-5-nano
Found 8 relevant links


'\nYou are looking at a company called: HuggingFace\nHere are the contents of its landing page and other relevant pages;\nuse this information to build a short brochure of the company in markdown without code blocks.\n\n\n## Landing Page:\n\nHugging Face â€“ The AI community building the future.\n\nHugging Face\nModels\nDatasets\nSpaces\nCommunity\nDocs\nEnterprise\nPricing\nLog In\nSign Up\nThe AI community building the future.\nThe platform where the machine learning community collaborates on models, datasets, and applications.\nExplore AI Apps\nor\nBrowse 2M+ models\nTrending on\nthis week\nModels\nnvidia/personaplex-7b-v1\nUpdated\n6 days ago\nâ€¢\n43.9k\nâ€¢\n1.36k\nmoonshotai/Kimi-K2.5\nUpdated\nabout 20 hours ago\nâ€¢\n11k\nâ€¢\n879\nmicrosoft/VibeVoice-ASR\nUpdated\n1 day ago\nâ€¢\n76.7k\nâ€¢\n694\nQwen/Qwen3-TTS-12Hz-1.7B-CustomVoice\nUpdated\n6 days ago\nâ€¢\n139k\nâ€¢\n693\nTongyi-MAI/Z-Image\nUpdated\nabout 8 hours ago\nâ€¢\n567\nBrowse 2M+ models\nSpaces\nRunning\non\nZero

In [15]:
def create_brochure(company_name, url):
    response = openai.chat.completions.create(
        model="gpt-4.1-mini",
        messages=[
            {"role": "system", "content": brochure_system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
        ],
    )
    result = response.choices[0].message.content
    display(Markdown(result))

In [16]:
create_brochure("HuggingFace", "https://huggingface.co")

Selecting relevant links for https://huggingface.co by calling gpt-5-nano
Found 7 relevant links


# Hugging Face Brochure

---

## About Hugging Face

**Hugging Face** is the premier AI community that is building the future of machine learning. It serves as a vibrant collaboration platform where a global community of machine learning engineers, scientists, and AI enthusiasts come together to create, share, and advance models, datasets, and applications.

Hugging Face powers an ecosystem that facilitates openness, creativity, and ethical AI development. It has become the home of machine learning by providing tools and resources to explore, discover, and build across advanced AI modalities such as text, image, video, audio, and 3D.

---

## What Hugging Face Offers

- **Live Collaboration:** Host and collaborate on unlimited public models, datasets, and AI applications.
- **Massive Resources:** Access to over 2 million open-source ML models and 500,000+ high-quality datasets.
- **Spaces:** A platform to host AI demos and applications with easy deployment and sharing.
- **Multi-Modal AI:** Explore and work with diverse data and ML models covering text, images, video, speech, and 3D.
- **Open-Source Stack:** Utilize cutting-edge open-source ML libraries empowering faster development and experimentation.
- **Community & Learning:** Build your machine learning portfolio by sharing your work and learning from thousands of contributors worldwide.

---

## Community & Customers

Hugging Face serves a diverse set of users including AI researchers, developers, startups, enterprises, and academic institutions. Some of the most popular models come from collaborators and companies like NVIDIA, Microsoft, Alibaba, Facebook, and many AI startups.

The community is fast-growing and highly active, enabling rapid innovation and sharing of AI breakthroughs. Users benefit from a public hub that makes AI accessible and ethical by design.

---

## Careers and Culture

Hugging Face is more than a companyâ€”it's a global community dedicated to open collaboration and ethical AI. The company embraces:

- **Transparency:** Open sharing of code, models, and research.
- **Innovation:** Constant evolution in AI capabilities and modalities.
- **Ethics:** Commitment to building AI that benefits society responsibly.
- **Diversity:** Welcoming talent from all backgrounds who are passionate about AI.

Careers at Hugging Face focus on growing the open AI ecosystem, working alongside passionate experts, and contributing to making AI accessible to the world. Opportunities often arise for ML engineers, community managers, research scientists, and developer advocates eager to push AI frontiers.

---

## Join the Future of AI

Whether you are an individual contributor, a startup, or an enterprise, Hugging Face provides:

- End-to-end tools and community support to accelerate your AI projects.
- An open platform to showcase your ML work and gain recognition.
- Access to cutting-edge models and datasets for research and production.

Explore the future of machine learning with Hugging Face today and become part of the AI revolution.

**Visit:** https://huggingface.co  
**Build, share and collaborate on AI with the world's leading community.**

---

## Brand Highlights

- Recognizable brand colors: vivid yellow (#FFD21E), orange (#FF9D00), and cool gray (#6B7280).
- Friendly and approachable brand personality embracing openness and technical excellence.

---

Hugging Face | The AI community building the future.