In [3]:
import os
import json
from dotenv import load_dotenv
from scraper import fetch_website_links, fetch_website_contents
from openai import OpenAI
from IPython.display import Markdown, display, update_display



In [None]:
links = fetch_website_links("https://edwarddonner.com")
links

In [4]:
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:
    print("API key looks good so far")
else:
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")
    
MODEL = 'gpt-5-nano'
openai = OpenAI()

API key looks good so far


In [5]:
link_system_prompt = """
You are provided with a list of links found on a webpage.
You are able to decide which of the links would be most relevant to include in a brochure about the company,
such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in this example:

{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""
link_system_prompt

'\nYou are provided with a list of links found on a webpage.\nYou are able to decide which of the links would be most relevant to include in a brochure about the company,\nsuch as links to an About page, or a Company page, or Careers/Jobs pages.\nYou should respond in JSON as in this example:\n\n{\n    "links": [\n        {"type": "about page", "url": "https://full.url/goes/here/about"},\n        {"type": "careers page", "url": "https://another.full.url/careers"}\n    ]\n}\n'

In [6]:
def get_links_user_prompt(url):
    user_prompt = f"""
Here is the list of links on the website {url} -
Please decide which of these are relevant web links for a brochure about the company, 
respond with the full https URL in JSON format.
Do not include Terms of Service, Privacy, email links.

Links (some might be relative links):

"""
    links = fetch_website_links(url)
    user_prompt += "\n".join(links)
    return user_prompt

In [7]:
print(get_links_user_prompt("https://edwarddonner.com"))


Here is the list of links on the website https://edwarddonner.com -
Please decide which of these are relevant web links for a brochure about the company, 
respond with the full https URL in JSON format.
Do not include Terms of Service, Privacy, email links.

Links (some might be relative links):

https://edwarddonner.com/
https://edwarddonner.com/connect-four/
https://edwarddonner.com/outsmart/
https://edwarddonner.com/about-me-and-about-nebula/
https://edwarddonner.com/posts/
https://edwarddonner.com/
https://news.ycombinator.com
https://nebula.io/?utm_source=ed&utm_medium=referral
https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html
https://patents.google.com/patent/US20210049536A1/
https://www.linkedin.com/in/eddonner/
https://edwarddonner.com/2025/11/11/ai-live-event/
https://edwarddonner.com/2025/11/11/ai-live-event/
https://edwarddonner.com/2025/09/15/ai-in-production-gen-ai-and-agentic-ai-on-aws-at-scale/
htt

In [24]:
def select_relevant_links(url):
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system","content": link_system_prompt},
            {"role": "user","content": get_links_user_prompt(url)}
        ],
        response_format={"type":"json_object"}
    )
    result = response.choices[0].message.content
    links = json.loads(result)
    print(f"Found {len(links['links'])} relevant links")
    return links

In [21]:
select_relevant_links("https://edwarddonner.com")

Found 8 relevant links


{'links': [{'type': 'homepage', 'url': 'https://edwarddonner.com/'},
  {'type': 'about page',
   'url': 'https://edwarddonner.com/about-me-and-about-nebula/'},
  {'type': 'project page', 'url': 'https://edwarddonner.com/connect-four/'},
  {'type': 'project page', 'url': 'https://edwarddonner.com/outsmart/'},
  {'type': 'blog page', 'url': 'https://edwarddonner.com/posts/'},
  {'type': 'LinkedIn page', 'url': 'https://www.linkedin.com/in/eddonner/'},
  {'type': 'Twitter page', 'url': 'https://twitter.com/edwarddonner'},
  {'type': 'Facebook page',
   'url': 'https://www.facebook.com/edward.donner.52'}]}

In [22]:
select_relevant_links("https://huggingface.co")

Found 9 relevant links


{'links': [{'type': 'homepage', 'url': 'https://huggingface.co/'},
  {'type': 'brand page', 'url': 'https://huggingface.co/brand'},
  {'type': 'enterprise page', 'url': 'https://huggingface.co/enterprise'},
  {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'},
  {'type': 'GitHub', 'url': 'https://github.com/huggingface'},
  {'type': 'LinkedIn', 'url': 'https://www.linkedin.com/company/huggingface/'},
  {'type': 'Twitter', 'url': 'https://twitter.com/huggingface'},
  {'type': 'Community / Discuss', 'url': 'https://discuss.huggingface.co'},
  {'type': 'Status', 'url': 'https://status.huggingface.co/'}]}

In [None]:
def fetch_page_and_all_relevant_links(url):
    contents = fetch_website_contents(url)
    relevant_links = select_relevant_links(url)
    result = f"## Landing Page:\n\n{contents}\n## Relevant Links:\n"
    for link in relevant_links['links']:
        result += f"\n\n### Link: {link['type']}\n"
        result += (link["url"])
    return result

In [26]:
print(fetch_page_and_all_relevant_links("https://huggingface.co"))

Found 13 relevant links
## Landing Page:

Hugging Face ‚Äì The AI community building the future.

Hugging Face
Models
Datasets
Spaces
Community
Docs
Enterprise
Pricing
Log In
Sign Up
The AI community building the future.
The platform where the machine learning community collaborates on models, datasets, and applications.
Explore AI Apps
or
Browse 2M+ models
Trending on
this week
Models
zai-org/GLM-4.7
Updated
4 days ago
‚Ä¢
15.8k
‚Ä¢
1.03k
Qwen/Qwen-Image-Layered
Updated
8 days ago
‚Ä¢
14.9k
‚Ä¢
790
Qwen/Qwen-Image-Edit-2511
Updated
4 days ago
‚Ä¢
14.5k
‚Ä¢
439
MiniMaxAI/MiniMax-M2.1
Updated
1 day ago
‚Ä¢
15.9k
‚Ä¢
431
google/functiongemma-270m-it
Updated
9 days ago
‚Ä¢
33.6k
‚Ä¢
646
Browse 2M+ models
Spaces
Running
on
Zero
Featured
578
TRELLIS.2
üè¢
578
High-fidelity 3D Generation from images
Running
on
Zero
Featured
290
Qwen Image Layered
üöÄ
290
Decompose an image into layers and export as PPTX or ZIP
Running
Featured
2.96k
Wan2.2 Animate
üëÅ
2.96k
Wan2.2 Animate
Running
on
CPU U

In [27]:
brochure_system_prompt = """
You are an assistant that analyzes the contents of several relevant pages from a company website
and creates a short brochure about the company for prospective customers, investors and recruits.
Respond in markdown without code blocks.
Include details of company culture, customers and careers/jobs if you have the information.
"""

In [28]:
def get_brochure_user_prompt(company_name, url):
    user_prompt = f"""
You are looking at a company called: {company_name}
Here are the contents of its landing page and other relevant pages;
use this information to build a short brochure of the company in markdown without code blocks.\n\n
"""
    user_prompt += fetch_page_and_all_relevant_links(url)
    user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters
    return user_prompt

In [29]:
get_brochure_user_prompt("HuggingFace", "https://huggingface.co")

Found 8 relevant links


'\nYou are looking at a company called: HuggingFace\nHere are the contents of its landing page and other relevant pages;\nuse this information to build a short brochure of the company in markdown without code blocks.\n\n\n## Landing Page:\n\nHugging Face ‚Äì The AI community building the future.\n\nHugging Face\nModels\nDatasets\nSpaces\nCommunity\nDocs\nEnterprise\nPricing\nLog In\nSign Up\nThe AI community building the future.\nThe platform where the machine learning community collaborates on models, datasets, and applications.\nExplore AI Apps\nor\nBrowse 2M+ models\nTrending on\nthis week\nModels\nzai-org/GLM-4.7\nUpdated\n4 days ago\n‚Ä¢\n15.8k\n‚Ä¢\n1.03k\nQwen/Qwen-Image-Layered\nUpdated\n8 days ago\n‚Ä¢\n14.9k\n‚Ä¢\n790\nQwen/Qwen-Image-Edit-2511\nUpdated\n4 days ago\n‚Ä¢\n14.5k\n‚Ä¢\n439\nMiniMaxAI/MiniMax-M2.1\nUpdated\n1 day ago\n‚Ä¢\n15.9k\n‚Ä¢\n432\ngoogle/functiongemma-270m-it\nUpdated\n9 days ago\n‚Ä¢\n33.6k\n‚Ä¢\n646\nBrowse 2M+ models\nSpaces\nRunning\non\nZero\nFeatur

In [33]:
def create_brochure(company_name,url):
    response = openai.chat.completions.create(
        model="gpt-4.1-mini",
        messages=[
            {"role": "system", "content": brochure_system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
        ],
    )
    result = response.choices[0].message.content
    display(Markdown(result))

In [34]:
create_brochure("HuggingFace", "https://huggingface.co")

Found 10 relevant links


# Hugging Face Brochure

---

## About Hugging Face

Hugging Face is the AI community building the future of machine learning. It serves as a central collaboration platform where developers, researchers, and organizations come together to build, share, and innovate AI models, datasets, and applications. With a fast-growing global community, Hugging Face is at the heart of the AI revolution, fostering an open, ethical, and collaborative environment for machine learning enthusiasts and professionals.

- **Mission:** Empower the next generation of machine learning engineers, scientists, and users to learn, collaborate, and share their work.
- **Platform:** Hosts over 2 million models, 500k+ datasets, and 1 million+ AI applications across modalities like text, image, video, audio, and 3D.

---

## Platform Highlights

- **Collaboration Hub:** Unlimited hosting and collaboration on public models, datasets, and apps.
- **Multi-Modal AI:** Supports diverse data types‚Äîtext, images, audio, video, and 3D content.
- **Community-Powered:** Explore trending models, datasets, and apps with strong user engagement and feedback.
- **Open-Source Focus:** Provides a rich open-source stack that accelerates development and experimentation.
- **Portfolio Building:** Developers can share their work and build recognized machine learning profiles.

---

## Enterprise Solutions

Hugging Face offers tailored solutions for businesses looking to scale their AI capabilities securely and efficiently:

- **Team & Enterprise Plans:** Starting at $20/user/month with flexible contract options for larger organizations.
- **Enterprise-Grade Security:** Includes Single Sign-On (SSO), audit logs, granular access control, and advanced token management.
- **Private Data Support:** Private storage (1 TB/member), private dataset viewers, and secure resource management.
- **Advanced Compute:** Scalable compute options including ZeroGPU upgrades with significantly increased quota.
- **Analytics & Monitoring:** Centralized dashboards provide insight into usage, spending, and repository activity.
- **Dedicated Support:** Enterprise customers gain access to expert support and enhanced infrastructure.

---

## Company Culture

Hugging Face fosters an inclusive, open, and progressive culture centered around:

- **Community Collaboration:** Encouraging sharing and contribution to open-source AI for widespread impact.
- **Innovation at the Edge:** Leveraging talented scientists and engineers who explore and push the boundaries of AI.
- **Transparency & Ethics:** Building an ethical AI future by promoting open and accessible machine learning research and tools.
- **Learning & Growth:** Supporting continuous learning through documentation, forums, tutorials, and active community engagement.

---

## Customers & Community

Hugging Face‚Äôs platform serves:

- Individual machine learning enthusiasts and researchers.
- Academic institutions and scientific communities.
- Startups and large enterprises across industries looking to implement AI.
- Developers seeking to build portfolios and share their AI projects.
- Organizations requiring scalable, secure collaboration on AI models and datasets.

---

## Careers at Hugging Face

Join a forward-thinking AI company shaping the future of technology. Hugging Face offers roles in machine learning research, software engineering, product development, and community management. Working here means contributing to cutting-edge AI projects, collaborating with top-tier talent worldwide, and supporting an ethical AI ecosystem.

- **Why Work Here?**
  - Impactful projects at the forefront of AI innovation.
  - Open, collaborative company culture.
  - Opportunities for professional growth and development.
  - Work alongside a vibrant and diverse global AI community.

Explore open positions on the Hugging Face [Careers Page](https://huggingface.co/careers).

---

## Get Started

- Discover and collaborate on AI models: [huggingface.co/models](https://huggingface.co/models)
- Explore datasets and applications: [huggingface.co/datasets](https://huggingface.co/datasets)
- Try Spaces, hosting ML demos and apps: [huggingface.co/spaces](https://huggingface.co/spaces)
- Join the community: forums, Discord, GitHub, and more.
- Enterprise inquiries and solutions: [huggingface.co/enterprise](https://huggingface.co/enterprise)

---

**Hugging Face**  
*The AI community building the future.*  
Learn more at [huggingface.co](https://huggingface.co)  
Connect on Twitter | LinkedIn | GitHub | Discord

---

*Colors inspired by brand identity:* #FFD21E, #FF9D00, #6B7280

In [35]:
def stream_brochure(company_name,url):
    stream = openai.chat.completions.create(
        model="gpt-4.1-mini",
        messages=[
            {"role": "system", "content": brochure_system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
        ],
        stream=True
    )
    response = ""
    display_handle = display(Markdown(""),display_id=True)
    for chunk in stream:
        response += chunk.choices[0].delta.content or ''
        update_display(Markdown(response), display_id=display_handle.display_id)

In [36]:
stream_brochure("HuggingFace", "https://huggingface.co")

Found 12 relevant links


# Hugging Face Brochure

---

## About Hugging Face

Hugging Face is the AI community building the future of machine learning. It serves as a dynamic collaboration platform where machine learning engineers, scientists, and enthusiasts connect to share, explore, and build state-of-the-art models, datasets, and AI applications. With a mission to democratize good machine learning, Hugging Face fosters open and ethical AI development, empowering the next generation of AI practitioners to innovate and collaborate on a global scale.

---

## What We Offer

- **Hugging Face Hub:** A central repository hosting over 2 million models and 500,000+ datasets spanning all modalities ‚Äî text, image, video, audio, and 3D.
- **Spaces:** Deploy and showcase interactive ML applications on a free-to-use platform with a rich community presence.
- **Open Source Stack:** Accelerate ML development using some of the most widely adopted open-source machine learning libraries and tools.
- **Enterprise Solutions:** Dedicated compute, security, access controls, and support tailored for teams and organizations to build AI with confidence.
- **Community & Collaboration:** A vibrant ecosystem actively contributing research papers, custom benchmarks, and tools that push the frontier of AI.

---

## Company Culture

Hugging Face embraces openness, collaboration, and continuous learning with a strong community spirit. The company values transparency, ethical AI development, and fostering inclusivity, inviting AI enthusiasts and experts alike to contribute to the collective progress. Their team of nearly 200 talented individuals is continuously growing, united by the mission to enable access to powerful machine learning resources globally ‚Äî "one commit at a time."

---

## Our Customers & Users

Hugging Face‚Äôs platform serves a wide array of users including:

- AI researchers and developers leveraging open-source models.
- Enterprises needing scalable and secure AI infrastructure.
- Data scientists seeking diverse datasets for experimentation.
- Educators and learners building AI skills through practical projects.
- Innovators deploying applications that solve real-world problems using machine learning.

Leading organizations, research groups, and independent developers all benefit from Hugging Face's comprehensive AI ecosystem.

---

## Careers at Hugging Face

If you are passionate about democratizing AI and influencing the future of machine learning, Hugging Face invites you to join their mission-driven team. The company offers opportunities to work on cutting-edge AI technologies alongside top researchers and contributors in a supportive, open, and purpose-driven environment.

Explore roles in research, engineering, community management, product development, and more. By joining Hugging Face, you become part of an innovative community shaping the AI revolution for collective benefit.

---

## Connect With Us

- Website: [huggingface.co](https://huggingface.co)
- GitHub, Twitter, LinkedIn, Discord communities
- Active publication and blog presence on the latest ML trends and research

Discover, build, and share the future of AI ‚Äî with Hugging Face.

---

## Brand Colors

- Yellow: #FFD21E  
- Orange: #FF9D00  
- Gray: #6B7280

---

Hugging Face: The home of machine learning collaboration and innovation. Join us to accelerate AI for everyone.