## A full Business Solution

In [1]:

import json
import os
from typing import List
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from IPython.display import Markdown, display, update_display
from openai import OpenAI

In [2]:
# Load variables from a local .env file (override=True is passed to load_dotenv).
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Shape-only sanity check on the key: present, expected prefix, more than 10 characters.
# Nothing is sent to the API at this point.
if not api_key or not api_key.startswith('sk-proj-') or len(api_key) <= 10:
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")
else:
    print("API key looks good so far")

# Model name used by every chat-completion call below, and the shared client instance
# (constructed without explicit arguments).
MODEL = 'gpt-4o-mini'
openai = OpenAI()

API key looks good so far


In [3]:
# Browser-like User-Agent header passed to requests.get when a page is scraped.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    ),
}

class Website:
    """
    A scraped web page: its title, visible body text, and the hrefs of its anchors.

    Attributes:
        url: The address that was requested.
        body: Raw bytes of the HTTP response.
        title: Text of the <title> tag, or "No title found" when the tag is
            missing or has no single text value.
        text: Body text joined with newlines, after script/style/img/input
            elements are removed ("" when the document has no <body>).
        links: Every non-empty href found on an <a> tag, exactly as written in
            the page (entries may be relative and may repeat).
    """

    def __init__(self, url, timeout=30):
        """
        Download `url` and parse it with BeautifulSoup's 'html.parser'.

        Args:
            url: Page to fetch.
            timeout: Seconds passed to requests.get as its timeout, so an
                unresponsive host raises instead of hanging the notebook.
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')

        # .string is None when <title> is empty or contains nested tags; fall back
        # to the placeholder so get_contents() never renders the literal "None".
        title_text = soup.title.string if soup.title else None
        self.title = title_text if title_text else "No title found"

        if soup.body:
            # Drop elements that carry no readable text before extracting it.
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""

        # Anchors are collected from the whole document; missing/empty hrefs are dropped.
        links = [link.get('href') for link in soup.find_all('a')]
        self.links = [link for link in links if link]

    def get_contents(self):
        """Return the title and text formatted as a block for inclusion in a prompt."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

In [4]:
# Scrape one page and display the hrefs collected from its <a> tags.
ed = Website("https://edwarddonner.com")
ed.links

['https://edwarddonner.com/',
 'https://edwarddonner.com/connect-four/',
 'https://edwarddonner.com/outsmart/',
 'https://edwarddonner.com/about-me-and-about-nebula/',
 'https://edwarddonner.com/posts/',
 'https://edwarddonner.com/',
 'https://news.ycombinator.com',
 'https://nebula.io/?utm_source=ed&utm_medium=referral',
 'https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html',
 'https://patents.google.com/patent/US20210049536A1/',
 'https://www.linkedin.com/in/eddonner/',
 'https://edwarddonner.com/2025/05/28/connecting-my-courses-become-an-llm-expert-and-leader/',
 'https://edwarddonner.com/2025/05/28/connecting-my-courses-become-an-llm-expert-and-leader/',
 'https://edwarddonner.com/2025/05/18/2025-ai-executive-briefing/',
 'https://edwarddonner.com/2025/05/18/2025-ai-executive-briefing/',
 'https://edwarddonner.com/2025/04/21/the-complete-agentic-ai-engineering-course/',
 'https://edwarddonner.com/2025/04/21/the-

In [5]:
# System prompt for the link-selection call: describes the task, then shows the
# JSON shape the reply should follow.
link_system_prompt = (
    "You are provided with a list of links found on a webpage. "
    "You are able to decide which of the links would be most relevant to include in a brochure about the company, "
    "such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
    "You should respond in JSON as in this example:"
    "\n"
    "{\n"
    '    "links": [\n'
    '        {"type": "about page", "url": "https://full.url/goes/here/about"},\n'
    '        {"type": "careers page", "url": "https://another.full.url/careers"}\n'
    "    ]\n"
    "}\n"
)

In [6]:
def get_links_user_prompt(website):
    """
    Build the user prompt asking the model to pick brochure-relevant links.

    Args:
        website: Object exposing `url` (str) and `links` (iterable of href strings).

    Returns:
        The instruction text followed by the page's links, one per line. Each
        distinct href is listed once, in first-seen order, so navigation links
        repeated across a page do not inflate the prompt.
    """
    user_prompt = f"Here is the list of links on the website of {website.url} - "
    user_prompt += (
        "please decide which of these are relevant web links for a brochure about the company, "
        "respond with the full https URL in JSON format. "
        "Do not include Terms of Service, Privacy, email links.\n"
    )
    user_prompt += "Links (some might be relative links):\n"
    # dict.fromkeys keeps insertion order, giving an order-preserving de-duplication.
    unique_links = list(dict.fromkeys(website.links))
    user_prompt += "\n".join(unique_links)
    return user_prompt

In [8]:
def get_links(url):
    """
    Scrape `url` and ask the model which of its links belong in a brochure.

    The request uses the JSON-object response format; the reply text is parsed
    with json.loads and the resulting object is returned.
    """
    page = Website(url)
    conversation = [
        {"role": "system", "content": link_system_prompt},
        {"role": "user", "content": get_links_user_prompt(page)},
    ]
    completion = openai.chat.completions.create(
        model=MODEL,
        messages=conversation,
        response_format={"type": "json_object"},
    )
    return json.loads(completion.choices[0].message.content)

In [9]:
# Scrape a second site and display its hrefs; here many of them are relative paths.
huggingface = Website("https://huggingface.co")
huggingface.links

['/',
 '/models',
 '/datasets',
 '/spaces',
 '/docs',
 '/enterprise',
 '/pricing',
 '/login',
 '/join',
 'inference/get-started',
 '/spaces',
 '/models',
 '/zai-org/GLM-4.5',
 '/black-forest-labs/FLUX.1-Krea-dev',
 '/Qwen/Qwen3-30B-A3B-Instruct-2507',
 '/Qwen/Qwen3-Coder-30B-A3B-Instruct',
 '/tencent/Hunyuan-1.8B-Instruct',
 '/models',
 '/spaces/enzostvs/deepsite',
 '/spaces/Qwen/Qwen3-Coder-WebDev',
 '/spaces/Wan-AI/Wan-2.2-5B',
 '/spaces/zumjoy/Multi-Style_Video-to-Anime_Generator',
 '/spaces/smola/higgs_audio_v2',
 '/spaces',
 '/datasets/spatialverse/InteriorGS',
 '/datasets/fka/awesome-chatgpt-prompts',
 '/datasets/nvidia/Nemotron-Post-Training-Dataset-v1',
 '/datasets/MegaScience/MegaScience',
 '/datasets/UCSC-VLAA/GPT-Image-Edit-1.5M',
 '/datasets',
 '/join',
 '/pricing#endpoints',
 '/pricing#spaces',
 '/pricing',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/allenai',
 '/facebook',
 '/amazon',
 '/google',
 '/In

In [10]:
# Ask the model to select the brochure-relevant links for this site and show the parsed JSON.
get_links("https://huggingface.co")

{'links': [{'type': 'about page', 'url': 'https://huggingface.co/huggingface'},
  {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'},
  {'type': 'blog page', 'url': 'https://huggingface.co/blog'},
  {'type': 'company page',
   'url': 'https://www.linkedin.com/company/huggingface/'},
  {'type': 'enterprise page', 'url': 'https://huggingface.co/enterprise'},
  {'type': 'pricing page', 'url': 'https://huggingface.co/pricing'},
  {'type': 'community discussion page',
   'url': 'https://discuss.huggingface.co'}]}

In [11]:
def get_all_details(url):
    """
    Collect the text of the landing page plus every page the model selected.

    Args:
        url: Landing page of the company website.

    Returns:
        A single string: the landing page contents, then one section per
        selected link headed by the link's "type".

    Notes:
        - Prints the selected links, and a message for each page that is skipped.
        - A reply without a "links" key is treated as "no extra pages".
        - Link URLs are resolved against `url`, so a relative href returned by
          the model is still fetched; absolute URLs are left unchanged.
        - A page whose download raises requests.RequestException is skipped
          rather than aborting the whole collection.
    """
    result = "Landing page:\n"
    result += Website(url).get_contents()
    links = get_links(url)
    print("Found links:", links)
    for link in links.get("links", []):
        page_url = urljoin(url, link["url"])
        try:
            page = Website(page_url)
        except requests.RequestException as error:
            print(f"Skipping {page_url}: {error}")
            continue
        result += f"\n\n{link['type']}\n"
        result += page.get_contents()
    return result

In [12]:
# Print the combined text of the landing page and the model-selected pages.
print(get_all_details("https://huggingface.co"))

Found links: {'links': [{'type': 'about page', 'url': 'https://huggingface.co/huggingface'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'blog page', 'url': 'https://huggingface.co/blog'}, {'type': 'docs page', 'url': 'https://huggingface.co/docs'}]}
Landing page:
Webpage Title:
Hugging Face – The AI community building the future.
Webpage Contents:
Hugging Face
Models
Datasets
Spaces
Community
Docs
Enterprise
Pricing
Log In
Sign Up
NEW
Get started with Inference in seconds 🚀
Reachy Mini: The Open Robot for AI Builders
Welcome Cohere on the Hub 🔥
The AI community building the future.
The platform where the machine learning community collaborates on models, datasets, and applications.
Explore AI Apps
or
Browse 1M+ models
Trending on
this week
Models
zai-org/GLM-4.5
Updated
7 days ago
•
11.3k
•
1.01k
black-forest-labs/FLUX.1-Krea-dev
Updated
4 days ago
•
37.2k
•
428
Qwen/Qwen3-30B-A3B-Instruct-2507
Updated
5 days ago
•
69.6k
•
389
Qwen/Qwen3-Coder-

In [13]:
# System prompt for the brochure-writing call. Each piece ends with an explicit
# space so that sentences do not run together when the literals are joined.
system_prompt = (
    "You are an assistant that analyzes the contents of several relevant pages from a company website "
    "and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown. "
    "Include details of company culture, customers and careers/jobs if you have the information."
)

# Or uncomment the lines below for a more humorous brochure - this demonstrates how easy it is to incorporate 'tone':

# system_prompt = (
#     "You are an assistant that analyzes the contents of several relevant pages from a company website "
#     "and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown. "
#     "Include details of company culture, customers and careers/jobs if you have the information."
# )


In [14]:
def get_brochure_user_prompt(company_name, url, max_chars=5_000):
    """
    Build the user prompt for the brochure-writing call.

    Args:
        company_name: Name inserted into the first line of the prompt.
        url: Landing page passed to get_all_details to gather the page contents.
        max_chars: Maximum length of the returned prompt in characters; the text
            is cut off at this point. Pass None to disable truncation.

    Returns:
        The prompt text, at most `max_chars` characters long.
    """
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += "Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    user_prompt += get_all_details(url)
    # Slicing with None as the bound returns the whole string, so None means "no limit".
    return user_prompt[:max_chars]

In [15]:
# Show the (truncated) user prompt that will be sent for the brochure request.
get_brochure_user_prompt("HuggingFace", "https://huggingface.co")

Found links: {'links': [{'type': 'about page', 'url': 'https://huggingface.co/huggingface'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'company page', 'url': 'https://huggingface.co/enterprise'}, {'type': 'blog page', 'url': 'https://huggingface.co/blog'}, {'type': 'community page', 'url': 'https://discuss.huggingface.co'}, {'type': 'GitHub page', 'url': 'https://github.com/huggingface'}, {'type': 'Twitter page', 'url': 'https://twitter.com/huggingface'}, {'type': 'LinkedIn page', 'url': 'https://www.linkedin.com/company/huggingface/'}]}


'You are looking at a company called: HuggingFace\nHere are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\nLanding page:\nWebpage Title:\nHugging Face – The AI community building the future.\nWebpage Contents:\nHugging Face\nModels\nDatasets\nSpaces\nCommunity\nDocs\nEnterprise\nPricing\nLog In\nSign Up\nNEW\nGet started with Inference in seconds 🚀\nReachy Mini: The Open Robot for AI Builders\nWelcome Cohere on the Hub 🔥\nThe AI community building the future.\nThe platform where the machine learning community collaborates on models, datasets, and applications.\nExplore AI Apps\nor\nBrowse 1M+ models\nTrending on\nthis week\nModels\nzai-org/GLM-4.5\nUpdated\n7 days ago\n•\n11.3k\n•\n1.01k\nblack-forest-labs/FLUX.1-Krea-dev\nUpdated\n4 days ago\n•\n37.2k\n•\n428\nQwen/Qwen3-30B-A3B-Instruct-2507\nUpdated\n5 days ago\n•\n69.6k\n•\n389\nQwen/Qwen3-Coder-30B-A3B-Instruct\nUpdated\n4 days ago\n•\n60.4k\n•

In [16]:
def create_brochure(company_name, url):
    """
    Generate a brochure for the company in one request and render it as Markdown.

    The reply is displayed in the notebook; nothing is returned.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": get_brochure_user_prompt(company_name, url)},
    ]
    completion = openai.chat.completions.create(model=MODEL, messages=conversation)
    brochure = completion.choices[0].message.content
    display(Markdown(brochure))

In [17]:
# Generate and render the brochure in a single (non-streaming) request.
create_brochure("HuggingFace", "https://huggingface.co")

Found links: {'links': [{'type': 'about page', 'url': 'https://huggingface.co/huggingface'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'blog page', 'url': 'https://huggingface.co/blog'}, {'type': 'github page', 'url': 'https://github.com/huggingface'}, {'type': 'linkedin page', 'url': 'https://www.linkedin.com/company/huggingface/'}, {'type': 'twitter page', 'url': 'https://twitter.com/huggingface'}]}


# Hugging Face - The AI Community Building the Future

Welcome to **Hugging Face**, a vibrant community dedicated to advancing the future of artificial intelligence and machine learning. Our platform fosters collaboration among AI enthusiasts, practitioners, and enterprises alike, allowing users to create, discover, and share groundbreaking models and datasets.

## About Us

Hugging Face is not just a platform; it’s a community. With over 50,000 organizations leveraging our services, from industry leaders like **Google**, **Meta**, and **Microsoft** to innovative startups, we’re at the forefront of machine learning technology. Our mission is to provide the necessary tools and platforms for individuals and organizations to excel in AI research and application.

### The Home of Machine Learning
- **Explore 1M+ Models:** Discover and utilize a vast range of AI models for various applications.
- **Access 250k+ Datasets:** Find diverse datasets suitable for your specific machine learning tasks.
- **Innovative Applications:** With features like HuggingChat and Spaces, you can run applications ranging from text generation to video generation seamlessly.

## Company Culture

At Hugging Face, we prioritize community-driven development and open-source collaboration. Our culture is based on the principles of transparency, inclusiveness, and innovation. We invite contributions from everyone, allowing each member to build a portfolio while sharing their work with the world.

We champion diversity, maintain a supportive work environment, and encourage continuous learning and experimentation — all while working at the cutting edge of AI technologies.

## Careers Opportunities

Interested in joining our mission? We are always on the lookout for passionate individuals who are eager to make an impact in the AI space. At Hugging Face, you’ll find roles that challenge you, a supportive team, and the opportunity to work on fascinating projects that will shape the future of technology.

### Current Openings:
- Machine Learning Engineers
- Data Scientists
- Community Managers
- Software Developers

Check our [Jobs Page](https://huggingface.co/jobs) for the latest opportunities and to learn how you can contribute to this exciting field.

## Get Involved

Whether you are a researcher, developer, or enthusiast, there are endless ways to engage with our community:
- **Sign Up** to host and collaborate on models and datasets.
- Join discussions in our [Forums](https://huggingface.co/forums) or connect with us on [Discord](https://huggingface.co/discord) and [Twitter](https://twitter.com/huggingface).
- Access our comprehensive [Documentation](https://huggingface.co/docs) to unlock the full potential of our tools.

---

Let’s build the future of AI together! Visit us at [huggingface.co](https://huggingface.co) to explore, learn, and grow in this exciting field.

In [18]:
def stream_brochure(company_name, url):
    """
    Generate a brochure with a streaming request, re-rendering it as text arrives.

    The unmodified reply is accumulated separately from what is displayed. The
    displayed text has only the Markdown code-fence markers removed, so the word
    "markdown" inside the brochure itself is preserved, and a fence that arrives
    split across chunks is still stripped once it is complete.

    Nothing is returned; the output is shown through a display handle that is
    updated on every chunk.
    """
    stream = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
          ],
        stream=True
    )

    raw_response = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        # Defensive: skip any chunk that carries no choices instead of raising IndexError.
        if not chunk.choices:
            continue
        raw_response += chunk.choices[0].delta.content or ''
        # Strip the opening "```markdown" marker first, then any remaining bare fences.
        visible = raw_response.replace("```markdown", "").replace("```", "")
        update_display(Markdown(visible), display_id=display_handle.display_id)

In [19]:
# Generate the brochure again, this time rendering it incrementally as it streams in.
stream_brochure("HuggingFace", "https://huggingface.co")

Found links: {'links': [{'type': 'about page', 'url': 'https://huggingface.co/'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'enterprise page', 'url': 'https://huggingface.co/enterprise'}, {'type': 'pricing page', 'url': 'https://huggingface.co/pricing'}, {'type': 'blog page', 'url': 'https://huggingface.co/blog'}, {'type': 'documentation page', 'url': 'https://huggingface.co/docs'}, {'type': 'community page', 'url': 'https://discuss.huggingface.co'}, {'type': 'GitHub page', 'url': 'https://github.com/huggingface'}, {'type': 'Twitter page', 'url': 'https://twitter.com/huggingface'}, {'type': 'LinkedIn page', 'url': 'https://www.linkedin.com/company/huggingface/'}]}


# Hugging Face Brochure

## About Us
**Hugging Face** is a leading AI community dedicated to building the future of Artificial Intelligence. We offer a collaborative platform where machine learning enthusiasts can work together on models, datasets, and applications. With state-of-the-art tools powered by the community, we empower creators to accelerate their machine learning journeys.

## Our Offerings
### Models
Explore over **1 million models** available for your projects. Whether you need cutting-edge text, image, video, or audio models, Hugging Face has you covered.

### Datasets
Access an extensive library of **250,000+ datasets** tailored for various machine learning tasks, enabling projects from simple experiments to large-scale applications.

### Spaces
Run your AI applications in **Spaces**. Discover and deploy applications engaging with the Hugging Face infrastructure, allowing you to generate and share innovative AI solutions swiftly.

## Our Customers
With over **50,000 organizations** utilizing our platform, we cater to a diverse range of enterprises from tech giants like **Meta, Amazon, Google, and Microsoft** to innovative startups and research institutions. Our commitment to community-driven innovation makes us a trusted partner for leading businesses.

## Company Culture
At Hugging Face, we foster a **collaborative and inclusive culture**. Our environment thrives on openness and transparency where team members can share their knowledge and learn from each other. We believe that the best ideas come from diverse perspectives, and we are committed to building a community that reflects that ethos.

### Join Our Team
We are constantly looking for passionate individuals to join our team. Explore exciting career opportunities in various fields including engineering, research, and community engagement. At Hugging Face, every member is encouraged to contribute to shaping the future of AI.

## Get Started
Join the **Hugging Face** community today and leverage our powerful tools to accelerate your machine learning projects! Whether you are a seasoned expert or new to ML, we provide resources and support to help you succeed.

### Connect with Us
- **[Sign Up](https://huggingface.co/signup)** and become part of our community.
- Follow us on [Twitter](https://twitter.com/huggingface), [LinkedIn](https://linkedin.com/company/huggingface), and [Discord](https://discord.com/invite/huggingface) to get the latest updates.
  
Together, let's build the future of AI! 🌟