In [1]:
# imports

import os
import requests
import json
from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI


In [2]:
# Initialize and constants

load_dotenv()
# Leave an existing OPENAI_API_KEY untouched; if none is set, install the
# placeholder value so the variable is always defined.
os.environ.setdefault('OPENAI_API_KEY', 'your-key-if-not-using-env')
MODEL = 'gpt-4o-mini'  # model name passed to every chat completion call in this notebook
openai = OpenAI()


In [3]:
# A class to represent a Webpage

class Website:
    """
    A utility class to represent a Website that we have scraped, now with links

    Constructing an instance performs the HTTP GET immediately and parses the
    response with BeautifulSoup's 'html.parser'.

    Attributes:
        url: the address passed to the constructor.
        title: text of the <title> tag, or "No title found" when it is absent or empty.
        body: raw response payload (``response.content``).
        links: every non-empty ``href`` found on an <a> tag, exactly as written
            in the page (so entries may be relative).
        text: text of <body> after script/style/img/input elements are removed,
            one fragment per line; empty string when the page has no <body>.
    """
    url: str
    title: str
    body: bytes
    links: List[str]
    text: str

    def __init__(self, url, timeout=30):
        """
        Fetch and parse ``url``.

        Args:
            url: page to download.
            timeout: seconds passed to ``requests.get`` so an unresponsive
                server raises instead of blocking the notebook indefinitely.
        """
        self.url = url
        response = requests.get(url, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')
        # .string is None when <title> is empty or contains nested tags, so
        # fall back to the placeholder in that case as well.
        title_text = soup.title.string if soup.title else None
        self.title = title_text or "No title found"
        if soup.body:
            # Remove elements that contribute no readable text before extracting it.
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""
        links = [link.get('href') for link in soup.find_all('a')]
        self.links = [link for link in links if link]  # drop anchors without an href

    def get_contents(self):
        """Return the title and extracted text formatted for inclusion in a prompt."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

In [4]:
# Scrape the Hugging Face landing page and preview the extracted title and text.
# `hf` is reused by a later cell to preview the link-selection prompt.
hf = Website("https://huggingface.co")
print(hf.get_contents())


Webpage Title:
Hugging Face – The AI community building the future.
Webpage Contents:
Hugging Face
Models
Datasets
Spaces
Posts
Docs
Solutions
Pricing
Log In
Sign Up
NEW
Use Ollama with GGUF Models from the HF Hub
The AI community building the future.
The platform where the machine learning community collaborates on models, datasets, and applications.
Trending on
this week
Models
microsoft/OmniParser
Updated
3 days ago
•
4.12k
•
990
stabilityai/stable-diffusion-3.5-medium
Updated
5 days ago
•
16.7k
•
265
stabilityai/stable-diffusion-3.5-large
Updated
14 days ago
•
153k
•
1.03k
Etched/oasis-500m
Updated
about 8 hours ago
•
214
genmo/mochi-1-preview
Updated
4 days ago
•
832
Browse 400k+ models
Spaces
Running
on
Zero
868
🏃
Stable Diffusion 3.5 Large
Generate images with SD3.5
Running
on
Zero
474
📈
IC Light V2
Running
on
CPU Upgrade
4.82k
👕
Kolors Virtual Try-On
Running
on
Zero
4.87k
🖥️
FLUX.1 [dev]
Running
on
Zero
1.1k
🗣️
F5-TTS
F5-TTS & E2-TTS: Zero-Shot Voice Cloning (Unofficial Demo)
B

In [5]:
# System prompt for the link-selection call. It ends with a one-shot JSON
# example; the example must itself be valid JSON, since the model is asked to
# copy its shape.
link_system_prompt = "You are provided with a list of links found on a webpage. \
You are able to decide which of the links would be most relevant to include in a brochure about the company, \
such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
link_system_prompt += "You should respond in JSON as in this example:"
link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [6]:
def get_links_user_prompt(website):
    """
    Build the user message asking the model to pick brochure-relevant links.

    Args:
        website: object exposing ``url`` and ``links`` (an iterable of href
            strings, which may be relative).

    Returns:
        The instruction text followed by the links, one per line.
    """
    instructions = (
        f"Here is the list of links on the website of {website.url} - "
        "please decide which of these are relevant web links for a brochure about the company, "
        "respond with the full https URL in JSON format. "
        "Do not include Terms of Service, Privacy, email links.\n"
    )
    heading = "Links (some might be relative links):\n"
    link_lines = "\n".join(website.links)
    return f"{instructions}{heading}{link_lines}"


In [7]:
# Preview the exact user prompt that will be sent for the page scraped into `hf`.
print(get_links_user_prompt(hf))


Here is the list of links on the website of https://huggingface.co - please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. Do not include Terms of Service, Privacy, email links.
Links (some might be relative links):
/
/models
/datasets
/spaces
/posts
/docs
/pricing
/login
/join
/docs/hub/en/ollama
/microsoft/OmniParser
/stabilityai/stable-diffusion-3.5-medium
/stabilityai/stable-diffusion-3.5-large
/Etched/oasis-500m
/genmo/mochi-1-preview
/models
/spaces/stabilityai/stable-diffusion-3.5-large
/spaces/lllyasviel/iclight-v2
/spaces/Kwai-Kolors/Kolors-Virtual-Try-On
/spaces/black-forest-labs/FLUX.1-dev
/spaces/mrfakename/E2-F5-TTS
/spaces
/datasets/fka/awesome-chatgpt-prompts
/datasets/Spawning/PD12M
/datasets/neuralwork/arxiver
/datasets/BAAI/Infinity-MM
/datasets/vikhyatk/lofi
/datasets
/join
/pricing#endpoints
/pricing#spaces
/pricing
/enterprise
/enterprise
/enterprise
/enterprise
/enterprise
/enterprise


In [8]:
def get_links(url):
    """
    Ask the model which links on ``url`` belong in a company brochure.

    The page is scraped, its links are sent to the chat model in JSON mode,
    and the reply is parsed and normalised.

    Args:
        url: address of the page whose links should be classified.

    Returns:
        dict with a ``"links"`` key holding a list of dicts. Each entry keeps
        the fields the model returned, with ``"url"`` resolved against ``url``
        so that relative answers become absolute. Entries that are not dicts
        or have no string ``"url"`` are dropped; a missing or malformed
        ``"links"`` value becomes an empty list.
    """
    # Local import keeps this cell self-contained; urljoin is standard library.
    from urllib.parse import urljoin

    website = Website(url)
    completion = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(website)}
        ],
        response_format={"type": "json_object"}
    )
    # content may be None; treat that as an empty JSON object.
    result = json.loads(completion.choices[0].message.content or "{}")
    raw_links = result.get("links") if isinstance(result, dict) else None
    if not isinstance(result, dict):
        result = {}
    if not isinstance(raw_links, list):
        raw_links = []

    resolved = []
    for link in raw_links:
        if isinstance(link, dict) and isinstance(link.get("url"), str) and link["url"]:
            # The scraped hrefs may be relative and the model can echo them
            # back unchanged; urljoin leaves absolute URLs as they are.
            resolved.append({**link, "url": urljoin(url, link["url"])})
    result["links"] = resolved
    return result


In [9]:
# Try the link selection on the Hugging Face landing page; the parsed dict is the cell output.
get_links("https://huggingface.co")


{'links': [{'type': 'about page', 'url': 'https://huggingface.co/huggingface'},
  {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'},
  {'type': 'company page',
   'url': 'https://www.linkedin.com/company/huggingface/'}]}

In [11]:
def get_all_details(url):
    """
    Collect the text of the landing page plus every page the model selected.

    Args:
        url: landing page of the company site.

    Returns:
        A single string: a "Landing page:" section followed by one section per
        selected link, each headed by the link's type.

    Linked pages that cannot be fetched are reported with ``print`` and
    skipped, so one unreachable page does not abort the whole run. A failure
    to fetch the landing page itself still propagates.
    """
    result = "Landing page:\n"
    result += Website(url).get_contents()
    links = get_links(url)
    print("Found links:", links)
    # Tolerate a reply without a "links" list rather than raising KeyError.
    for link in links.get("links") or []:
        link_url = link.get("url")
        if not link_url:
            continue
        try:
            # Fetch before appending the heading so a failed page leaves no empty section.
            contents = Website(link_url).get_contents()
        except requests.RequestException as error:
            print(f"Skipping {link_url}: {error}")
            continue
        result += f"\n\n{link.get('type', 'relevant page')}\n"
        result += contents
    return result


In [12]:
# Show the combined landing-page and linked-page text that will feed the brochure prompt.
print(get_all_details("https://huggingface.co"))


Found links: {'links': [{'type': 'about page', 'url': 'https://huggingface.co/huggingface'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'blog', 'url': 'https://huggingface.co/blog'}, {'type': 'company page', 'url': 'https://www.linkedin.com/company/huggingface/'}, {'type': 'community forum', 'url': 'https://discuss.huggingface.co'}]}
Landing page:
Webpage Title:
Hugging Face – The AI community building the future.
Webpage Contents:
Hugging Face
Models
Datasets
Spaces
Posts
Docs
Solutions
Pricing
Log In
Sign Up
NEW
Use Ollama with GGUF Models from the HF Hub
The AI community building the future.
The platform where the machine learning community collaborates on models, datasets, and applications.
Trending on
this week
Models
microsoft/OmniParser
Updated
3 days ago
•
4.12k
•
990
stabilityai/stable-diffusion-3.5-medium
Updated
5 days ago
•
16.7k
•
265
stabilityai/stable-diffusion-3.5-large
Updated
14 days ago
•
153k
•
1.03k
Etched/oasis-500m
Update

In [13]:
# System prompt for the brochure-writing call. The trailing backslashes continue
# a single string literal across lines, so every continued line must end with
# its own separating space before the backslash.
system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown. \
Include details of company culture, customers and careers/jobs if you have the information."

# Or uncomment the lines below for a more humorous brochure - this demonstrates how easy it is to incorporate 'tone':

# system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
# and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown. \
# Include details of company culture, customers and careers/jobs if you have the information."

In [14]:
def get_brochure_user_prompt(company_name, url, max_chars=20_000):
    """
    Build the user message for the brochure-writing call.

    Args:
        company_name: name inserted into the first line of the prompt.
        url: landing page passed to ``get_all_details`` to gather page text.
        max_chars: maximum length of the returned prompt in characters;
            anything beyond it is cut off. ``None`` disables truncation.

    Returns:
        The prompt text, at most ``max_chars`` characters long.
    """
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += "Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    user_prompt += get_all_details(url)
    # Slicing with None as the bound returns the whole string.
    return user_prompt[:max_chars]


In [15]:
def create_brochure(company_name, url):
    """
    Generate a brochure for the company and render it as Markdown in the cell output.

    Args:
        company_name: name of the company, forwarded to the user prompt.
        url: landing page of the company site.

    The brochure is displayed, not returned.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": get_brochure_user_prompt(company_name, url)},
    ]
    completion = openai.chat.completions.create(model=MODEL, messages=conversation)
    brochure_text = completion.choices[0].message.content
    display(Markdown(brochure_text))


In [16]:
# Generate and render the brochure in one shot (non-streaming).
create_brochure("Huggingface", "https://huggingface.co")


Found links: {'links': [{'type': 'about page', 'url': 'https://huggingface.co/huggingface'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'company page', 'url': 'https://www.linkedin.com/company/huggingface/'}, {'type': 'blog', 'url': 'https://huggingface.co/blog'}, {'type': 'pricing page', 'url': 'https://huggingface.co/pricing'}, {'type': 'enterprise page', 'url': 'https://huggingface.co/enterprise'}]}


# Hugging Face Brochure

## About Us
Hugging Face is the AI community building the future. Our mission is to democratize good machine learning, one commit at a time. We provide a collaborative platform for researchers, developers, and organizations to create, discover, and share Machine Learning models, datasets, and applications.

## Our Offerings
- **Models**: Explore over 400,000 machine learning models including state-of-the-art models from leading tech companies.
- **Datasets**: Access a vast collection of over 100,000 datasets for a variety of Machine Learning tasks.
- **Spaces**: Create and share ML applications with our user-friendly interface.
- **Compute Solutions**: Efficiently deploy your models with our managed compute offerings starting at $0.60/hour.

## Platform Highlights
- **Community Collaboration**: Join a vibrant community that's passionate about open-source ML technologies.
- **Enterprise Solutions**: Our platform is enterprise-ready with offerings such as Single Sign-On, resource groups, and priority support to help organizations upscale their AI initiatives.
- **Open Source Tools**: We are continuously developing ML tooling with contributions from the community.

## Customer Base
More than 50,000 organizations trust Hugging Face as their AI partner, including:
- **Meta**
- **Amazon Web Services**
- **Google**
- **Microsoft**
- **Intel**

## Company Culture
At Hugging Face, we value collaboration, innovation, and inclusion. Our team of over 200 members works closely together in an environment that encourages learning and growth. We believe in open-source principles and actively support the community through workshops and educational resources.

### Join Us
We are always looking for talented, passionate individuals to join our team. If you're interested in contributing to the future of AI and machine learning, explore our current job openings and be a part of our mission!

## Connect with Us
- **Website**: [Hugging Face Website](https://huggingface.co)
- **Social Media**: Follow us on [Twitter](https://twitter.com/huggingface), [LinkedIn](https://linkedin.com/company/huggingface), and [Discord](https://discord.gg/huggingface) for the latest updates and community discussions.

Join us in building the future of AI!

In [17]:
def _strip_markdown_fence(text):
    """Remove a ```markdown ... ``` wrapper around the whole reply, leaving the body untouched."""
    stripped = text.lstrip()
    if not stripped.startswith("```"):
        return text
    info, newline, body = stripped[3:].partition("\n")
    info = info.strip()
    if newline:
        if info not in ("", "markdown"):
            # Opening fence names another language: a real code block, keep it.
            return text
    elif not "markdown".startswith(info):
        return text
    # When the fence line is still streaming in, `body` is empty, so nothing
    # half-written (such as "mark") is shown.
    if body.rstrip().endswith("```"):
        body = body.rstrip()[:-3]
    return body


def stream_brochure(company_name, url):
    """
    Generate a brochure and render it incrementally as the model streams it.

    Args:
        company_name: name of the company, forwarded to the user prompt.
        url: landing page of the company site.

    The accumulated text is re-rendered as Markdown after every chunk. Only a
    code fence wrapping the whole reply is stripped; the word "markdown" and
    backticks inside the brochure itself are preserved. Nothing is returned.
    """
    stream = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
          ],
        stream=True
    )

    response = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        # Some chunks carry no choices; skip them instead of indexing into an empty list.
        if not chunk.choices:
            continue
        response += chunk.choices[0].delta.content or ''
        update_display(Markdown(_strip_markdown_fence(response)), display_id=display_handle.display_id)



In [18]:
# Same brochure request, rendered incrementally as the response streams in.
stream_brochure("Huggingface", "https://huggingface.co")


Found links: {'links': [{'type': 'about page', 'url': 'https://huggingface.co/huggingface'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'company page', 'url': 'https://www.linkedin.com/company/huggingface/'}]}


# Hugging Face Brochure

## About Us
Hugging Face is at the forefront of the AI revolution, creating a vibrant community dedicated to advancing machine learning. Our mission is to democratize effective machine learning practices and tools, making them accessible to everyone – one commit at a time.

## What We Do
We are the home of machine learning, where enthusiasts and professionals can collaborate and share powerful models, datasets, and applications. Here are some key offerings:

- **Models**: Access and collaborate on over **400,000 models** including those from leading organizations like Microsoft, Google, and AWS.
- **Datasets**: Explore a diverse library of **100,000 datasets** for various tasks including NLP, computer vision, and more.
- **Spaces**: Create, discover, and collaborate on **150,000 applications** utilizing cutting-edge AI technology.

Our platform provides tools and technologies that empower users to create and share innovative ML solutions across text, image, video, audio, and 3D modalities.

## Key Clients
More than **50,000 organizations** use Hugging Face to drive their AI initiatives, including:

- **Meta**
- **Amazon Web Services**
- **Grammarly**
- **Microsoft**
- **Google**

## Company Culture
At Hugging Face, we pride ourselves on fostering a collaborative and inclusive company culture. We believe in the power of community and are constantly working to connect with AI enthusiasts and professionals. If you are passionate about machine learning and eager to make a difference, we invite you to join us on our journey!

## Careers at Hugging Face
We are always on the lookout for talented individuals who want to be part of our mission. Our current job openings encompass a wide range of roles across various levels and specialties. Working at Hugging Face offers an opportunity to be part of a forward-thinking team that values innovation and teamwork.

- **Why Join Us?** 
  - Work in a fast-paced environment with a dynamic team.
  - Contribute to groundbreaking projects and influence the future of AI.
  - Engage with a community of like-minded professionals.

## Get In Touch
If you're interested in exploring partnerships, utilizing our scalable solutions, or joining our team, feel free to reach out through our website:

**[Visit Us](https://huggingface.co)**

Join us in building the future of AI!

--- 

For more updates and community discussions, follow us on social media:  
- [GitHub](https://github.com/huggingface)  
- [Twitter](https://twitter.com/huggingface)  
- [LinkedIn](https://linkedin.com/company/huggingface)  
- [Discord](https://discord.gg/huggingface)  

Let's collaborate, innovate, and shape the future of machine learning together!