In [2]:
import os
import json
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from openai import OpenAI
from IPython.display import Markdown, display, update_display

# Setup: load_dotenv() runs first, then the API key is read from the
# process environment and handed to the OpenAI client.
load_dotenv()
MODEL = "gpt-4o-mini"
openai = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# Sent with every page request made by scrape_website().
headers = {"User-Agent": "Mozilla/5.0"}

# Scrape page content and links
def scrape_website(url, timeout=10):
    """Download a page and return its title, visible text, and link targets.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait on the server before giving up. Without a
            timeout ``requests.get`` may block forever on a stalled host.

    Returns:
        A dict with the keys ``"url"``, ``"title"``, ``"text"`` and
        ``"links"``. ``"links"`` holds the non-empty ``href`` value of every
        ``<a>`` tag, exactly as written in the page (possibly relative).

    Raises:
        requests.exceptions.RequestException: if the request fails or
            exceeds ``timeout``.
    """
    res = requests.get(url, headers=headers, timeout=timeout)
    soup = BeautifulSoup(res.content, 'html.parser')

    # ``soup.title.string`` is None for an empty <title> or one with nested
    # tags; fall back so callers never interpolate the text "None".
    title = soup.title.string if soup.title and soup.title.string else "No Title"

    # Drop elements that contribute no readable text before extracting it.
    for tag in soup(["script", "style", "img", "input"]):
        tag.decompose()
    text = soup.get_text(separator="\n", strip=True)

    links = [link.get('href') for link in soup.find_all('a') if link.get('href')]
    return {"url": url, "title": title, "text": text, "links": links}

# Get only relevant brochure links
def get_relevant_links(url):
    """Ask the model which links on a page are worth using for a brochure.

    Args:
        url: Page whose links are scraped and offered to the model.

    Returns:
        The list found under the ``"links"`` key of the model's JSON reply
        (the prompt requests entries shaped like
        ``{"type": ..., "url": ...}``), or an empty list when the reply has
        no such list.

    Raises:
        json.JSONDecodeError: if the reply is not valid JSON.
    """
    site = scrape_website(url)
    system_prompt = "Select links useful for a company brochure (e.g., About, Careers). Respond with JSON like: " \
                    '{"links": [{"type": "about", "url": "..."}, {"type": "careers", "url": "..."}]}'

    # Navigation links are often repeated on a page; send each href once,
    # keeping first-seen order, so the prompt is not padded with duplicates.
    unique_links = list(dict.fromkeys(site["links"]))
    user_prompt = "Links from the site:\n" + "\n".join(unique_links)

    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ],
        response_format={"type": "json_object"}
    )

    # The reply may be empty or may omit "links"; treat both as "no links"
    # instead of raising KeyError/TypeError.
    raw = response.choices[0].message.content
    parsed = json.loads(raw) if raw else {}
    links = parsed.get("links") if isinstance(parsed, dict) else None
    return links if isinstance(links, list) else []

# Compile landing + linked page content
def collect_all_content(url, max_chars=5000):
    """Gather the landing page text plus the text of its brochure-relevant subpages.

    Args:
        url: Landing page of the site.
        max_chars: Upper bound on the length of the returned string.

    Returns:
        The landing page section followed by one section per subpage that
        could be fetched, cut to at most ``max_chars`` characters. Failures
        are recorded as ``(Note: Subpage fetch failed - ...)`` lines rather
        than raised, so one bad link does not discard the other subpages.

    Raises:
        Whatever ``scrape_website`` raises for the landing page itself; only
        link selection and subpage fetches are best-effort.
    """
    from urllib.parse import urljoin

    main = scrape_website(url)
    sections = [f"Main Page: {main['title']}\n{main['text']}\n\n"]

    try:
        links = get_relevant_links(url)
    except Exception as e:
        links = []
        sections.append(f"\n\n(Note: Subpage fetch failed - {e})")

    for link in links:
        # Each subpage gets its own try so a single failure is noted and the
        # remaining links are still processed.
        try:
            # urljoin resolves relative, root-relative and protocol-relative
            # hrefs against the base URL and leaves absolute URLs untouched.
            full_url = urljoin(url, link["url"])
            sub = scrape_website(full_url)
            sections.append(f"\n\n[{link['type'].capitalize()} Page]: {sub['title']}\n{sub['text']}")
        except Exception as e:
            sections.append(f"\n\n(Note: Subpage fetch failed - {e})")

    return "".join(sections)[:max_chars]

# Streamed brochure generation
def stream_brochure(company_name, url):
    """Generate a markdown brochure for a company and render it as it streams.

    Args:
        company_name: Name inserted into the user prompt.
        url: Landing page whose content (via ``collect_all_content``) is
            given to the model.

    Returns:
        None. The brochure is shown through an IPython display handle that
        is refreshed after every streamed chunk.
    """
    content = collect_all_content(url)
    system_prompt = (
        "You are an assistant who writes a markdown brochure using a company's website. "
        "Include its mission, culture, clients, and careers if possible."
    )

    stream = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": f"Company: {company_name}\n\n{content}"}
        ],
        stream=True
    )

    output = ""
    display_handle = display(Markdown(""), display_id=True)

    for chunk in stream:
        # NOTE(review): a chunk may arrive with no choices (e.g. a usage-only
        # chunk) - skip it rather than fail on choices[0].
        if not chunk.choices:
            continue
        output += chunk.choices[0].delta.content or ""
        update_display(Markdown(_strip_markdown_fence(output)), display_id=display_handle.display_id)


def _strip_markdown_fence(text):
    """Remove a code fence wrapping the whole text, leaving the body untouched.

    Only a fence opening on the very first line (such as three backticks
    followed by ``markdown``) and its matching closing fence at the end are
    removed; the word "markdown" and code blocks inside the body are kept.
    """
    body = text.lstrip()
    if not body.startswith("```"):
        return text

    # Discard the opening fence line; until its newline has been streamed
    # there is nothing to show yet.
    _, newline, body = body.partition("\n")
    if not newline:
        return ""

    trimmed = body.rstrip()
    if trimmed.endswith("```"):
        body = trimmed[:-3]
    return body

# Example use: stream a brochure built from the Hugging Face landing page.
stream_brochure(company_name="Hugging Face", url="https://huggingface.co")


# Hugging Face Brochure

## Welcome to Hugging Face
**The AI community building the future.**

At Hugging Face, we believe in empowering the machine learning community through collaboration and innovation. Our platform is designed to connect enthusiasts, developers, and researchers in a vibrant ecosystem where models, datasets, and applications flourish.

---

## Our Mission
Hugging Face is dedicated to fostering an open and inclusive community that drives machine learning progress. We build tools and platforms that allow users to create, share, and collaborate on artificial intelligence projects, ensuring that everyone has access to state-of-the-art machine learning technologies.

---

## Our Culture
At Hugging Face, we are committed to creating an environment of transparency, collaboration, and learning. Our culture emphasizes:

- **Community Collaboration:** We value contributions from all over the world, enabling a diverse and dynamic team.
- **Open Source Development:** We encourage openness by developing and sharing our software, models, and datasets.
- **Innovation and Growth:** We support continuous learning and development, empowering our team and users to grow their skills.

---

## Our Clients
Hugging Face serves a wide range of clients, including leading organizations across various sectors. Over **50,000** organizations use our platform to harness the power of AI, including:

- **Meta**
- **Amazon**
- **Google**
- **Microsoft**
- **Grammarly**

We are proud to have collaborated with both enterprises and startups, helping them accelerate their machine learning initiatives.

---

## Careers at Hugging Face
Join our growing team and be part of the future of AI! We are looking for passionate individuals who are eager to make an impact in the world of machine learning. Explore our current openings and discover how you can contribute to our mission.

### Current Openings
- Check our website for the latest job listings to find a position that aligns with your skills and interests.

---

## Join Us
Become a part of the Hugging Face community today! Sign up to start exploring our vast collection of over **1 million models** and numerous datasets. Let's build the future of AI together!

- **Explore Models**: [Browse 1M+ Models](https://huggingface.co/models)
- **Discover Datasets**: [Browse 250k+ Datasets](https://huggingface.co/datasets)
- **Engage with AI Apps**: [Explore AI Apps](https://huggingface.co/spaces)

For more information, visit our official website: [Hugging Face](https://huggingface.co)

---

Thank you for your interest in Hugging Face! Together, we can keep building revolutionary AI technology that impacts lives and industries around the world.