In [None]:
# imports
import json
import os
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from IPython.display import display, Markdown, update_display
from openai import OpenAI

In [36]:
# Config
load_dotenv(override=True)

# Chat client pointed at the local server given in base_url; the trailing
# comment shows the alternative of constructing the client with defaults.
openai = OpenAI(base_url="http://localhost:11434/v1", api_key="ollama")     # OpenAI()

# Browser-like User-Agent passed to Website(...) where headers are supplied.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

# Link handling
# System prompt for the link-selection call. The example must be a complete
# JSON object (including the opening brace) so the model is shown valid JSON.
link_system_prompt = "You are provided with a list of links found on a webpage. \
    You are able to decide which of the links would be most relevant to include in a brochure about the company,\
     such as links to an About page, or a Company page or Careers/Jobs page.\n"
link_system_prompt += "You should respond in JSON as in this example:"
link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

# Ollama API configuration
OLLAMA_API = "http://localhost:11434/api/chat"
HEADERS = {"Content-Type": "application/json"}
# Single definition of the model name passed to every chat.completions.create
# call in this notebook (a second, immediately-overwritten assignment was dead).
MODEL = "llama3.2"

# Websites
huggingface_company_name = "Hugging Face"
huggingface_url = "https://huggingface.co"
# NOTE(review): Website is defined in the Helpers cell further down, so on a
# fresh kernel this line raises NameError unless that cell has been run first.
huggingface = Website(huggingface_url)
archlinux_company_name = "Arch Linux"
archlinux_url = "https://archlinux.org"

# Company brochure
# Built from adjacent literals so every sentence boundary keeps its space
# (backslash continuation previously produced "markdown.Include").
system_prompt = (
    "You are an assistant that analyzes the contents of several relevant pages from a company website "
    "and creates a short brochure about the company for prospective customers, investors and recruits. "
    "Respond in markdown. "
    "Include details of company culture, customers and careers/jobs if you have the information."
)


In [None]:
# Helpers
class Website:
    """A fetched web page reduced to its title, visible text and links.

    All attributes are populated in ``__init__``, which performs the HTTP
    request immediately.
    """

    def __init__(self, url, headers=None, timeout=30):
        """Download ``url`` and parse it with BeautifulSoup.

        Args:
            url: Address of the page to fetch.
            headers: Optional HTTP request headers passed to ``requests.get``.
            timeout: Seconds ``requests`` waits on the server before raising,
                so a stalled host cannot hang the notebook indefinitely.
        """
        self.url = url
        self.headers = headers
        response = requests.get(url, headers=self.headers, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, "html.parser")

        # soup.title.string is None when <title> is empty or contains nested
        # tags; use the placeholder then as well, so get_contents() never
        # renders the literal text "None".
        title_text = soup.title.string if soup.title else None
        self.title = title_text if title_text else "No title"

        if soup.body:
            # Drop elements that carry no readable text before extracting it.
            for irrelevant in soup(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""

        # Keep only anchors that actually have a non-empty href.
        hrefs = (anchor.get("href") for anchor in soup.find_all("a"))
        self.links = [href for href in hrefs if href]

    def get_contents(self):
        """Return the page title and text formatted as one prompt-ready string."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

# Fetch the sample site with the browser-like headers; reused by the preview below.
ed = Website(url="https://edwarddonner.com", headers=headers)

In [None]:
def get_links_user_prompt(website):
    """Build the user message asking the model to pick brochure-relevant links.

    Args:
        website: Object exposing ``url`` (str) and ``links`` (list of str).

    Returns:
        The instruction text followed by the page's links, one per line.
    """
    intro = f"Here is the list of links on the website of {website.url} - "
    # The runs of spaces inside these literals are part of the prompt text
    # as it has always been sent; they are kept verbatim.
    instructions = (
        "please decide which of these are relevant web links for a "
        "        brochure about the company, respond with the full http URL in JSON format. "
        "        Do not include Terms of Service, Privacy, email links):\n"
    )
    listing_header = "Links (some might be relative links):\n"
    listing = "\n".join(website.links)
    return "".join([intro, instructions, listing_header, listing])

# Preview the link-selection prompt generated for the sample site.
print(get_links_user_prompt(website=ed))

In [None]:
def get_links(website):
    """Ask the chat model which of the page's links belong in a brochure.

    Sends ``link_system_prompt`` together with the user prompt built from
    ``website`` and requests a JSON-object response.

    Returns:
        The model's reply decoded with ``json.loads``.
    """
    conversation = [
        {"role": "system", "content": link_system_prompt},
        {"role": "user", "content": get_links_user_prompt(website)},
    ]
    completion = openai.chat.completions.create(
        model=MODEL,
        messages=conversation,
        response_format={"type": "json_object"},
    )
    reply_text = completion.choices[0].message.content
    return json.loads(reply_text)

# Raw hrefs scraped from the landing page ...
print(huggingface.links)
# ... versus the subset the model selects for the brochure.
print(get_links(website=huggingface))


In [None]:
def get_all_details(website):
    """Collect the text of the landing page and of every link the model selected.

    Args:
        website: A ``Website`` instance for the landing page.

    Returns:
        One string: the landing page contents followed by one section per
        selected link, each headed by that link's ``type``.
    """
    sections = ["Landing page:\n", website.get_contents()]
    links = get_links(website)
    print("Found links:", links)

    # Reuse whatever request headers the landing page was fetched with
    # (None when it was fetched without any).
    page_headers = getattr(website, "headers", None)

    # The model's JSON is not guaranteed to contain a "links" key; treat a
    # missing key as "no additional pages" instead of raising KeyError.
    for link in links.get("links", []):
        # The link prompt admits that hrefs may be relative, and the model can
        # echo them back unchanged. urljoin resolves those against the landing
        # page and leaves absolute URLs as they are.
        page_url = urljoin(website.url, link["url"])
        sections.append(f"\n\n{link['type']}:\n")
        sections.append(Website(page_url, headers=page_headers).get_contents())
    return "".join(sections)

# Show the combined landing-page and linked-page text for Hugging Face.
print(get_all_details(website=huggingface))

In [None]:
def get_brochure_user_prompt(company_name, url, max_char=5_000):
    """Build the brochure request sent to the model as the user message.

    Args:
        company_name: Name inserted into the first line of the prompt.
        url: Landing page to scrape; a new ``Website`` is fetched on every call.
        max_char: The assembled prompt is cut to at most this many characters.

    Returns:
        The prompt text, truncated to ``max_char`` characters.
    """
    pieces = [
        f"You are looking at the company called: {company_name}\n",
        "Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n",
        get_all_details(Website(url)),
    ]
    full_prompt = "".join(pieces)
    return full_prompt[:max_char]

# Preview the (truncated) brochure prompt for Hugging Face.
print(
    get_brochure_user_prompt(
        company_name=huggingface_company_name,
        url=huggingface_url,
    )
)

In [35]:
def stream_brochure(company_name, url):
    """Stream a brochure for ``company_name`` into the notebook as rendered Markdown.

    Builds the user prompt from ``url``, requests a streamed chat completion
    and re-renders a single display area as text arrives.

    Args:
        company_name: Company name inserted into the user prompt.
        url: Landing page passed to ``get_brochure_user_prompt``.

    Returns:
        None. The brochure is shown through the notebook display only.
    """
    stream = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
        ],
        stream=True
    )

    raw = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        # A streamed chunk can arrive with an empty choices list; skip it
        # rather than indexing into nothing.
        if not chunk.choices:
            continue
        raw += chunk.choices[0].delta.content or ""
        # Remove only code-fence markers. The unmodified text is kept in `raw`
        # so a fence split across chunks ("```" then "markdown") is still
        # matched as a whole, and the ordinary word "markdown" inside the
        # brochure is no longer deleted.
        shown = raw.replace("```markdown", "").replace("```", "")
        update_display(Markdown(shown), display_id=display_handle.display_id)

# Generate and stream the Hugging Face brochure.
stream_brochure(company_name=huggingface_company_name, url=huggingface_url)

Found links: {'links': [{'type': 'about page', 'url': 'https://huggingface.co/huggingface'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'company page', 'url': 'https://www.linkedin.com/company/huggingface/'}]}


# Hugging Face Brochure

## Welcome to Hugging Face
**The AI community building the future.**  
At Hugging Face, we empower individuals and organizations to collaborate on innovative AI solutions. Our platform is the go-to destination for machine learning enthusiasts, researchers, and enterprises alike.

---

### **What We Offer**
- **Models**: Explore over 1 million machine learning models across various domains. Discover trending projects and contribute to state-of-the-art technology.
- **Datasets**: Access a comprehensive library of over 250,000 datasets to fuel your AI projects.
- **Spaces**: Host and share applications with unlimited space for creativity.
- **Community**: Join a vibrant community of over 50,000 organizations and developers collectively advancing AI technology.

---

### **Our Technology**
We are committed to open-source principles and provide a robust set of tools for developers:
- **Transformers**: Industry-leading ML solutions for PyTorch and TensorFlow, with over 147,514 active contributors.
- **Diffusers**: State-of-the-art diffusion models for enhanced training capabilities.
- **Safetensors**: A secure way to store and distribute neural network weights.

---

### **Enterprise Solutions**
For organizations looking to scale, we offer:
- **Compute Services**: Deploy models with optimized inference endpoints starting at $0.60/hour.
- **Enterprise Support**: Advanced security, dedicated support, and team collaboration features starting at $20/user/month.

---

### **Company Culture**
Hugging Face fosters an inclusive and collaborative environment where innovation flourishes. Our culture extends beyond technology as we engage with our community through active forums, contributions, and shared learning experiences.

---

### **Join Us**
We are constantly evolving and looking for passionate individuals to join our team. Whether you are a developer, a researcher, or simply someone enthusiastic about AI, explore career opportunities with us!

---

### **Contact & Connect**
- **Website**: [Hugging Face](https://huggingface.co)
- **Social Media**: Follow us on [Twitter](https://twitter.com/huggingface), [LinkedIn](https://www.linkedin.com/company/huggingface/), and join our [Discord](https://discord.gg/huggingface) community.

---

**Explore the future of AI with Hugging Face! Together, we can build something extraordinary.**

In [37]:
# Same brochure request as the previous cell, executed again.
stream_brochure(company_name=huggingface_company_name, url=huggingface_url)

Found links: {'links': [{'type': 'about page', 'url': 'https://huggingface.co/'}, {'type': 'company page', 'url': 'https://www.linkedin.com/company/huggingface/'}, {'type': 'blog', 'url': 'https://discuss.huggingface.co'}, {'type': 'facebook', 'url': 'https://www.facebook.com/huggingface'}, {'type': 'twitter', 'url': 'https://twitter.com/huggingface'}, {'type': 'github', 'url': 'https://github.com/huggingface'}, {'type': 'blog', 'url': 'https://huggingface.co/blog'}, {'type': 'news/updates', 'url': 'https://status.huggingface.co'}, {'type': 'datasets', 'url': 'https://huggingface.co/datasets'}, {'type': 'changelog', 'url': 'https://huggingface.co/changelog'}]}


Hugging Face Brochure
==========================

**Introduction**
---------------

Welcome to Hugging Face, the leading platform for building, sharing, and collaborating on machine learning (ML) models. Our vibrant community of ML developers, researchers, and enthusiasts have created a vast ecosystem that powers some of the world's most groundbreaking applications.

**Our Mission**
----------------

We are building the foundation of ML tooling with the community, empowering developers to create, discover, and collaborate on high-quality models. Our mission is to accelerate innovation in AI research and application, while providing practical tools for real-world problems.

**Company Culture**
-----------------

At Hugging Face, we value community-driven collaboration, open-source principles, and inclusivity. We believe that the collective efforts of passionate individuals can achieve remarkable results.

Our team consists of experienced engineers, researchers, and enthusiasts dedicated to delivering cutting-edge products that simplify ML development and deployment. With a strong focus on support, documentation, and continuous learning, we strive to create an exceptional user experience for our customers.

**Customers**
-------------

Over 50,000 organizations worldwide rely on Hugging Face for their AI needs. These innovators span across various industries, including:

*   **Meta**: Building cutting-edge AI technologies at the heart of Meta.
*   **AI2**: Developing novel approaches to machine translation and beyond.
*   **Amazon**: Creating AI-powered experiences that drive customer satisfaction.
*   **Google**: Investigating breakthroughs in image recognition, natural language processing, and more.

**Careers/Jobs**
----------------

If you're passionate about ML, data science, or software development, we encourage you to explore our job openings. Our team is committed to fostering a diverse and inclusive work environment where individuals can grow professionally and contribute their expertise to shaping the future of AI.

**Products and Services**
-------------------------

Our open-source platform offers robust tools for building, training, and deploying high-quality ML models, including:

*   **Transformers**: State-of-the-art library for PyTorch, TensorFlow, and JAX.
*   **Diffusers**: Diffusion models in PyTorch for advanced applications.
*   **Safetensors**: Safe way to store/distribute neural network weights.

Our enterprise solutions expand upon these tools with security features, dedicated support, and more.

**Stay Ahead**
--------------

Dive deeper into Hugging Face's ecosystem by connecting with us on social media platforms:

GitHub | Twitter | LinkedIn