In [2]:
import os
import requests
import json
from dotenv import load_dotenv
from bs4 import BeautifulSoup
import openai
from IPython.display import Markdown, display, update_display

# Pull settings from a local .env file into the process environment.
load_dotenv()

# Credentials and endpoint for the OpenAI client, read from the environment.
# NOTE(review): `api_base` is the pre-1.0 SDK setting name, while the calls in
# this file use the 1.x `openai.chat.completions` interface - confirm the
# endpoint configured here is actually picked up by the installed SDK.
openai.api_key = os.environ.get("OPENAI_API_AZURE_KEY")
openai.api_base = os.environ.get("AZURE_OPENAI_ENDPOINT")

# Browser-like User-Agent passed to requests.get when pages are scraped.
headers = {"User-Agent": "Mozilla/5.0"}

# System prompt for the link-selection request: asks the model to choose the
# brochure-worthy links and to answer as JSON with a top-level "links" list
# whose items carry "type" and "url" keys.
link_system_prompt = """
You are provided with links found on a webpage. Decide which links are relevant for a brochure, like About, Careers, etc.
Respond in JSON:
{
    "links": [{"type": "about", "url": "https://example.com/about"}]
}
"""

class Website:
    """Snapshot of a web page: its title, visible text and absolute web links.

    The page is downloaded and parsed as soon as the object is created.

    Attributes set by ``scrape``:
        title: text of the ``<title>`` tag, or "No title" when absent/empty.
        text: visible page text, one fragment per line.
        links: ``href`` values of ``<a>`` tags that are http(s) URLs.
    """

    def __init__(self, url, timeout=10):
        """Store ``url`` and scrape it immediately.

        Args:
            url: address of the page to download.
            timeout: seconds to wait for the server before requests gives up.
        """
        self.url = url
        self.timeout = timeout
        self.scrape()

    @staticmethod
    def _is_http_url(href):
        """Return True when ``href`` is an absolute http:// or https:// URL."""
        # Matching the full "scheme://" prefix rejects look-alikes such as
        # "httpx://..." that a bare "http" prefix test would let through.
        return bool(href) and href.lower().startswith(("http://", "https://"))

    def scrape(self):
        """Download ``self.url`` and fill in ``title``, ``text`` and ``links``."""
        # Without a timeout requests may block forever on an unresponsive host.
        response = requests.get(self.url, headers=headers, timeout=self.timeout)
        soup = BeautifulSoup(response.content, 'html.parser')

        # ``soup.title.string`` is None for an empty <title> or one with
        # nested tags; fall back so the literal text "None" is never used.
        title_tag = soup.title
        self.title = title_tag.string if title_tag and title_tag.string else "No title"
        self.text = soup.get_text(separator="\n", strip=True)

        # Keep only absolute http(s) links; relative paths, anchors,
        # mailto:, javascript: and similar are dropped.
        hrefs = (anchor.get('href') for anchor in soup.find_all('a'))
        self.links = [href for href in hrefs if self._is_http_url(href)]

    def get_contents(self):
        """Return the title and page text formatted for inclusion in a prompt."""
        return f"Title:\n{self.title}\nContents:\n{self.text}"

class BrochureGenerator:
    """Build a company brochure from a website using an OpenAI chat model.

    All methods are static; the class only groups the pipeline steps:
    pick relevant links, gather page text, and ask the model for a brochure.
    """

    @staticmethod
    def _parse_json_reply(reply):
        """Return the JSON object contained in a model reply.

        The reply may be bare JSON, wrapped in a Markdown code fence, or
        surrounded by extra prose; everything outside the outermost
        ``{ ... }`` pair is ignored.

        Raises:
            json.JSONDecodeError: if no valid JSON can be parsed.
        """
        # str.strip("```json\n") removes a *set of characters*, not a prefix,
        # and fails as soon as the reply has any preamble; slicing on the
        # outermost braces is independent of whatever wraps the JSON.
        start = reply.find("{")
        end = reply.rfind("}")
        payload = reply[start:end + 1] if 0 <= start < end else reply
        return json.loads(payload)

    @staticmethod
    def get_links(website):
        """Ask the model which of ``website.links`` belong in a brochure.

        Args:
            website: object exposing ``url`` and ``links`` (list of URLs).

        Returns:
            The parsed JSON reply; per the system prompt it is expected to
            look like ``{"links": [{"type": ..., "url": ...}, ...]}``.
        """
        prompt = f"Here are the links on {website.url} - decide which are relevant for a brochure. Do not include Terms, Privacy.\n" + "\n".join(website.links)
        response = openai.chat.completions.create(
            model=os.getenv("OPENAI_AZURE_MODEL"),
            messages=[{"role": "system", "content": link_system_prompt}, {"role": "user", "content": prompt}],
            temperature=0.7
        )
        return BrochureGenerator._parse_json_reply(response.choices[0].message.content)

    @staticmethod
    def get_brochure(user_prompt):
        """Send ``user_prompt`` to the model and display the reply as Markdown."""
        response = openai.chat.completions.create(
            model=os.getenv("OPENAI_AZURE_MODEL"),
            messages=[{"role": "system", "content": "Create a brochure from the provided info."}, {"role": "user", "content": user_prompt}]
        )
        display(Markdown(response.choices[0].message.content))

    @staticmethod
    def generate(company_name, url, max_prompt_chars=5000):
        """Scrape ``url`` plus its relevant links and display a brochure.

        Args:
            company_name: name placed at the top of the prompt.
            url: landing page to start from.
            max_prompt_chars: hard cap on the prompt length sent to the model.
        """
        website = Website(url)
        user_prompt = f"Company: {company_name}\n" + website.get_contents()
        links = BrochureGenerator.get_links(website)
        # Tolerate a reply without the "links" key instead of raising KeyError.
        for link in links.get("links", []):
            # Anything appended past the cap is truncated below, so stop
            # downloading pages whose text would be discarded anyway.
            if len(user_prompt) >= max_prompt_chars:
                break
            user_prompt += f"\n{link['type']} - {Website(link['url']).get_contents()}"
        user_prompt = user_prompt[:max_prompt_chars]  # Limit prompt size
        BrochureGenerator.get_brochure(user_prompt)

# Demo: scrape the Hugging Face site and render its brochure in the notebook.
BrochureGenerator.generate(company_name="HuggingFace", url="https://huggingface.co")


**Hugging Face Brochure**

---
### **Hugging Face – The AI Community Building the Future**

**Welcome to Hugging Face!**  
Join the premier platform where the machine learning community collaborates on models, datasets, and applications. 

---

### **Explore Our Features**

- **Models**  
  Browse over 1 million models and discover the trending ones of the week!

- **Datasets**  
  Access an extensive library of 250,000+ datasets tailored for various machine learning tasks.

- **Spaces**  
  Engage with thousands of applications and contribute to innovative projects in AI.

- **Enterprise Solutions**  
  Enhance your organization's AI capabilities with our robust and secure enterprise offerings.

---

### **Trending Models This Week**

- **nvidia/parakeet-tdt-0.6b-v2**  
  - Updated: 1 day ago  
  - Downloads: 167k  
  - Likes: 874

- **nari-labs/Dia-1.6B**  
  - Updated: 3 days ago  
  - Downloads: 173k  
  - Likes: 2.19k

- **Lightricks/LTX-Video**  
  - Updated: 2 days ago  
  - Downloads: 291k  
  - Likes: 1.49k

Whether you're generating applications or interacting with AI agents, Hugging Face has the tools to make it happen!

---

### **Why Choose Hugging Face?**

#### **The Home of Machine Learning**  
- **Create**, **discover**, and **collaborate** like never before with the Hugging Face community.

#### **Accelerate Your ML Journey**  
- Move faster with our open-source stack. Deploy on optimized endpoints or upgrade your applications to GPU hardware seamlessly.

#### **Build Your Portfolio**  
- Share your work and showcase your skills to the world. More than 50,000 organizations trust Hugging Face.

---

### **Getting Started with Enterprise**

- **Pricing**  
  - **Compute**: Starting at $0.60/hour for GPU.
  - **Enterprise**: Starting at $20/user/month with features including Single Sign-On, Priority Support, and much more.

#### **Join 50,000+ Organizations**  
Including leading names like Meta, Google, Amazon, and Microsoft.

---

### **Our Open Source Contributions**

Build the future of ML with our extensive library:
- **Transformers**: 144,408 models
- **Diffusers**: 29,014 models
- **Tokenizers**: 9,685 models

And many more that empower developers and researchers alike.

---

### **Join the Community!**

- **Sign Up or Log In**  
Become a part of the Hugging Face community today.

- **Hugging Face Forums**  
Engage in discussions, seek help, or contribute your insights.

Connect with us on:
- **GitHub**
- **Twitter**
- **LinkedIn**
- **Discord**

---

**Explore. Collaborate. Innovate. With Hugging Face!**  
For more information, visit our website: [huggingface.co](https://huggingface.co)

--- 

*Note: All models and datasets are subject to updates. Check our website for the latest information.*