In [26]:
import os
import requests
import json
from typing import List
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
import ollama
import http.client
http.client._MAXHEADERS = 1000

In [27]:
ollama_api = "http://localhost:11434/api/chat"
headers = {"Content-type":"application/json"}
model = "llama2:7b"

In [28]:
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
}


class Website:
    def __init__(self, url):
        self.url = url
        response = requests.get(url, headers=headers)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')
        self.title = soup.title.string if soup.title else "No title found"
        if soup.body:
            for irrelevant in soup.body(["script","style","img","input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""
        links = [link.get('href') for link in soup.find_all('a')]
        self.links = [link for link in links if link]
        
    def get_contents(self):
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"
        

In [29]:
web = Website("https://huggingface.co")
print(web.get_contents())
print("---------------------------------------------------------------------------")
web.links

Webpage Title:
Hugging Face – The AI community building the future.
Webpage Contents:
Hugging Face
Models
Datasets
Spaces
Community
Docs
Enterprise
Pricing
Log In
Sign Up
The AI community building the future.
The platform where the machine learning community collaborates on models, datasets, and applications.
Explore AI Apps
or
Browse 1M+ models
Trending on
this week
Models
xai-org/grok-2
Updated
4 days ago
•
3.27k
•
783
microsoft/VibeVoice-1.5B
Updated
about 4 hours ago
•
10.2k
•
715
Qwen/Qwen-Image-Edit
Updated
3 days ago
•
51.2k
•
1.45k
deepseek-ai/DeepSeek-V3.1
Updated
1 day ago
•
38.2k
•
609
openbmb/MiniCPM-V-4_5
Updated
about 4 hours ago
•
1.46k
•
434
Browse 1M+ models
Spaces
Running
12.3k
12.3k
DeepSite v2
🐳
Generate any application with DeepSeek
Running
on
Zero
396
396
Qwen Image Edit
✒
Edit images based on user instructions
Running
on
Zero
MCP
457
457
Wan2.2 14B Fast
🎥
generate a video from an image with a text prompt
Running
on
Zero
130
130
Qwen Image Edit Fast!
✒
Fast 8 step

['/',
 '/models',
 '/datasets',
 '/spaces',
 '/docs',
 '/enterprise',
 '/pricing',
 '/login',
 '/join',
 '/spaces',
 '/models',
 '/xai-org/grok-2',
 '/microsoft/VibeVoice-1.5B',
 '/Qwen/Qwen-Image-Edit',
 '/deepseek-ai/DeepSeek-V3.1',
 '/openbmb/MiniCPM-V-4_5',
 '/models',
 '/spaces/enzostvs/deepsite',
 '/spaces/Qwen/Qwen-Image-Edit',
 '/spaces/zerogpu-aoti/wan2-2-fp8da-aoti-faster',
 '/spaces/multimodalart/Qwen-Image-Edit-Fast',
 '/spaces/lvwerra/jupyter-agent-2',
 '/spaces',
 '/datasets/fka/awesome-chatgpt-prompts',
 '/datasets/nvidia/Nemotron-Post-Training-Dataset-v2',
 '/datasets/liumindmind/NekoQA-10K',
 '/datasets/nvidia/Granary',
 '/datasets/nvidia/Llama-Nemotron-VLM-Dataset-v1',
 '/datasets',
 '/join',
 '/pricing#endpoints',
 '/pricing#spaces',
 '/pricing',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/allenai',
 '/facebook',
 '/amazon',
 '/google',
 '/Intel',
 '/microsoft',
 '/grammarly',
 '/Writer',
 '/docs/

In [30]:
link_system_prompt = "You are provided with a list of links found on a webpage. \
    You have to decide which of the links would be most relevant to include in a brochure about the company, \
    such as links to an About Page, or a Company page, or Careers/Jobs page. \n"
link_system_prompt += "You should respond in JSON as this example:"
link_system_prompt += """"
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"},
        {"type": "products page", "url": "https://another.full.url/products"}
    ]
}

"""

In [31]:
print(link_system_prompt)

You are provided with a list of links found on a webpage.     You have to decide which of the links would be most relevant to include in a brochure about the company,     such as links to an About Page, or a Company page, or Careers/Jobs page. 
You should respond in JSON as this example:"
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"},
        {"type": "products page", "url": "https://another.full.url/products"}
    ]
}




In [32]:
def get_links_user_prompt(website):
    user_prompt = f"Here is the list of links on the website of {website.url} - "
    user_prompt += "please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. \
Do not include Terms of Service, Privacy, email links.\n"
    user_prompt += "Links (some might be relative links):\n"
    user_prompt += "\n".join(website.links)
    return user_prompt

In [33]:
payload = {
    "model":model,
    "messages":[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(web)}
      ],
    "stream":False
}


def get_links(url):
    web = Website(url)
    response = requests.post(ollama_api, json=payload, headers=headers)
    result = response.json()
    print(result["message"]["content"])
    # Parse the JSON content from the model's response
    try:
        links_json = json.loads(result["message"]["content"])
    except Exception as e:
        print("Error parsing JSON from model response:", e)
        links_json = {"links": []}
    return links_json

In [34]:
huggingface = Website("https://huggingface.co")
huggingface.links

['/',
 '/models',
 '/datasets',
 '/spaces',
 '/docs',
 '/enterprise',
 '/pricing',
 '/login',
 '/join',
 '/spaces',
 '/models',
 '/xai-org/grok-2',
 '/microsoft/VibeVoice-1.5B',
 '/Qwen/Qwen-Image-Edit',
 '/deepseek-ai/DeepSeek-V3.1',
 '/openbmb/MiniCPM-V-4_5',
 '/models',
 '/spaces/enzostvs/deepsite',
 '/spaces/Qwen/Qwen-Image-Edit',
 '/spaces/zerogpu-aoti/wan2-2-fp8da-aoti-faster',
 '/spaces/multimodalart/Qwen-Image-Edit-Fast',
 '/spaces/lvwerra/jupyter-agent-2',
 '/spaces',
 '/datasets/fka/awesome-chatgpt-prompts',
 '/datasets/nvidia/Nemotron-Post-Training-Dataset-v2',
 '/datasets/liumindmind/NekoQA-10K',
 '/datasets/nvidia/Granary',
 '/datasets/nvidia/Llama-Nemotron-VLM-Dataset-v1',
 '/datasets',
 '/join',
 '/pricing#endpoints',
 '/pricing#spaces',
 '/pricing',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/allenai',
 '/facebook',
 '/amazon',
 '/google',
 '/Intel',
 '/microsoft',
 '/grammarly',
 '/Writer',
 '/docs/

In [35]:
get_links("https://huggingface.co")

Here are the relevant links from the Hugging Face website that could be included in a brochure about the company:

{
    "links": [
        {"type": "about page", "url": "https://huggingface.co/about"},
        {"type": "careers page", "url": "https://huggingface.co/careers"},
        {"type": "products page", "url": "https://huggingface.co/products"},
        {"type": "pricing page", "url": "https://huggingface.co/pricing"},
        {"type": "enterprise page", "url": "https://huggingface.co/enterprise"}
    ]
}

Explanation of the links:

* "/": The main webpage of Hugging Face.
* "/models": A link to the model directory on the website.
* "/datasets": A link to the dataset directory on the website.
* "/spaces": A link to the space directory on the website.
* "/docs": A link to the documentation section on the website.
* "/join": A link to the join page for Hugging Face, which includes information on how to become a member of the community.
* "/pricing": A link to the pricing page on t

{'links': []}

# Make the brochure

In [36]:
def get_all_details(url):
    result = "Landing Page:\n"
    result += Website(url).get_contents()
    links = get_links(url)
    print("Found Links:", links)
    for link in links["links"]:
        result += f"\n\n{link['type']}\n"
        result += Website(link["url"]).get_contents()
    return result

In [37]:
print(get_all_details("https://huggingface.co"))

Here are the relevant links for a brochure about Hugging Face:

{
"links": [
    {"type": "about page", "url": "https://www.huggingface.co/about"},
    {"type": "careers page", "url": "https://www.huggingface.co/careers"},
    {"type": "products page", "url": "https://www.huggingface.co/models"},
    {"type": "datasets page", "url": "https://www.huggingface.co/datasets"},
    {"type": "join page", "url": "https://apply.workable.com/huggingface"}
]
}

Explanation:

* The "About" page provides information about the company's history, mission, and values. It would be relevant to include this link in a brochure about Hugging Face.
* The "Careers" page lists available job opportunities at Hugging Face. This link would be relevant for a brochure targeting potential employees or employers.
* The "Models" page lists Hugging Face's AI models and their capabilities. This link would be relevant for a brochure targeting customers interested in using Hugging Face's products.
* The "Datasets" page p

In [38]:
system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
Include details of company culture, customers and careers/jobs if you have the information."

In [39]:
def get_brochure_user_prompt(company_name, url):
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += f"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    user_prompt += get_all_details(url)
    user_prompt = user_prompt[:5_000]
    return user_prompt

In [40]:
get_brochure_user_prompt("HuggingFace", "https://huggingface.co")

Here are the relevant web links for a brochure about Hugging Face:

{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"},
        {"type": "products page", "url": "https://another.full.url/products"}
    ]
}

The links that are most relevant for a brochure about Hugging Face are:

* /about: This link leads to the company's About page, which provides information on the company's history, mission, and values.
* /careers: The Careers page lists available job openings at Hugging Face and provides information on the company's culture and benefits.
* /products: The Products page showcases Hugging Face's offerings, including its transformer-based models and other AI tools.

The other links provided in the list are not relevant for a brochure about Hugging Face and have been excluded from the response.
Error parsing JSON from model response: Expecting value: line 1 column 1 (char 

'You are looking at a company called: HuggingFace\nHere are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\nLanding Page:\nWebpage Title:\nHugging Face – The AI community building the future.\nWebpage Contents:\nHugging Face\nModels\nDatasets\nSpaces\nCommunity\nDocs\nEnterprise\nPricing\nLog In\nSign Up\nThe AI community building the future.\nThe platform where the machine learning community collaborates on models, datasets, and applications.\nExplore AI Apps\nor\nBrowse 1M+ models\nTrending on\nthis week\nModels\nxai-org/grok-2\nUpdated\n4 days ago\n•\n3.27k\n•\n783\nmicrosoft/VibeVoice-1.5B\nUpdated\nabout 4 hours ago\n•\n10.2k\n•\n715\nQwen/Qwen-Image-Edit\nUpdated\n3 days ago\n•\n51.2k\n•\n1.45k\ndeepseek-ai/DeepSeek-V3.1\nUpdated\n1 day ago\n•\n38.2k\n•\n610\nopenbmb/MiniCPM-V-4_5\nUpdated\nabout 4 hours ago\n•\n1.46k\n•\n434\nBrowse 1M+ models\nSpaces\nRunning\n12.3k\n12.3k\nDeepSite v2\n🐳\nGe

In [41]:
def create_brochure(company_name, url):
    payload = {
        "model": model,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
        ],
        "stream": False
    }
    response = requests.post(ollama_api, json=payload, headers=headers)
    result = response.json()
    display(Markdown(result["message"]["content"]))
    

In [42]:
create_brochure("HuggingFace", "https://huggingface.co")

Here are the relevant links from the Hugging Face website that could be included in a brochure about the company:

{
    "links": [
        {"type": "about page", "url": "https://www.huggingface.co/about"},
        {"type": "careers page", "url": "https://apply.workable.com/huggingface"},
        {"type": "products page", "url": "https://endpoints.huggingface.co"}
    ]
}

Explanation:

* /about: This link leads to the company's about page, which provides information on Hugging Face's mission, values, and team.
* /careers: This link leads to the company's careers page, where users can find and apply for job opportunities at Hugging Face.
* /endpoints: This link leads to the company's endpoints page, which provides information on the different APIs and services offered by Hugging Face.
* /models: This link leads to the company's models page, where users can find and learn about the different machine learning models available from Hugging Face.
* /datasets: This link leads to the company

Hugging Face is a community platform that brings together developers, researchers and businesses in the AI industry. Their mission is to create a space where professionals can collaborate on projects related to machine learning (ML). By providing an open-source stack, hosting and collaborating on public models, datasets, and applications, Hugging Face enables users to move faster and build better ML.

Key Features:

* Unlimited public models, datasets, and applications.
* Collaboration platform for developers, researchers, and businesses.
* Open-source stack for faster development.
* Inference Endpoints for deploying models.
* Spaces for updating applications.
* Paid Compute and Enterprise solutions for organizations.

Benefits:

* Faster development and deployment of ML projects.
* Access to a large community of ML professionals.
* Collaboration platform for sharing knowledge and ideas.
* Open-source stack for customization and flexibility.

Use Cases:

* Developers can use the platform to build and share models, datasets, and applications.
* Researchers can collaborate on projects and share resources.
* Businesses can use the platform to deploy ML solutions quickly and efficiently.

Target Audience:

* ML professionals (developers, researchers, data scientists).
* Businesses looking to deploy ML solutions.
* Organizations looking for a collaboration platform for their ML teams.

Jobs/Careers:
Hugging Face is always looking for talented individuals to join their team. They offer various job opportunities in areas such as software engineering, research and development, marketing, and more. If you're interested in working with Hugging Face, check out their careers page for current openings.

Press/Resources:
Hugging Face has a blog where they share news, updates, and insights related to the ML industry. They also provide resources such as documentation, tutorials, and forums for users to learn and stay up-to-date on the latest ML trends and technologies.

Social Media Links:
Hugging Face is active on various social media platforms, including GitHub, Twitter, LinkedIn, Discord, and more. Follow them to stay informed about their latest updates, news, and events.

Brochure:
Welcome to Hugging Face! Our platform brings together ML professionals from around the world, providing a space for collaboration, knowledge sharing, and innovation. With our open-source stack, inference endpoints, and spaces for updating applications, you'll be able to move faster and build better ML projects. Join our community today and start creating, discovering, and collaborating on ML projects!

In [43]:
def stream_brochure(company_name, url):
    payload = {
        "model": model,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
        ],
        "stream": True
    }
    stream = requests.post(ollama_api, json=payload, headers=headers, stream=True)
    
    response = ""
    display_handle = display(Markdown("Generating brochure..."), display_id=True)
    for chunk in stream.iter_lines():
        if chunk:
            try:
                chunk_json = json.loads(chunk.decode("utf-8"))
                # Adjust this line based on the actual response structure
                content = chunk_json.get("message", {}).get("content", "")
                response += content
                response = response.replace("```", "").replace("markdown", "")
                update_display(Markdown(response), display_id=display_handle.display_id)
            except Exception as e:
                # Optionally print or log the error
                print("error:-"+str(e))

In [None]:
# stream_brochure("Apple", "https://www.apple.com/")

Here are the relevant links from the Apple website that could be included in a brochure about the company:

* `/about`: The About page provides information about Apple's history, mission, and values.
* `/careers`: The Careers page lists job opportunities available at Apple.
* `/us/shop/goto/buy_iphone`: This link leads to the iPhone purchase page on the Apple website.
* `/ipad`: This link leads to the iPad product page on the Apple website.
* `/mac`: This link leads to the Mac product page on the Apple website.
* `/watch`: This link leads to the Apple Watch product page on the Apple website.
* `/airpods`: This link leads to the AirPods product page on the Apple website.
* `/us-edu/shop/goto/buy_accessories`: This link leads to the education store on the Apple website, where products can be purchased for educational use.
* `/wallet/apple-card`: This link leads to the Apple Card page on the Wallet app on the iPhone.
* `/apple-pay/`: This link leads to the Apple Pay page on the Apple webs

Apple - The Innovation Leader

Are you looking for a company that values creativity, innovation, and customer satisfaction? Look no further than Apple! With a wide range of products and services, Apple is revolutionizing various industries and changing the way we live, work, and play.

Experience the Future of Technology
Apple is known for its cutting-edge technology and sleek designs. From Macs and iPads to iPhones and Apple Watches, each product is crafted with precision and attention to detail. With Apple's seamless software integration, you can easily navigate and manage your devices, making life easier and more enjoyable.

Discover Unparalleled Customer Experience
At Apple, customer satisfaction is at the core of everything they do. Their commitment to quality and excellence is reflected in their products, services, and customer support. With Apple's intuitive user interface and robust ecosystem, you can effortlessly integrate your devices into your daily life, creating a truly seamless experience.

Invest in a Brighter Future
In addition to its innovative products, Apple is committed to making a positive impact on the environment and society. By using renewable energy sources, reducing waste, and supporting local communities, Apple is paving the way for a more sustainable future.

Join the Apple Team
Are you looking for a fulfilling career with a company that values creativity, innovation, and collaboration? Apple offers exciting opportunities in various fields, including engineering, design, marketing, and more. As an Apple employee, you'll be part of a dynamic and supportive community that is dedicated to changing the world through technology.

Conclusion:
Apple is not just a company - it's a movement. With its unwavering commitment to innovation, customer satisfaction, and sustainability, Apple is revolutionizing various industries and changing the way we live, work, and play. Whether you're an individual looking for the latest technology or a business seeking to integrate Apple products into your operations, there's never been a better time to join the Apple community. Explore our website or visit one of our many locations today!

In [44]:
stream_brochure("HuggingFace", "https://huggingface.co")

Here are the relevant links from the list provided for a brochure about Hugging Face:

{
"links": [
    {"type": "about page", "url": "https://huggingface.co/about"},
    {"type": "careers page", "url": "https://huggingface.co/careers"},
    {"type": "products page", "url": "https://huggingface.co/models"},
    {"type": "datasets page", "url": "https://huggingface.co/datasets"}
]
}

Explanation:

* / is not a relevant link for a brochure about the company, so it has been excluded.
* /models, /datasets, and /docs are relevant links related to the company's products and services, so they have been included in the list.
* /about, /careers, and /join/discord are relevant links related to the company's information and recruitment, so they have been included in the list.
* The remaining links are not relevant for a brochure about Hugging Face, so they have been excluded.
Error parsing JSON from model response: Expecting value: line 1 column 1 (char 0)
Found Links: {'links': []}


Hugging Face is a company that has created an online platform for the machine learning community to collaborate on models, datasets, and applications. On their landing page, they highlight their mission of building the future of AI and their platform's features, such as browsing 1 million+ models, exploring different modalities, creating and discovering new ML projects, and more. They also offer paid compute and enterprise solutions for organizations looking to accelerate their ML capabilities.

Here is a brochure summary of Hugging Face:

**Company Culture**

* Building the future of AI
* Collaborative platform for the machine learning community
* Open-source stack for faster development

**Customers and Careers/Jobs**

* 783 models
* 3.93k followers (AI at Meta, Amazon, Google, Intel, Microsoft, etc.)
* Team & Enterprise solutions for advanced platform (paid compute and enterprise)
* Job opportunities in engineering, research, and more

**Key Features**

* Browse 1 million+ models
* Explore different modalities (text, image, video, audio, or 3D)
* Create and discover new ML projects
* Collaborate on public models, datasets, and applications
* Optimized inference endpoints for faster deployment
* Paid compute and enterprise solutions for organizations

**Pricing**

* Starting at $0.60/hour for GPU (compute)
* Starting at $20/user/month (team & enterprise solutions)

**Resources**

* Blog
* Documentation
* Forum
* Service status
* Social media links (GitHub, Twitter, LinkedIn, Discord)

Overall, Hugging Face provides a platform for the machine learning community to collaborate and accelerate their development, with features such as browsing models, exploring different modalities, creating new projects, and more. They offer paid compute and enterprise solutions for organizations looking to further advance their ML capabilities.