In [136]:
import ollama
from bs4 import BeautifulSoup
import requests
import json
from IPython.display import Markdown, display
import time

In [137]:
def fetch_website(url, timeout=10):
    """Download a web page and return its visible text plus the links on it.

    Args:
        url: Address of the page to download.
        timeout: Seconds `requests` waits for the server before giving up, so
            an unresponsive host cannot hang the notebook.

    Returns:
        A 2-tuple ``(text, links)``. ``text`` is the page title, a blank line
        and the body text (scripts, styles, images and inputs removed),
        truncated to 2,000 characters. ``links`` is a list of the non-empty
        ``href`` values of every ``<a>`` tag, in document order; they may be
        relative.

    Raises:
        requests.RequestException: if the request cannot be completed
            (connection error, invalid URL, timeout, ...).
    """
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')

    # `soup.title.string` is None for an empty <title> or one containing nested
    # tags; fall back to the placeholder instead of crashing on `None + str`.
    title = (soup.title.string if soup.title else None) or 'No title found'

    if soup.body:
        for irrelevant in soup.body(['script', 'style', 'img', 'input']):
            irrelevant.decompose()
        text = soup.body.get_text(separator='\n', strip=True)
    else:
        text = ''

    # Anchors without an href yield None; drop them so callers can join the list.
    hrefs = (anchor.get('href') for anchor in soup.find_all('a'))
    links = [href for href in hrefs if href]
    return (title + '\n\n' + text)[:2_000], links


In [138]:
def get_links_user_prompt(url):
    """Build the user prompt that asks the model to pick brochure-worthy links.

    Args:
        url: Address of the page whose links are scraped with `fetch_website`.

    Returns:
        The instruction text followed by the page's links, one per line.
    """
    user_prompt = f"""
Here is the list of links on the website {url} -
Please decide which of these are relevant web links for a brochure about the company, 
respond with the full https URL in JSON format.
Do not include Terms of Service, Privacy, email links.

Links (some might be relative links):

"""
    links = fetch_website(url)[1]
    # An <a> tag without an href comes back as None, and str.join raises
    # TypeError on non-strings, so keep only real (non-empty) hrefs.
    user_prompt += "\n".join(link for link in links if link)
    return user_prompt

In [139]:
# Site to build the brochure for. Read as a module-level name by
# fetch_page_and_all_relevant_links() as well as by this cell.
url = 'https://anthropic.com'

# System prompt for the link-selection call. It pins the reply to a JSON object
# of the form {"links": [{"type": ..., "url": ...}]}, which is the shape the
# downstream code indexes into.
link_system_prompt = """
You are provided with a list of links found on a webpage.
You are able to decide which of the links would be most relevant to include in a brochure about the company,
such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in this example and nothing else:

{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

# Chat messages for the link-selection call; llm_relevant_links() reads this
# module-level name. NOTE: get_links_user_prompt(url) runs when this cell
# executes, so building `message` downloads the page immediately - re-run this
# cell after changing `url`, otherwise the prompt still lists the old site's links.
message = [
    {'role':'system', 'content': link_system_prompt},
    {'role': 'user', 'content': get_links_user_prompt(url)}
]

In [140]:
def llm_relevant_links(messages=None, model='llama3.1'):
    """Ask the model which of the scraped links belong in a brochure.

    Args:
        messages: Chat messages to send. When omitted, the module-level
            `message` list is used.
        model: Name of the Ollama model to query.

    Returns:
        The parsed JSON reply as a dict with a "links" list. If the reply is
        valid JSON but is not an object carrying a "links" list, an empty
        ``{"links": []}`` is returned so callers can still iterate.

    Raises:
        json.JSONDecodeError: if the reply is not valid JSON.
    """
    if messages is None:
        messages = message
    # format='json' asks Ollama to constrain the reply to valid JSON; without it
    # the model is free to wrap the object in prose or code fences, which
    # json.loads rejects.
    response = ollama.chat(model=model, messages=messages, stream=False, format='json')
    parsed = json.loads(response.message.content)
    if not isinstance(parsed, dict) or not isinstance(parsed.get('links'), list):
        # Valid JSON but not the requested shape: report "no links" rather than
        # letting the caller fail with KeyError/TypeError on parsed['links'].
        return {'links': []}
    return parsed

In [141]:
def fetch_page_and_all_relevant_links():
    """Collect the landing-page text plus the text of every model-selected link.

    Reads the module-level `url` for the landing page and calls
    `llm_relevant_links()` for the pages to follow.

    Returns:
        One string: a "## Landing Page:" section with the landing-page text,
        a "## Relevant Links:" heading, then one "### Link: <type>" section per
        link that could be downloaded.

    Raises:
        requests.RequestException: if the landing page itself cannot be fetched.
    """
    contents = fetch_website(url)[0]
    relevant_links = llm_relevant_links()
    sections = [f'## Landing Page:\n\n{contents}\n## Relevant Links:\n']
    for link in relevant_links['links']:
        try:
            page_text = fetch_website(link['url'])[0]
        except requests.RequestException:
            # URLs come from the model and may be dead, relative or malformed;
            # one bad link should not discard the rest of the brochure input.
            continue
        # Double-quoted f-string: reusing the enclosing quote inside the braces
        # is a SyntaxError on Python versions before 3.12.
        sections.append(f"\n\n### Link: {link['type']}\n")
        sections.append(page_text)
    return ''.join(sections)

In [142]:
# Neutral variant, kept for reference. To use it, uncomment this block and
# comment out the humorous assignment further down.
# brochure_system_prompt = """
# You are an assistant that analyzes the contents of several relevant pages from a company website
# and creates a short brochure about the company for prospective customers, investors and recruits.
# Respond in markdown without code blocks.
# Include details of company culture, customers and careers/jobs if you have the information.
# """

# Active variant: a more humorous brochure - this demonstrates how easy it is to incorporate 'tone':

brochure_system_prompt = """
You are an assistant that analyzes the contents of several relevant pages from a company website
and creates a short, humorous, entertaining, witty brochure about the company for prospective customers, investors and recruits.
Respond in markdown without code blocks.
Include details of company culture, customers and careers/jobs if you have the information.
"""

In [None]:
def get_brochure_user_prompt(company_name, max_chars=5_000):
    """Build the user prompt for the brochure-writing call.

    Args:
        company_name: Name of the company, interpolated into the instructions.
        max_chars: Maximum length of the returned prompt in characters
            (non-negative). Pass ``None`` to disable truncation.

    Returns:
        The instructions followed by the scraped page contents from
        `fetch_page_and_all_relevant_links()`, cut to at most `max_chars`
        characters.
    """
    header = f"""
You are looking at a company called: {company_name}
Here are the contents of its landing page and other relevant pages;
use this information to build a short brochure of the company in markdown without code blocks.\n\n
"""
    user_prompt = header + fetch_page_and_all_relevant_links()
    # Keeps the prompt bounded for the model; slicing with None returns the whole string.
    return user_prompt[:max_chars]

In [144]:
# Conversation for the brochure call: system prompt sets the tone, user prompt
# carries the scraped page contents.
brochure_messages = [
    {'role': 'system', 'content': brochure_system_prompt},
    {'role': 'user', 'content': get_brochure_user_prompt('anthropic')},
]
# stream=True yields the reply piece by piece instead of one final message
response = ollama.chat(model='llama3.1', messages=brochure_messages, stream=True)

# Empty markdown output with a display id, so it can be re-rendered in place
display_handle = display(Markdown(""), display_id=True)

full_text = ""
for chunk in response:
    # Append the new piece and redraw the whole brochure received so far
    full_text = full_text + chunk['message']['content']
    display_handle.update(Markdown(full_text))
    time.sleep(0.05)  # optional small delay to smooth rendering

**Welcome to Anthropic: Where AI Meets Humanity**

At Anthropic, we're on a mission to create AI that benefits humanity's long-term well-being. Our team of experts is dedicated to pushing the boundaries of what's possible with artificial intelligence while ensuring its safe and responsible use.

**Our Approach**

We believe that designing powerful technologies requires both bold steps forward and intentional pauses to consider the effects. That's why we're committed to transparency, accountability, and continuous learning as we develop our AI research and products.

**Meet Claude: The Best Model in the World**

Claude is our flagship model, designed for agents, coding, and computer use. With its unparalleled capabilities, Claude is a game-changer for anyone looking to unlock the full potential of AI.

**Our Culture**

At Anthropic, we value collaboration, creativity, and a passion for learning. Our team is comprised of talented individuals from diverse backgrounds who share our vision of creating AI that benefits humanity. We're always looking for like-minded individuals to join us on this journey.

**Careers at Anthropic**

Join our team of innovators and contribute to the development of cutting-edge AI research and products. Check out our Careers page to learn more about our current openings and how you can be a part of our mission.

**Get Involved**

Stay up-to-date with the latest news, updates, and announcements from Anthropic. Follow us on social media or sign up for our newsletter to stay informed about our progress and initiatives.

**Let's Shape the Future Together**

At Anthropic, we're committed to creating a future where AI is a force for good. Join us in our mission to secure the benefits of AI while mitigating its risks. Let's shape the future together!