In [84]:
import os
import requests
import json
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI


In [60]:
# Load environment variables from a local .env file; override=True lets the
# .env values replace anything already set in the process environment.
load_dotenv(override=True)
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    # Fail here with an actionable message instead of a later client error.
    raise ValueError("OPENAI_API_KEY is not set; add it to your .env file or environment.")

# Hand the validated key to the client explicitly so the check above and the
# client are guaranteed to use the same value.
openai = OpenAI(api_key=api_key)

# Browser-like User-Agent sent with every page request made by Website.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

In [61]:
class Website():
    """Download one web page and expose its title, visible text and links.

    Attributes set by the constructor:
        url:   the address that was requested.
        body:  raw response bytes.
        title: text of the <title> tag, or "No title found" when absent/empty.
        text:  text of <body> with script/style/img/input elements removed,
               or "" when the document has no <body>.
        links: non-empty href values of every <a> tag, exactly as written in
               the page (so they may be relative).
    """

    def __init__(self, url, timeout=10):
        """Fetch and parse ``url``.

        Args:
            url: address of the page to download.
            timeout: seconds to wait for the server before requests raises a
                timeout error; without it a stalled server blocks forever.
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')

        # soup.title.string is None for an empty or nested <title>, so fall
        # back in that case as well as when the tag is missing altogether.
        title_text = soup.title.string if soup.title else None
        self.title = title_text or "No title found"

        # soup.body is None for documents without a <body> (fragments,
        # non-HTML responses); guard so such pages yield empty text instead
        # of raising AttributeError.
        if soup.body is not None:
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""

        hrefs = [anchor.get('href') for anchor in soup.find_all('a')]
        self.links = [href for href in hrefs if href]

    def get_contents(self):
        """Return the title and page text formatted as one prompt section."""
        return f"\nWebsite title: {self.title} \n\nWebsite contents: {self.text}\n\n"

In [62]:
# Scrape the sample site once; `ed` is reused below when testing get_links.
ed = Website("http://edwarddonner.com")
# Uncomment to inspect the raw hrefs collected from the page:
#print(ed.links)

In [63]:
# System prompt for the link-selection call made in get_links: it describes
# the task and shows the JSON shape the reply must follow.
system_prompt = (
    "You are going to be presented with a list of website links found on a webpage. "
    "You need to figure out which links are relevant to be put on a brochure of the company. "
    "You may include links such as About page, Careers page, Contact page etc.\n"
)

system_prompt += "You should respond in a JSON as in the below example: \n"

# The example has to be valid JSON itself, otherwise the model is shown a
# malformed pattern to imitate (each object uses "type" and "url" keys).
system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [73]:
def get_user_prompt(website):
    """Build the user message asking the model to pick brochure-worthy links.

    Args:
        website: object exposing ``title`` (str) and ``links`` (list of href
            strings, possibly relative).

    Returns:
        The prompt text: instructions followed by one link per line.
    """
    user_prompt = f"Below is a list of links from the website {website.title}\n"
    # Adjacent string literals are used instead of a backslash continuation
    # inside one literal, which would leak the source indentation into the
    # prompt text.
    user_prompt += (
        "Please select useful links from the list, which can be used in a brochure of the company. "
        "Respond with the full https URL in JSON format. "
        "Ignore the links of Terms, emails, Privacy. Ignore the email addresses.\n"
    )
    # The heading starts on its own line so it is not glued to the
    # instructions above.
    user_prompt += "Links (some of the links may be relative links): \n"
    user_prompt += "\n".join(website.links)
    return user_prompt

In [74]:
def get_links(website):
    """Ask the model which of the page's links belong in a brochure.

    Sends the link-selection system prompt plus the user prompt built from
    ``website`` and requests a JSON-object reply.

    Returns:
        The model's reply parsed from JSON.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": get_user_prompt(website)},
    ]
    completion = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=conversation,
        response_format={"type": "json_object"},
    )
    reply_text = completion.choices[0].message.content
    return json.loads(reply_text)

In [75]:
# Smoke-test the link selection on the sample site and print the parsed JSON reply.
print(get_links(ed))

{'links': [{'type': 'home page', 'url': 'https://edwarddonner.com/'}, {'type': 'about page', 'url': 'https://edwarddonner.com/about-me-and-about-nebula/'}, {'type': 'blog page', 'url': 'https://edwarddonner.com/posts/'}, {'type': 'linkedin profile', 'url': 'https://www.linkedin.com/in/eddonner/'}, {'type': 'twitter profile', 'url': 'https://twitter.com/edwarddonner'}, {'type': 'facebook profile', 'url': 'https://www.facebook.com/edward.donner.52'}]}


In [66]:
def get_all_details(website):
    """Collect the landing page plus every model-selected page as one text blob.

    Args:
        website: an already-fetched Website for the landing page.

    Returns:
        A string with a "Landing page" section followed by one section per
        selected link that could be fetched.
    """
    sections = [" \n Landing page: \n", website.get_contents()]

    # The link list comes from model output, so no key is guaranteed to be
    # present; read defensively rather than failing with KeyError.
    for link in get_links(website).get("links", []):
        url = link.get("url")
        if not url:
            continue
        try:
            page = Website(url)
        except requests.RequestException as error:
            # One unreachable or malformed (e.g. relative) URL should not
            # abort the whole brochure; report it and move on.
            print(f"Skipping {url}: {error}")
            continue
        sections.append(f"\n {link.get('type', 'page')}:\n")
        sections.append(page.get_contents())

    return "".join(sections)

In [67]:
# System prompt for the brochure-writing call. Adjacent literals are used so
# every sentence boundary keeps its separating space (a backslash
# continuation previously glued "markdown." to "Include").
system_prompt_2 = (
    "You are an assistant that analyzes the contents of several relevant pages from a company website "
    "and creates a short brochure about the company for prospective customers, investors and recruits. "
    "Respond in markdown. "
    "Include details of company culture, customers and careers/jobs if you have the information."
)

In [68]:
def get_user_prompt_2(company_name, website):
    """Build the brochure-writing user message for ``company_name``.

    The message is an introduction followed by the scraped page contents,
    cut to its first 5000 characters.
    """
    pieces = (
        f"You are looking at a company called: {company_name}\n",
        "Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n",
        get_all_details(website),
    )
    # Cap the prompt length after assembling everything.
    return "".join(pieces)[:5000]
    

In [71]:
def create_brochure(company_name, website):
    """Generate a brochure in one request and render it as Markdown.

    Nothing is returned; the reply is shown in the notebook output.
    """
    chat = [
        {"role": "system", "content": system_prompt_2},
        {"role": "user", "content": get_user_prompt_2(company_name, website)},
    ]
    completion = openai.chat.completions.create(model="gpt-4o-mini", messages=chat)
    brochure_markdown = completion.choices[0].message.content
    display(Markdown(brochure_markdown))

In [85]:
def _strip_markdown_fences(text):
    """Remove ```markdown / ``` fence markers while leaving ordinary prose intact."""
    return text.replace("```markdown", "").replace("```", "")


def stream_brochure(company_name, website):
    """Generate a brochure and render it incrementally as the reply streams in.

    Args:
        company_name: name inserted into the user prompt.
        website: an already-fetched Website for the company's landing page.

    Nothing is returned; the notebook output is updated in place on every
    received chunk.
    """
    stream = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": system_prompt_2},
            {"role": "user", "content": get_user_prompt_2(company_name, website)}
        ],
        stream=True
    )

    response = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        # Defensive: a chunk may arrive without any choices (e.g. a
        # usage-only chunk); indexing [0] on it would raise IndexError.
        if not chunk.choices:
            continue
        response += chunk.choices[0].delta.content or ''
        # Clean a copy for display only. The raw text is kept unmodified, and
        # only fence markers are removed, so the word "markdown" appearing in
        # the brochure prose is no longer deleted.
        update_display(Markdown(_strip_markdown_fences(response)), display_id=display_handle.display_id)

In [86]:
# Demo: scrape Anthropic's landing page, then stream a brochure built from it
# and from the pages behind the links the model selects.
ani = Website("https://www.anthropic.com/")
stream_brochure("Anthropic AI ", ani)

# Anthropic AI Brochure

Welcome to **Anthropic**, an AI research and safety company committed to building reliable and interpretable AI systems.

---

## Our Mission

At Anthropic, we believe that AI will have a transformative impact on the world. Our goal is to create AI systems that not only enhance human capabilities but can also be relied upon for safety and ethical considerations.

### Core Principles:
- **Safety as a Science**: We conduct systematic research into AI safety, applying our findings to develop safe and reliable AI solutions.
- **Interdisciplinary Collaboration**: Our team consists of researchers, engineers, policy experts, and operational leaders, allowing us to approach complex AI challenges from multiple angles.

---

## Our Products

### Claude AI

Discover **Claude**, our flagship AI model designed for versatility and safety. With the recent launch of **Claude 3.5 Sonnet** and **Claude 3.5 Haiku**, we provide cutting-edge capabilities for businesses looking to integrate AI into their operations. 

### API Development

Build your own AI applications with our easy-to-use **Claude API**. Create custom experiences that cater to unique business needs.

### Enterprise Solutions

**Claude for Enterprise** offers organizations the tools to deploy reliable AI solutions tailored to their industry specifications.

---

## Our Customers

We cater to a diverse range of customers, including:
- Enterprises seeking to harness AI for productivity and innovation.
- Developers interested in building on our API.
- Organizations focused on promoting safe and ethical AI usage.

---

## Join Our Team

At Anthropic, we are always on the lookout for passionate individuals eager to make a difference in the AI landscape. Our culture promotes collaboration and a shared mission of safety and accountability.

### Open Positions
Explore a variety of roles across research, engineering, policy, and product development. We invite you to contribute your unique skills to shape the future of AI.

---

## Company Culture

We pride ourselves on a culture of:
- **Collaboration**: We believe that the best ideas come from diverse perspectives, fostering teamwork across all levels.
- **Innovation**: Continuous learning and exploration are central to our identity. We encourage experimentation and creativity in our research.
- **Commitment to Safety**: Everyone at Anthropic is dedicated to prioritizing safety and ethical considerations in all of our work.

---

### Connect with Us

- [Twitter](https://twitter.com/anthropic) | [LinkedIn](https://www.linkedin.com/company/anthropic) | [YouTube](https://www.youtube.com/anthropic) 

Join us in our mission to build safer AI applications for a better future.

---

**Anthropic**—Where AI safety meets innovation.