In [1]:
import os
import requests
import json
from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI

In [6]:
headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class Website:
    """
    A utility class to represent a Website that we have scraped, now with links
    """

    def __init__(self, url):
        self.url = url
        response = requests.get(url, headers=headers)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')
        self.title = soup.title.string if soup.title else "No title found"
        if soup.body:
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""
        links = [link.get('href') for link in soup.find_all('a')]
        self.links = [link for link in links if link]

    def get_contents(self):
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

In [101]:
r = Website("https://www.principles.com/")
ed.links

['./',
 'https://www.facebook.com/raydalio/',
 'https://twitter.com/RayDalio',
 'https://www.linkedin.com/in/raydalio',
 'https://www.instagram.com/principles/?hl=en',
 'https://www.tiktok.com/@principlesbyraydalio',
 'https://www.threads.net/@raydalio',
 'https://economicprinciples.org/',
 'https://youtu.be/xguam0TKMw8',
 '#ted-talk-video',
 'https://youtu.be/PHe0bXAIuk0',
 '/principles-for-success',
 'https://apps.apple.com/us/app/principles-in-action/id1211294305',
 'https://play.google.com/store/apps/details?id=com.principles.pia',
 'https://principlesyou.com/',
 'https://principlesus.com/',
 'https://principles.com/redirect',
 'https://www.facebook.com/raydalio/',
 'https://twitter.com/RayDalio',
 'https://www.linkedin.com/in/raydalio',
 'https://www.instagram.com/principles/?hl=en',
 'https://www.tiktok.com/@principlesbyraydalio',
 'https://www.youtube.com/@principlesbyraydalio',
 'https://www.threads.net/@raydalio',
 'https://www.bridgewater.com/',
 'https://oceanx.org/',
 'http

In [42]:
link_system_prompt = "You are provided with a list of links found on a webpage. \
You are able to decide which of the links would be most relevant to include in a brochure about the company, \
such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
link_system_prompt += "You should respond in JSON as in this example:"
link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page": "url": "https://another.full.url/careers"}
    ]
}
"""
print(link_system_prompt)

You are provided with a list of links found on a webpage. You are able to decide which of the links would be most relevant to include in a brochure about the company, such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in this example:
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page": "url": "https://another.full.url/careers"}
    ]
}



In [None]:
def get_links_user_prompt(website):
    user_prompt = f"Here is the list of links on the website of {website.url} - "
    user_prompt += "please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. \
Do not include Terms of Service, Privacy, email links.\n"
    user_prompt += "Links (some might be relative links):\n"
    user_prompt += "\n".join(website.links)
    return user_prompt

print(get_links_user_prompt(rd))

In [44]:
MODEL_LLAMA = 'gemma2:2b'

def get_links(url):
    website = Website(url)
    openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')
    response = openai.chat.completions.create(
        model=MODEL_LLAMA,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(website)}
      ],
        response_format={"type": "json_object"}
    )
    result = response.choices[0].message.content
    return result

In [None]:
huggingface = Website("https://huggingface.co")
huggingface.links
get_links("https://huggingface.co")

In [46]:
def make_links(url):
    links = get_links(url)
    return links

def get_all_details(url, links):
    result = "Landing page:\n"
    result += Website(url).get_contents()
    for link in links["links"]:
        result += Website(link).get_contents()
    return result

In [60]:
url = "https://anthropic.com"
output = make_links(url)

In [83]:
output = output[:542]
# extracted structure to use in function
output = {
    "links": [
        "https://www.anthropic.com/company", 
        "https://www.anthropic.com/research", 
        "https://www.anthropic.com/claude",
        "https://www.anthropic.com/careers", 
        "https://www.linkedin.com/company/anthropicresearch", 
        "https://twitter.com/AnthropicAI", 
        "https://www.youtube.com/@anthropic-ai", 
        "https://trust.anthropic.com/", 
        "https://support.anthropic.com/",
        "https://status.anthropic.com/",  
        "https://www.antropic.com/claude"
    ]  }   
output

{'links': ['https://www.anthropic.com/company',
  'https://www.anthropic.com/research',
  'https://www.anthropic.com/claude',
  'https://www.anthropic.com/careers',
  'https://www.linkedin.com/company/anthropicresearch',
  'https://twitter.com/AnthropicAI',
  'https://www.youtube.com/@anthropic-ai',
  'https://trust.anthropic.com/',
  'https://support.anthropic.com/',
  'https://status.anthropic.com/',
  'https://www.antropic.com/claude']}

In [88]:
sumarized_output = get_all_details(url, output)

Landing page:
Webpage Title:
Home \ Anthropic
Webpage Contents:
Claude
Overview
Team
Enterprise
API
Pricing
Research
Company
Careers
News
Try Claude
AI
research
and
products
that put safety at the frontier
Claude.ai
Meet Claude 3.5 Sonnet
Claude 3.5 Sonnet, our most intelligent AI model, is now available.
Talk to Claude
API
Build with Claude
Create AI-powered applications and custom experiences using Claude.
Learn more
Announcements
Introducing computer use, a new Claude 3.5 Sonnet, and Claude 3.5 Haiku
Oct 22, 2024
Model updates
3.5 Sonnet
3.5 Haiku
Our Work
Product
Claude for Enterprise
Sep 4, 2024
Alignment
·
Research
Constitutional AI: Harmlessness from AI Feedback
Dec 15, 2022
Announcements
Core Views on AI Safety: When, Why, What, and How
Mar 8, 2023
Work with Anthropic
Anthropic is an AI safety and research company based in San Francisco. Our interdisciplinary team has experience across ML, physics, policy, and product. Together, we generate research and create reliable, benefic

In [89]:
system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
Include details of company culture, customers and careers/jobs if you have the information."

def get_brochure_user_prompt(company_name, brochure_output):
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += f"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    user_prompt += brochure_output
    user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters
    return user_prompt

In [90]:
brochure_output = sumarized_output

In [97]:
def create_brochure(company_name, brochure_output):
    ollama_via_openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')
    
    response = ollama_via_openai.chat.completions.create(
        model=MODEL_LLAMA,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, brochure_output)}
          ],
    )
    result = response.choices[0].message.content
    display(Markdown(result))



In [99]:
create_brochure("Anthropic", brochure_output)

## Anthropic: Building Safer AI that Creates a Better World

**About Anthropic:**

Established on safe and interpretable AI systems lies at the heart of Anthropic's mission. We're more than just an AI company – we believe AI holds transformative power, but its safety is paramount. 

Our interdisciplinary team combines expertise in ML, physics, policy, product development, and research to ensure reliable AI systems that benefit people and society.

**Our Approach:**

* **Commitment to Safety:** We treat AI safety as a systematic science, conducting rigorous research, applying it rigorously to our products, and sharing what we learn to advance global dialogue.
* **Research Driven:** Our cutting-edge research fuels the development of Claude 3.5 Sonnet and Claude 3.5 Haiku. Exploring novel research in interpretability, reinforcement learning from human feedback, policy, and societal impact analysis, we push boundaries for AI's future.
* **Building for Impact:** From Claude, our advanced language model capable of powering AI-enhanced applications, to partnerships with businesses, organizations, and researchers globally, we aim to deliver real-world solutions.

**Our Team:**
We are a collaborative team composed of researchers, engineers, policy experts, business leaders, and operational specialists from diverse backgrounds, all working together to build reliable and understandable AI systems. Our values of trust, communication, and collaboration drive innovation.


**Join Our Journey:**
Explore careers at Anthropic. We invite individuals passionate about making impactful contributions to the AI field to join our team!

**(Visit our Careers page for open positions!)** 
**Connect with us:**
* Website: [link to website]
* Twitter: [link to twitter]


 **Don't just read it – experience Anthropic!**  Give Claude a try and explore the power of safer AI.


In [106]:
def create_brochure_via_stream(company_name, brochure_output):
    ollama_via_openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')
    
    response = ollama_via_openai.chat.completions.create(
        model=MODEL_LLAMA,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, brochure_output)}
          ],
        stream=True
    )
    for chunk in response:
        print(chunk.choices[0].delta.content, end='')

create_brochure_via_stream("Anthropic", brochure_output)

##  Anthropic: Building Safer AI For Everyone

**Making a real impact in AI safety:** Anthropic believes that AI holds immense potential to reshape our world,** but that potential comes with risks. That's why we strive to build **reliable, interpretable, and steerable AI systems**. We believe **safety is paramount**, and it's at the forefront of everything we do.

**Here's how:**

Building Trustworthy AI Systems:**  We develop cutting-edge AI models like Claude, a powerful chatbot designed with safety as its core focus
Solving Real Problems:** Our research covers cutting edge areas in AI safetly, including interpretability and human feedback integration 
Encouraging Collaboration:** We embrace collaboration with industry leaders, academics, policymakers, and civil society organizations to ensure global-scale progress.

**Who we are:**

* **People first:**  We value a diverse, interdisciplinary team bringing their unique experience from different backgrounds - researchers, engineers, po