In [2]:
import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI

In [3]:
# Load variables from the .env file; override=True lets values from .env
# take precedence over variables already present in the environment.
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# The whitespace check must run BEFORE the prefix check: a key with a leading
# space or tab fails startswith("sk-proj-") and would otherwise be reported
# as having the wrong prefix rather than as having stray whitespace.
if not api_key:
    print("No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!")
elif api_key.strip() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook")
elif not api_key.startswith("sk-proj-"):
    print("An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook")
else:
    print("API key found and looks good so far!")

API key found and looks good so far!


In [4]:
# Create the OpenAI client used for the chat-completion calls in this notebook.
# No key is passed explicitly — presumably the client picks up OPENAI_API_KEY
# from the environment loaded earlier; confirm against the openai library docs.
openai = OpenAI()

In [5]:
# Browser-style User-Agent string sent with every page request.
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"

# HTTP headers handed to requests.get when a Website is constructed.
headers = {"User-Agent": USER_AGENT}

class Website:
    """
    A web page fetched over HTTP and reduced to its title and visible text.

    Attributes:
        url: The address that was requested.
        title: Text of the page's <title>, or "No title found" when the page
            has no usable title.
        text: Text of the page's <body> with script, style, img and input
            elements removed, one fragment per line; "" when the page has
            no <body>.
    """

    def __init__(self, url, timeout=30):
        """
        Create this Website object from the given url using the BeautifulSoup library

        Args:
            url: Address of the page to fetch.
            timeout: Seconds passed to requests.get as its timeout, so an
                unresponsive server cannot block the notebook indefinitely.

        Raises:
            Any exception raised by requests.get (for example on a connection
            failure or when the timeout elapses) propagates to the caller.
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(response.content, 'html.parser')

        # soup.title.string is None when <title> is empty or contains nested
        # tags; fall back to the placeholder instead of storing None.
        title = soup.title.string if soup.title else None
        self.title = title if title else "No title found"

        body = soup.body
        if body is None:
            # Some responses (fragments, non-HTML payloads) have no <body>;
            # calling soup.body(...) on None would raise a TypeError.
            self.text = ""
            return

        # Calling a tag with a list of names finds every matching descendant;
        # drop elements that contribute no readable text.
        for irrelevant in body(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = body.get_text(separator="\n", strip=True)

In [6]:
# Fetch the CNN front page, then show its title followed by the extracted body text.
cnn_news = Website("https://cnn.com")
print(cnn_news.title, cnn_news.text, sep="\n")

Breaking News, Latest News and Videos | CNN
CNN values your feedback
1. How relevant is this ad to you?
2. Did you encounter any technical issues?
Video player was slow to load content
Video content never loaded
Ad froze or did not finish loading
Video content did not start after ad
Audio on ad was too loud
Other issues
Ad never loaded
Ad prevented/slowed the page from loading
Content moved around while ad loaded
Ad was repetitive to ads I've seen previously
Other issues
Cancel
Submit
Thank You!
Your effort and contribution in providing this feedback is much
                                        appreciated.
Close
Ad Feedback
Close icon
US
World
Politics
Business
Health
Entertainment
Style
Travel
Sports
Science
Climate
Weather
Ukraine-Russia War
Israel-Hamas War
Games
More
US
World
Politics
Business
Health
Entertainment
Style
Travel
Sports
Science
Climate
Weather
Ukraine-Russia War
Israel-Hamas War
Games
Watch
Listen
Live TV
Subscribe
Sign in
My Account
Settings
Newsletters
Topics yo

In [7]:
# System message for the chat model: sets the summarizer role and asks for markdown output.
system_prompt = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a short summary, ignoring text that might be navigation related. "
    "Respond in markdown."
)

In [8]:
def user_prompt_for(website):
    """
    Build the user message asking the model to summarize a page.

    Args:
        website: Object exposing `title` and `text` attributes.

    Returns:
        A string made of a line naming the page title, the summarization
        instructions, a blank line, and then the page text.
    """
    heading = f"You are looking at a website titled {website.title}"
    instructions = (
        "\nThe contents of this website is as follows; "
        "please provide a short summary of this website in markdown. "
        "If it includes news or announcements, then summarize these too.\n\n"
    )
    return "".join([heading, instructions, website.text])

In [9]:
# Preview the full user prompt that would be sent for the CNN page.
print(user_prompt_for(cnn_news))

You are looking at a website titled Breaking News, Latest News and Videos | CNN
The contents of this website is as follows; please provide a short summary of this website in markdown. If it includes news or announcements, then summarize these too.

CNN values your feedback
1. How relevant is this ad to you?
2. Did you encounter any technical issues?
Video player was slow to load content
Video content never loaded
Ad froze or did not finish loading
Video content did not start after ad
Audio on ad was too loud
Other issues
Ad never loaded
Ad prevented/slowed the page from loading
Content moved around while ad loaded
Ad was repetitive to ads I've seen previously
Other issues
Cancel
Submit
Thank You!
Your effort and contribution in providing this feedback is much
                                        appreciated.
Close
Ad Feedback
Close icon
US
World
Politics
Business
Health
Entertainment
Style
Travel
Sports
Science
Climate
Weather
Ukraine-Russia War
Israel-Hamas War
Games
More
US
World


In [10]:
def messages_for(website):
    """
    Assemble the chat message list for summarizing a page.

    Args:
        website: Object accepted by user_prompt_for.

    Returns:
        A two-element list: the system message carrying system_prompt,
        followed by the user message built from the website.
    """
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_prompt_for(website)}
    return [system_message, user_message]

In [13]:
# Inspect the system + user message list built for the CNN page.
messages_for(cnn_news)

[{'role': 'system',
  'content': 'You are an assistant that analyzes the contents of a website and provides a short summary, ignoring text that might be navigation related. Respond in markdown.'},
 {'role': 'user',
  'content': "You are looking at a website titled Breaking News, Latest News and Videos | CNN\nThe contents of this website is as follows; please provide a short summary of this website in markdown. If it includes news or announcements, then summarize these too.\n\nCNN values your feedback\n1. How relevant is this ad to you?\n2. Did you encounter any technical issues?\nVideo player was slow to load content\nVideo content never loaded\nAd froze or did not finish loading\nVideo content did not start after ad\nAudio on ad was too loud\nOther issues\nAd never loaded\nAd prevented/slowed the page from loading\nContent moved around while ad loaded\nAd was repetitive to ads I've seen previously\nOther issues\nCancel\nSubmit\nThank You!\nYour effort and contribution in providing thi

In [17]:
def summarize(url, model="gpt-4o-mini"):
    """
    Fetch the page at `url` and return the chat model's summary of it.

    Args:
        url: Address of the page to summarize; passed to Website.
        model: Name of the chat model to request. Defaults to "gpt-4o-mini",
            the value that was previously hard-coded, so existing callers
            behave as before.

    Returns:
        The content of the first choice's message in the completion response.
    """
    website = Website(url)
    response = openai.chat.completions.create(
        model=model,
        messages=messages_for(website)
    )
    return response.choices[0].message.content

In [18]:
# Summarize cnn.com; the returned string is shown as the cell's raw repr.
summarize("https://cnn.com")

"# CNN Website Summary\n\nCNN provides a variety of breaking news and in-depth analysis on global events across multiple categories, including:\n\n- **US News**: Coverage of significant happenings in the United States, including updates on political developments, business news, and health-related topics.\n- **World News**: Insights and reports on international affairs, including ongoing conflicts such as the Ukraine-Russia war and the Israel-Hamas war.\n- **Politics**: Extensive reporting on elections, political figures, and related events, including coverage of former President Trump's activities and legal challenges.\n- **Business and Technology**: Updates on market trends, economic analyses, and innovations in technology.\n- **Health and Science**: Articles exploring medical research, health tips, and scientific discoveries.\n\n### Recent Highlights:\n- **International Affairs**: Reports indicate that Israel's diplomatic situation is worsening, with heightened isolation on the world

In [19]:
def display_summary(url):
    """Summarize the page at `url` and render the result as Markdown in the notebook output."""
    display(Markdown(summarize(url)))

In [20]:
# Summarize cnn.com again and render the result as formatted Markdown.
display_summary("https://cnn.com")

# Summary of CNN Website

The CNN website provides a wide range of breaking news, feature stories, videos, and analysis. It covers various topics including U.S. and world news, politics, business, health, entertainment, science, and the environment. Key headlines from recent news included:

- **Israel's Isolation**: Israel is facing increasing isolation on the global stage, especially amid ongoing conflicts.
- **Ukraine-Russia War**: A significant Russian drone attack resulted in casualties in Kyiv. The situation has prompted heightened military readiness in neighboring Poland.
- **Trump and the Arts**: Trump has announced a controversial initiative targeting the arts, which has sparked significant discussion about its implications.
- **Natural Disasters**: Hurricane Humberto was recently upgraded to a Category 5, with additional weather events being monitored.

In addition to news articles, the site features multimedia content, including video updates and analyses from various contributors. It also emphasizes user engagement, inviting feedback on ads and technical issues. 

The site serves as a comprehensive platform for staying informed about current events and important global issues.

In [21]:
# Run the same summarize-and-render step on a different site.
display_summary("https://anthropic.com")

# Summary of Anthropic Website

Anthropic is a public benefit corporation focused on advancing artificial intelligence (AI) with an emphasis on safety and human benefit. The company aims to mitigate the risks associated with AI while maximizing its benefits.

## Key Areas

- **Research & Economic Futures**: Anthropic conducts research to understand AI's impact on the labor market and the economy through initiatives like the Anthropic Economic Index.
  
- **Products**: The company's main product line includes Claude, a family of AI models designed to be user-friendly and beneficial. This includes Claude Console, Claude Code, and the Claude Developer Platform.

- **AI Safety**: The organization places significant importance on AI safety, advocating for responsible AI development practices through policies and public discourse.

- **Learning**: Anthropic Academy is dedicated to educating users on how to build and utilize AI technologies safely and responsibly.

## News & Announcements

- **Claude Updates**: Information about the introduction of Claude 4 and updates to its product offerings.
  
- **Project Vend**: Announced on August 5, 2025, suggesting ongoing development initiatives.
  
- **Reports**: Publication of the latest Anthropic Economic Index report which provides insights into the incorporation of AI into various economic sectors.

The overall mission of Anthropic revolves around harnessing AI for the long-term benefit of humanity while addressing its challenges and implications in society.