# Site Scribe
### Objective:
Build an AI-powered summarizer that fetches a web page and summarizes its contents using OpenAI’s Chat Completions API.

In [2]:
# Import Required Libraries
import os
from dotenv import load_dotenv
from IPython.display import Markdown, display
from openai import OpenAI
from bs4 import BeautifulSoup
import requests

In [3]:
# Load environment variables from a .env file into the process environment.
load_dotenv()
# Read the API key from the OPEN_API_KEY environment variable (None when unset).
# NOTE(review): the OpenAI SDK's conventional variable name is OPENAI_API_KEY —
# confirm the .env file really defines OPEN_API_KEY.
open_api_key =os.getenv("OPEN_API_KEY")
# Shared client instance; the summarizer cell below sends its requests through it.
openai = OpenAI(api_key=open_api_key)

In [4]:
# Browser-like request headers (desktop Chrome User-Agent) for fetching pages
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    ),
}
def fetch_website_contents(url, max_chars=None):
    """Fetch a web page and return its title followed by its visible text.

    Args:
        url: Address of the page to download.
        max_chars: Optional non-negative cap on the length of the returned
            string. ``None`` (the default) returns the full text.

    Returns:
        A string of the form ``"<title> \\n\\n <text>"`` where ``<text>`` is
        the page text with script, style, img and input tags removed, one
        text fragment per line.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.
    """
    # Send the module-level browser-like `headers` (they were defined for this
    # request but never passed), and bound the wait so a stalled server
    # cannot hang the notebook forever.
    response = requests.get(url, headers=headers, timeout=30)
    soup = BeautifulSoup(response.text, "html.parser")

    # <title> may be absent, or present without a single string child (in
    # which case .string is None); use the placeholder in both cases.
    title_tag = soup.title
    if title_tag is not None and title_tag.string:
        title = title_tag.string
    else:
        title = "No title found"

    # Remove unnecessary tags
    for tag in soup(["script", "style", "img", "input"]):
        tag.decompose()

    # Fragments and malformed documents can lack <body>; fall back to the
    # whole document instead of raising AttributeError on None.
    root = soup.body if soup.body is not None else soup
    text = root.get_text(separator="\n", strip=True)

    contents = f"{title} \n\n {text}"
    if max_chars is None:
        return contents
    return contents[:max_chars]

In [5]:
# System prompt: sets the assistant's persona (snarky, humorous), tells it to
# ignore navigation text, and requires plain markdown without a code fence.
# It is sent as the "system" message of every request.
system_prompt = """
You are a snarky assistant that analyzes the contents of a website,
and provides a short, snarky, humorous summary, ignoring text that might be navigation related.
Respond in markdown. Do not wrap the markdown in a code block - respond just with the markdown.
"""

# User prompt prefix: the instructions placed in front of the fetched website
# text; prefix + website text together form the "user" message.

user_prompt_prefix = """
Here are the contents of a website.
Provide a short summary of this website.
If it includes news or announcements, then summarize these too.

"""

In [6]:
def messages_for(website):
    """Build the two-message chat payload for one website's text.

    Args:
        website: Text of the website, appended to ``user_prompt_prefix``.

    Returns:
        A list with the system message followed by the user message, each a
        dict with ``"role"`` and ``"content"`` keys.
    """
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_prompt_prefix + website}
    return [system_message, user_message]

In [7]:
def summarizer(url, model="gpt-4.1-mini"):
    """Fetch the page at *url* and return the model's summary of it.

    Args:
        url: Address of the page to summarize.
        model: Name of the chat model to query. Defaults to the model this
            notebook has always used, so existing calls behave the same.

    Returns:
        The content of the first choice in the chat completion response.
    """
    website = fetch_website_contents(url)
    response = openai.chat.completions.create(
        model=model,
        messages=messages_for(website),
    )
    # No `n` is passed to the API call, so the reply is read from the first choice.
    return response.choices[0].message.content

In [8]:
def display_summary(url):
    """Summarize the page at *url* and render the summary as Markdown output."""
    display(Markdown(summarizer(url)))

In [9]:
# Example run: downloads the CNN front page and sends its text to the OpenAI
# API, so this cell needs network access and a valid API key.
display_summary("https://cnn.com")

# CNN Website: The Never-Ending News Buffet

Welcome to CNN’s digital playground where news never sleeps, and ads never stop asking if they annoyed you today. This site is basically the news universe’s Swiss Army knife: politics, global conflicts, celebrity drama, climate crises, sports, tech innovations, and even a sprinkling of crossword puzzles if you need a break from doomscrolling.

## Breaking News Highlights (Because You Can’t Miss ‘Em)
- China just added another aircraft carrier to flex on the U.S. Navy.
- U.S. government shutdown = canceled flights and very grumpy travelers at major airports.
- Japan’s military now chasing bears like it’s an action movie.
- Miss Universe contestants pulled a dramatic walkout—because why not?
- Elon Musk just got a $1 trillion pay package because, apparently, money grows on Teslas.
- Pelosi is retiring, Trump’s slicing weight loss drug prices, and Russia’s soldier just got life for a war crime.

## Other Noteworthy Tidbits
- Typhoons turning the Philippines into a water park of destruction.
- Amazon lakes heating up to sauna-level temps.
- The Louvre’s ancient CCTV password that will absolutely not surprise you.
- Space astronauts having the first-ever BBQ (because grilling zero gravity is a flex).
- Traffickers in the Sahara showing how low humanity can go.

## And Of Course, More Content Than You Can Swallow
Podcasts, live TV, quizzes, and even a creepy Clown Motel feature to keep your nightmares fresh.

**In short:** CNN is your one-stop shop for everything from geopolitical bust-ups to weird wildlife wars and celebrity escapades, all served with a side of relentless ads asking for your precious feedback. Because what’s news without a little nagging?