In [11]:
import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI

In [12]:
# Pull settings from a local .env file into the process environment.
load_dotenv()
api_key = os.getenv('OPENAI_API_KEY')
if not api_key:
    # Point at the likely cause up front; NOTE(review): the client is expected
    # to reject a missing key on its own right after this - confirm.
    print("OPENAI_API_KEY is not set - add it to your .env file or environment.")
# Hand the key over explicitly so the value checked above is the one in use.
openai = OpenAI(api_key=api_key)

In [13]:
# Request headers sent with every page fetch; the User-Agent mimics a desktop
# Chrome browser.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    ),
}

class Website:
    """A fetched web page reduced to its title and readable text.

    Attributes:
        url: Address the page was requested from.
        title: Text of the page's <title> tag, or "No title found" when the
            tag is missing or has no single text value.
        text: Page text with script/style/img/input elements removed, one
            text fragment per line.
    """

    url: str
    title: str
    text: str

    def __init__(self, url, timeout=30):
        """Download `url` and extract its title and text.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds to wait on the server before giving up, so an
                unresponsive host cannot hang the notebook indefinitely.

        Raises:
            requests.exceptions.RequestException: The request failed or
                timed out.
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(response.content, 'html.parser')

        # `.string` is None when <title> is empty or wraps nested tags, so
        # guard both the missing tag and the missing text.
        title = soup.title.string if soup.title else None
        self.title = title if title else "No title found"

        # Documents without a <body> (fragments, plain text) would otherwise
        # fail on `soup.body(...)`; fall back to the whole parsed document.
        root = soup.body if soup.body is not None else soup
        for irrelevant in root(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = root.get_text(separator="\n", strip=True)


In [14]:
def user_prompt_for(website):
    """Compose the user message asking for a summary of `website`.

    Args:
        website: Object exposing `title` and `text` attributes.

    Returns:
        A single string: a line naming the page title, the summarisation
        instructions, a blank line, then the page text.
    """
    heading = f"You are looking at a website titled {website.title}"
    instructions = (
        "\nThe contents of this website is as follows; "
        "please provide a short summary of this website in markdown. "
        "If it includes news or announcements, then summarize these too.\n\n"
    )
    return heading + instructions + website.text


In [15]:
def messages_for(website):
    """Build the two-message chat payload for summarising `website`.

    Args:
        website: Object accepted by `user_prompt_for`.

    Returns:
        A list with the system message followed by the user message, each a
        dict with "role" and "content" keys.
    """
    system_message = {
        "role": "system",
        "content": (
            "You are an assistant that analyzes the contents of a website "
            "and provides a short summary, ignoring text that might be navigation related. "
            "Respond in markdown."
        ),
    }
    user_message = {"role": "user", "content": user_prompt_for(website)}
    return [system_message, user_message]


In [16]:
def summarize(url, model="gpt-4o-mini"):
    """Fetch the page at `url` and return a model-written summary.

    Args:
        url: Address of the page to summarise.
        model: Name of the chat model to query; defaults to the model this
            notebook has always used.

    Returns:
        The content of the first choice in the chat completion response.
    """
    website = Website(url)
    response = openai.chat.completions.create(
        model=model,
        messages=messages_for(website),
    )
    return response.choices[0].message.content

In [17]:
def display_summary(url):
    """Summarise the page at `url` and render the result as Markdown."""
    display(Markdown(summarize(url)))


In [18]:
# Candidate sites to summarise; the commented lines are alternatives to the
# active `url` assignment below.
# url = "https://www.prosigmaka.com"
# url = "https://www.prodemy.id"
# url = "http://www.detik.com"
# url = "http://www.tempo.co"
url = "https://www.kompas.com/"


# Summarise the chosen page and render the result as Markdown in the cell output.
display_summary(url)

# Summary of Kompas.com

Kompas.com is a prominent news website that provides the latest, accurate, and trustworthy news in Indonesia. The website covers a wide range of topics including national news, politics, health, technology, sports, and lifestyle. It serves as a platform for various news categories, such as regional updates, corporate news, educational content, and travel insights.

## Recent Headlines:
1. **Political Updates**: The site features articles on recent surveys revealing popular political figures and ongoing controversies regarding public figure actions, including Mendikti Satryo's response to accusations.
2. **Public Health Initiatives**: Announcements about free health check-up programs spearheaded by the Ministry of Health have been highlighted.
3. **Environmental Issues**: Coverage on the continuing demolition of illegal sea fences in Tangerang despite requests from the Minister of Maritime Affairs and Fisheries to halt the process.
4. **Economic Insights**: Discussions surrounding the rising Bitcoin prices and their connection to the recent political climate.
5. **Social Affairs**: Stories about public responses to community hardships, such as a resident's struggles that highlight broader economic challenges.

Overall, Kompas.com aims to inform readers with comprehensive news and analysis on current events affecting Indonesia and the world.