In [5]:
# Required setup (shell commands; run once in a terminal before starting the notebook kernel)
# python -m venv .venv
# source .venv/bin/activate
# pip install -r requirements.txt

In [5]:
import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI

In [6]:
# load_dotenv() is expected to copy variables from a local .env file into the
# process environment (confirm against the project's setup); the key is then
# read from the environment and is None when the variable is not set.
load_dotenv()
api_key = os.environ.get('OPENAI_API_KEY')

In [7]:
# Shared API client used by get_summary(). It is constructed with no arguments,
# so it presumably takes its credentials from the environment (OPENAI_API_KEY)
# rather than from the `api_key` variable above — TODO confirm.
openai = OpenAI()

In [8]:
class WebsiteFetcher:
    """Download a web page and keep its title and visible text.

    Attributes:
        url: The address that was requested.
        title: Text of the page's title element, or "Title not found" when the
            page has no usable title.
        text: Newline-separated text of the page after script, style, img and
            input elements have been removed.
    """

    def __init__(self, url, timeout=10):
        """Fetch ``url`` and extract its title and text.

        Args:
            url: Address of the page to download.
            timeout: Seconds to wait for the server before giving up; forwarded
                to ``requests.get``.

        Raises:
            requests.exceptions.RequestException: Propagated from
                ``requests.get`` on connection failures or timeouts.
        """
        self.url = url
        # Without an explicit timeout the request can block indefinitely on an
        # unresponsive host and freeze the notebook cell.
        response = requests.get(url, timeout=timeout)
        soup = BeautifulSoup(response.content, 'html.parser')

        # `.string` is None when the title element is empty or contains nested
        # tags, so guard both the tag and its string.
        title_tag = soup.title
        if title_tag is not None and title_tag.string is not None:
            self.title = title_tag.string
        else:
            self.title = "Title not found"

        # html.parser does not synthesize a body element; fall back to the
        # whole document for pages or fragments that lack one.
        root = soup.body if soup.body is not None else soup
        for irrelevant in root(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = root.get_text(separator="\n", strip=True)

In [15]:
# Fetch the Yahoo Finance front page and show the extracted title and text.
# The host was previously misspelled ("fiance"); use the same canonical URL as
# the get_summary() call later in the notebook.
content = WebsiteFetcher("https://finance.yahoo.com")
print(content.title)
print(content.text)


Yahoo Finance - Stock Market Live, Quotes, Business & Finance News
News
Today's news
US
Politics
World
Tech
Reviews and deals
Audio
Computing
Gaming
Health
Home
Phones
Science
TVs
Climate change
Health
Science
Originals
The 360
Newsletters
Life
Health
COVID-19
Fall allergies
Health news
Mental health
Relax
Sexual health
Studies
The Unwind
Parenting
Family health
So mini ways
Style and beauty
It Figures
Unapologetically
Horoscopes
Shopping
Buying guides
Food
Travel
Autos
Gift ideas
Buying guides
Entertainment
Celebrity
TV
Movies
Music
How to Watch
Interviews
Videos
Finance
My Portfolio
News
Latest
Stock Market
Originals
The Morning Brief
Economies
Premium News
Earnings
Tech
Housing
Crypto
Biden Economy
Markets
Stocks: Most Actives
Stocks: Gainers
Stocks: Losers
Trending Tickers
Futures
World Indices
US Treasury Bonds Rates
Currencies
Crypto
Top ETFs
Top Mutual Funds
Options: Highest Open Interest
Options: Highest Implied Volatility
Sectors
Basic Materials
Communication Services
Consumer

In [16]:
# System message placed first in every chat request built by get_messages().
system_prompt = (
    "You are my assistant to analyze website content and provide a summary, "
    "remove navigation text and links. Respond in markdown."
)

In [17]:
def get_user_prompt(website):
    """Build the user message for a fetched page.

    Args:
        website: Object exposing the page text as a ``text`` string attribute.

    Returns:
        The fixed summarisation instruction, a blank line, then ``website.text``.
    """
    instruction = "Provide a summary of this website in markdown.\n\n"
    return instruction + website.text

In [18]:
# Echo the system prompt as the cell's output for a quick visual check.
system_prompt

'You are my assistant to analyze website content and provide a summary, remove navigation text and links. Respond in markdown.'

In [14]:
# Print the complete user prompt built from `content` (instruction + page text).
# NOTE(review): the saved output of this cell starts with CNN text although
# `content` is fetched from a Yahoo URL, and the execution counts are out of
# order — presumably stale output; re-run the notebook top to bottom to confirm.
print(get_user_prompt(content))

Provide a summary of this website in markdown.

CNN values your feedback
1. How relevant is this ad to you?
2. Did you encounter any technical issues?
Video player was slow to load content
Video content never loaded
Ad froze or did not finish loading
Video content did not start after ad
Audio on ad was too loud
Other issues
Ad never loaded
Ad prevented/slowed the page from loading
Content moved around while ad loaded
Ad was repetitive to ads I've seen previously
Other issues
Cancel
Submit
Thank You!
Your effort and contribution in providing this feedback is much
                                        appreciated.
Close
Ad Feedback
Close icon
US
World
Politics
Business
Health
Entertainment
Style
Travel
Sports
Science
Climate
Weather
Ukraine-Russia War
Israel-Hamas War
Underscored
Games
More
US
World
Politics
Business
Health
Entertainment
Style
Travel
Sports
Science
Climate
Weather
Ukraine-Russia War
Israel-Hamas War
Underscored
Games
Watch
Listen
Live TV
Subscribe
Sign in
My Account
Se

In [20]:
def get_messages(website):
    """Assemble the chat messages for summarising a fetched page.

    Args:
        website: Object accepted by ``get_user_prompt`` (needs a ``text`` attribute).

    Returns:
        A two-element list of role/content dicts: the system message carrying
        ``system_prompt`` followed by the user message from ``get_user_prompt``.
    """
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": get_user_prompt(website)}
    return [system_message, user_message]

In [24]:
def get_summary(url):
    """Fetch a page and ask the chat model to summarise it.

    Args:
        url: Address of the page to summarise.

    Returns:
        The message content of the first choice in the model's response.
    """
    page = WebsiteFetcher(url)
    completion = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=get_messages(page),
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [25]:
# The model is instructed to answer in markdown, so render the summary with
# IPython's rich display instead of printing the raw markup as plain text.
display(Markdown(get_summary("https://finance.yahoo.com")))

# Website Summary

This website serves as a comprehensive portal for news and information across various sectors, including finance, technology, health, and entertainment. 

## News Categories
- **US News**: Covers domestic events across politics, weather, and general news updates.
- **World News**: Focused on global events and international affairs.
- **Health**: Updates on medical news, studies, mental health, and lifestyle health tips.
- **Science**: Insights into scientific advancements and research.
- **Tech**: Features on technology trends, reviews, and product deals.

## Finance
- **Market Overview**: Analyzes stock market performance, including indices like Dow and Nasdaq.
- **Investment Insights**: Offers reports on stocks, mutual funds, ETFs, and provides market analysis.
- **Personal Finance Tips**: Guidance on managing personal finances, loans, banking, credit cards, and investment strategies.

## Sports
- **Live Scores**: Coverage of various sports events, including NFL, N