In [5]:
# Install external libraries (Unnecessary in some environments like Colab/Jupyter)
!pip install bs4 requests python-dotenv --quiet
!pip install --upgrade openai --quiet

# Import libraries
import os
import openai
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv

# Load environment variables with python-dotenv (presumably from a local .env
# file - confirm) so that OPENAI_API_KEY can be read from the environment below
load_dotenv()

# Get OpenAI API key; os.getenv() returns None (instead of raising KeyError) when unset
api_key = os.getenv("OPENAI_API_KEY")

# Always bind `client`, so the code below can test it instead of failing with a
# NameError ("name 'client' is not defined") when the key is missing.
client = openai.OpenAI(api_key=api_key) if api_key else None

if client is None:
    print("No API key found in environment variables.")
else:
    # Ask for input only once we know the request can actually be sent.
    # NOTE: the entered text is sent to the model verbatim - the page itself is
    # not fetched in this cell.
    message = input("Enter a website to analyze: ")

    # Report OpenAI API failures separately from anything unexpected
    try:
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": message}]
        )
        print(response.choices[0].message.content)
    except openai.OpenAIError as e:
        print(f"OpenAI API Error: {e}")
    except Exception as e:
        print(f"Unexpected Error: {e}")

Enter a website to analyze:  https://www.google.com/


It seems you've shared a link to Google's homepage. How can I assist you with Google or any other inquiry you might have?


In [12]:
import requests
from bs4 import BeautifulSoup

# Request headers sent with every fetch; some websites require a browser-like
# User-Agent before they allow fetching.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    ),
}

class Website:
    """
    A fetched web page reduced to its title and visible body text.

    Attributes:
        url: The URL passed to the constructor.
        title: Whitespace-trimmed <title> text; "No title found" when the page
            has no usable title, "Error" when the request failed.
        text: Body text with script/style/img/input elements removed, fragments
            separated by newlines; "No content available" when the page has no
            <body>, "" when the request failed.
    """

    def __init__(self, url):
        """
        Create this Website object from the given URL using the BeautifulSoup library.
        Extracts the title and main text while removing unnecessary elements.
        Progress is printed along the way; request errors (connection problems,
        timeouts, HTTP error statuses) are caught and reported, not raised.
        """
        self.url = url
        self.title = "No title found"  # Default title if the page has none
        self.text = "No content available"  # Default text if the page has no body

        try:
            print(f"Fetching website: {url}")
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()  # Raise HTTPError for bad responses

            # Parse the page
            print("Parsing HTML content...")
            soup = BeautifulSoup(response.content, 'html.parser')

            # Extract the title. `.string` is None for an empty <title> or one
            # containing nested markup, so it must not be assigned directly.
            if soup.title:
                self.title = self._clean_title(soup.title.string)
                print(f"Page title: {self.title}")

            # Remove unnecessary elements (scripts, styles, images, inputs)
            if soup.body:
                print("Cleaning page content...")
                for irrelevant in soup.body(["script", "style", "img", "input"]):
                    irrelevant.decompose()
                self.text = soup.body.get_text(separator="\n", strip=True)

        except requests.exceptions.RequestException as e:
            print(f"Error fetching {url}: {e}")
            self.title = "Error"
            self.text = ""

    @staticmethod
    def _clean_title(raw_title, default="No title found"):
        """
        Return `raw_title` with surrounding whitespace removed, or `default`
        when it is None, empty, or whitespace only.
        """
        cleaned = (raw_title or "").strip()
        return cleaned or default

    def __repr__(self):
        """Short debugging representation showing the title and URL."""
        return f"Website(title='{self.title}', url='{self.url}')"

# Manual check of the Website class
url = input('Enter the URL here: ')  # URL typed in by the user
oai = Website(url)  # fetching and parsing happen inside the constructor

# Show the title, then only the first 500 characters of the extracted text
print("\nExtracted Title:", oai.title, sep="\n")
print("\nExtracted Text (First 500 characters):", oai.text[:500], sep="\n")

Enter the URL here:  https://www.google.com/


Fetching website: https://www.google.com/
Parsing HTML content...
Page title: Google
Cleaning page content...

Extracted Title:
Google

Extracted Text (First 500 characters):
About
Store
Gmail
Images
Sign in
See more
Delete
Delete
Report inappropriate predictions
Advertising
Business
How Search works
Our third decade of climate action: join us
Privacy
Terms
Settings
Search settings
Advanced search
Your data in Search
Search history
Search help
Send feedback
Dark theme: Off
Google apps


In [14]:
# System and user message prompts
# System prompt: tells the model what role to play when summarizing
system_prompt = (
    'you are a nice helpful assistant that analyzes a website with a given URL by the user. '
    'Ignore navigation related materials and focus on giving informative information '
    'about the website, providing a short summary.'
)
#user prompt needs a function:
def user_prompt_for(website):
    """
    Build the user message asking for a markdown summary of `website`.

    Args:
        website: Object exposing `title` and `text` attributes (e.g. a Website).

    Returns:
        str: A title line, the summarization instructions, then the page text,
        ending with a single newline. No line is indented.
    """
    # Assembled from flush-left pieces: a triple-quoted literal indented to match
    # the function body would carry that indentation into the prompt itself.
    instructions = (
        "The contents of this website are as follows; "
        "please provide a short summary of this website in markdown.\n"
        "If it includes news or announcements, then summarize these too."
    )
    return (
        f"You are looking at a website titled {website.title}.\n\n"
        f"{instructions}\n\n"
        f"{website.text}\n"
    )

# Manual check of the prompt builder
url = input('Enter the URL here: ')
oai = Website(url)  # fetch and parse the page entered above

# Print a header line followed by the generated prompt
print("\nGenerated User Prompt:", user_prompt_for(oai), sep="\n")


Enter the URL here:  https://www.google.com/


Fetching website: https://www.google.com/
Parsing HTML content...
Page title: Google
Cleaning page content...

Generated User Prompt:
You are looking at a website titled Google.

    The contents of this website are as follows; please provide a short summary of this website in markdown.
    If it includes news or announcements, then summarize these too.

    About
Store
Gmail
Images
Sign in
See more
Delete
Delete
Report inappropriate predictions
Advertising
Business
How Search works
Our third decade of climate action: join us
Privacy
Terms
Settings
Search settings
Advanced search
Your data in Search
Search history
Search help
Send feedback
Dark theme: Off
Google apps
    


In [19]:
import requests
import openai
import os
from bs4 import BeautifulSoup
from IPython.display import display, Markdown
from dotenv import load_dotenv  # Load environment variables

# Load environment variables (python-dotenv)
load_dotenv()

# Build the OpenAI client from OPENAI_API_KEY; stop immediately if it is missing or empty
api_key = os.environ.get("OPENAI_API_KEY")
if api_key:
    client = openai.OpenAI(api_key=api_key)
else:
    raise ValueError("OPENAI_API_KEY not found. Please set it in your environment variables.")

# Browser-like User-Agent header sent with every scraping request
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    ),
}

# Website Class
class Website:
    """
    A fetched web page reduced to its title and visible body text.

    Attributes:
        url: The URL passed to the constructor.
        title: Whitespace-trimmed <title> text; "No title found" when the page
            has no usable title, "Error" when the request failed.
        text: Body text with script/style/img/input elements removed, fragments
            separated by newlines; "No content available" when the page has no
            <body>, "" when the request failed.
    """

    def __init__(self, url):
        """
        Fetch `url` and populate `title` and `text` using BeautifulSoup.
        Request errors (connection problems, timeouts, HTTP error statuses)
        are caught and printed, not raised.
        """
        self.url = url
        self.title = "No title found"
        self.text = "No content available"

        try:
            print(f"Fetching website: {url}")
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()

            soup = BeautifulSoup(response.content, 'html.parser')

            # `.string` is None for an empty <title> or one containing nested
            # markup, so it must not be assigned directly.
            if soup.title:
                self.title = self._clean_title(soup.title.string)

            if soup.body:
                # Drop elements that contribute no readable text
                for irrelevant in soup.body(["script", "style", "img", "input"]):
                    irrelevant.decompose()
                self.text = soup.body.get_text(separator="\n", strip=True)
        except requests.exceptions.RequestException as e:
            print(f"Error fetching {url}: {e}")
            self.title = "Error"
            self.text = ""

    @staticmethod
    def _clean_title(raw_title, default="No title found"):
        """
        Return `raw_title` with surrounding whitespace removed, or `default`
        when it is None, empty, or whitespace only.
        """
        cleaned = (raw_title or "").strip()
        return cleaned or default

    def __repr__(self):
        """Short debugging representation showing the title and URL."""
        return f"Website(title='{self.title}', url='{self.url}')"

# System prompt: defines the assistant's role and the response format
system_prompt = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a short summary, "
    "ignoring text that might be navigation related. "
    "Respond in markdown."
)

# Build the chat message list sent to the OpenAI API
def messages_for(website):
    """
    Return the two chat messages for `website`: the system prompt first,
    followed by the user prompt generated by user_prompt_for().
    """
    system_message = dict(role="system", content=system_prompt)
    user_message = dict(role="user", content=user_prompt_for(website))
    return [system_message, user_message]

# Build the user prompt for a given page
def user_prompt_for(website):
    """
    Return the user message for `website`: a line naming its title, the
    summarization instructions, then the page text and a trailing newline.
    """
    title_line = f"You are looking at a website titled {website.title}."
    instructions = (
        "The contents of this website are as follows; "
        "please provide a short summary of this website in markdown.\n"
        "If it includes news or announcements, then summarize these too."
    )
    return "\n\n".join([title_line, instructions, f"{website.text}\n"])

# Function to call OpenAI API and summarize the webpage
def summarize(url, model="gpt-4o-mini"):
    """
    Fetch `url` and ask an OpenAI chat model for a summary of the page.

    Args:
        url: Address of the page to summarize.
        model: Name of the chat model passed to the API; defaults to "gpt-4o-mini".

    Returns:
        str: The model's reply, or a human-readable error message when the
        page could not be fetched or the API call fails.
    """
    website = Website(url)  # Create Website object

    # Website leaves `text` empty when the request failed; there is nothing to
    # summarize, so skip the API call rather than send an empty page.
    if not website.text:
        return f"Could not fetch any content from {url}."

    try:
        response = client.chat.completions.create(
            model=model,
            messages=messages_for(website),  # System + user prompt messages
        )
        return response.choices[0].message.content  # Extract response content
    except openai.OpenAIError as e:
        return f"OpenAI API Error: {e}"

# Render a page summary as Markdown in the notebook
def display_summary(url):
    """Summarize the page at `url` and display the result as rendered Markdown."""
    display(Markdown(summarize(url)))

# Demo: summarize a fixed page and render the result
display_summary(url="https://edwarddonner.com")

Fetching website: https://edwarddonner.com


# Summary of Edward Donner's Website

Edward Donner is a software developer and AI enthusiast who enjoys experimenting with large language models (LLMs). He is the co-founder and CTO of Nebula.io, a company focused on utilizing AI to help individuals discover their potential and manage talent effectively. Donner has a background in AI startups, previously founding untapt, which was acquired in 2021.

## Recent News and Announcements
- **January 23, 2025**: Published resources for an LLM Workshop focused on hands-on experience with agents.
- **December 21, 2024**: Welcomed new members to the SuperDataScientists community.
- **November 13, 2024**: Released resources for "Mastering AI and LLM Engineering."
- **October 16, 2024**: Shared resources for transitioning from a Software Engineer to an AI Data Scientist. 

Donner also expresses personal interests in DJing and electronic music production, although he humorously admits to being out of practice.

In [25]:
# Prompt for a URL and render its summary
baka = input('Enter website URL: ')  # the URL as a plain string
display_summary(url=baka)  # takes the URL string; summarize() creates the Website itself

Enter website URL:  https://www.amazon.com/


Fetching website: https://www.amazon.com/


# Amazon.com Summary

Amazon.com is a comprehensive e-commerce platform where users can purchase a wide range of products, including books, electronics, clothing, and household items. The site provides a user-friendly interface for shopping, with options for searching, filtering, and comparing products.

## Key Features
- Extensive product categories
- User reviews and ratings
- Personalized recommendations
- Options for fast and convenient shipping

## Notes
No specific news or announcements are provided in the content. 

## Additional Information
The website also emphasizes the importance of privacy and user agreement policies, ensuring that users are aware of their rights and responsibilities while using the platform.