In [None]:
# Module installations, if not already covered by the requirements:
# !pip install selenium
# !pip install python-dotenv

In [None]:
import os
import shutil
import sys
import tempfile
import threading
import time
import tkinter as tk
from tkinter import scrolledtext, messagebox

import requests
from dotenv import load_dotenv

In [None]:
# Import the third-party packages used below. When an import fails, the package
# is installed with pip and the import is attempted a second time.
try:
    import google.generativeai as genai
except ImportError:
    print("Installing Google Generative AI package...")
    import subprocess
    # Run pip through sys.executable so it targets the interpreter running this code.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "google-generativeai"])
    import google.generativeai as genai

try:
    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
except ImportError:
    print("Installing Selenium package...")
    import subprocess
    # Same fallback as above: install with pip, then repeat the exact imports.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "selenium"])
    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC


In [None]:
# Configure your API key here.
# load_dotenv(override=True) lets values from a .env file replace variables
# that are already set in the process environment.
load_dotenv(override=True)
api_key = os.getenv('GOOGLE_API_KEY')

# Sanity-check the key and print exactly one diagnostic line.
# Whitespace is checked before the prefix: a key with a leading space or tab
# would otherwise fail the prefix test and be reported as the wrong kind of key.
if not api_key:
    print("No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!")
elif api_key.strip() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook")
elif not api_key.startswith("AIz"):
    print("An API key was found, but it doesn't start AIz; please check you're using the right key - see troubleshooting notebook")
else:
    print("API key found and looks good so far!")


In [None]:
# Website class using Selenium
class Website:
    """
    Title and visible body text of a web page, captured with headless Chrome.

    Attributes set by the constructor:
        url    -- the address that was requested
        title  -- the page title, "No title found" when the title is empty, or
                  "Error loading page" when loading failed
        text   -- text of the <body> element, or "Failed to load webpage: ..."
                  when loading failed
        driver -- the WebDriver that was used (only present if Chrome started);
                  it has already been quit by the time __init__ returns
    """

    def __init__(self, url):
        """
        Create this Website object from the given url using Selenium WebDriver.

        Chrome runs against a throwaway profile directory created with
        tempfile.mkdtemp, which is deleted again before the constructor
        returns, whether or not the page loaded.
        """
        self.url = url

        # A unique, platform-appropriate profile directory for this session.
        profile_dir = tempfile.mkdtemp(prefix='chrome-user-data-')
        try:
            self._load(profile_dir)
        finally:
            # Best effort: the browser may still be releasing files in the profile.
            shutil.rmtree(profile_dir, ignore_errors=True)

    def _load(self, profile_dir):
        """Fetch self.url with Chrome using profile_dir and fill in title/text."""
        # Configure Chrome options
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument('--headless')
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--disable-dev-shm-usage')
        chrome_options.add_argument(f'--user-data-dir={profile_dir}')

        try:
            # Initialize the WebDriver with the options
            self.driver = webdriver.Chrome(options=chrome_options)

            # Load the webpage
            self.driver.get(self.url)

            # Get the page title
            self.title = self.driver.title or "No title found"

            # Wait (up to 10 seconds) for the body to be present
            WebDriverWait(self.driver, 10).until(
                EC.presence_of_element_located((By.TAG_NAME, "body"))
            )

            # Execute JavaScript to remove unwanted elements
            self.driver.execute_script("""
                const elementsToRemove = document.querySelectorAll('script, style, img, input');
                elementsToRemove.forEach(element => element.remove());
            """)

            # Get the text content of the body
            body_element = self.driver.find_element(By.TAG_NAME, "body")
            self.text = body_element.text

        except Exception as e:
            # Failures are reported through the attributes instead of being raised.
            self.title = "Error loading page"
            self.text = f"Failed to load webpage: {str(e)}"
        finally:
            # Close the browser session if it was created
            if hasattr(self, 'driver'):
                self.driver.quit()


In [None]:
def messages_for_gemini(website, max_length=10000):
    """
    Build the summarization prompt for a scraped page.

    Args:
        website: object exposing ``url``, ``title`` and ``text`` attributes.
        max_length: maximum number of characters of ``website.text`` to put in
            the prompt, to avoid exceeding token limits. ``None`` disables
            truncation.

    Returns:
        The prompt string containing the URL, the title, the (possibly
        truncated) page text and the summarization instructions.
    """
    # A missing text is treated as empty; slicing already leaves shorter text untouched.
    page_text = website.text or ""
    truncated_text = page_text[:max_length]
    
    prompt = f"""
    Website: {website.url}
    Title: {website.title}
    
    Content:
    {truncated_text}
    
    Please provide a concise summary of this website content. Focus on the main topics, 
    purpose of the website, and key information presented.
    """
    
    return prompt


In [None]:
def summarize(url, result_callback, error_callback):
    """
    Scrape ``url``, ask Gemini for a summary and report the outcome via callbacks.

    Exactly one callback is invoked per call, and nothing is raised to the caller.

    Args:
        url: address of the page to summarize.
        result_callback: called with the summary text on success.
        error_callback: called with a message starting with "Error: " when the
            API key is missing, the page could not be loaded, or any exception
            occurs along the way.
    """
    try:
        # Check if API key is available (the key is read from GOOGLE_API_KEY)
        if not api_key:
            error_callback("Error: Gemini API Key not found. Please create a .env file with GOOGLE_API_KEY=your_key")
            return
            
        # Configure Gemini API
        genai.configure(api_key=api_key)
        
        # Load website content
        website = Website(url)
        
        # Website reports load failures through these sentinel values rather than
        # raising; surface them as errors instead of asking the model to
        # "summarize" an error message.
        if website.title == "Error loading page" and website.text.startswith("Failed to load webpage:"):
            error_callback(f"Error: {website.text}")
            return
        
        # Create a Gemini model instance
        model = genai.GenerativeModel('gemini-2.0-flash')
        
        # Generate the summary
        response = model.generate_content(messages_for_gemini(website))
        
        # Return the result through callback
        result_callback(response.text)
        
    except Exception as e:
        # Handle errors and exceptions
        error_callback(f"Error: {str(e)}")


In [None]:
class WebsiteSummarizerApp:
    """
    Tkinter window for summarizing a website.

    Layout, top to bottom: a URL prompt and entry, a row holding the Summarize
    and Clear/Reset buttons, a status line, and a read-only scrolled results
    pane. The summary is produced by ``summarize`` on a daemon thread and
    handed back to the Tk event loop through ``root.after``.
    """

    def __init__(self, root):
        """Configure ``root`` and create and pack every widget in display order."""
        self.root = root
        root.title("Website Summarizer")
        root.geometry("800x600")
        root.configure(padx=20, pady=20)

        small_font = ("Arial", 10)
        bold_font = ("Arial", 12, "bold")

        # URL prompt and entry field
        self.url_label = tk.Label(
            root,
            text="Enter the website address (copy and paste here):",
            font=("Arial", 12)
        )
        self.url_label.pack(anchor="w", pady=(0, 5))

        self.url_entry = tk.Entry(root, width=80, font=small_font)
        self.url_entry.pack(fill="x", pady=(0, 15))

        # Frame that keeps the two buttons side by side
        self.button_frame = tk.Frame(root)
        self.button_frame.pack(pady=(0, 15))

        # Options common to both buttons; only label, colour and command differ
        shared_button_options = {"font": bold_font, "fg": "white", "padx": 20, "pady": 5}

        self.summarize_button = tk.Button(
            self.button_frame,
            text="Summarize",
            command=self.start_summarize,
            bg="#4CAF50",
            **shared_button_options
        )
        self.summarize_button.pack(side=tk.LEFT, padx=(0, 10))

        self.clear_button = tk.Button(
            self.button_frame,
            text="Clear/Reset",
            command=self.clear_all,
            bg="#f44336",
            **shared_button_options
        )
        self.clear_button.pack(side=tk.LEFT)

        # Status line used for progress and outcome feedback
        self.status_label = tk.Label(root, text="", font=small_font, fg="blue")
        self.status_label.pack(pady=(0, 10))

        # Results heading and scrolled pane
        self.results_label = tk.Label(root, text="Results:", font=bold_font)
        self.results_label.pack(anchor="w", pady=(0, 5))

        self.results_text = scrolledtext.ScrolledText(
            root, width=80, height=20, font=small_font, wrap=tk.WORD
        )
        self.results_text.pack(fill="both", expand=True)
        # The pane stays disabled (read-only) except while it is being rewritten
        self.results_text.config(state="disabled")

    def _replace_results(self, text=None):
        """Empty the results pane and, when ``text`` is given, write it in."""
        pane = self.results_text
        pane.config(state="normal")
        pane.delete(1.0, tk.END)
        if text is not None:
            pane.insert(tk.END, text)
        pane.config(state="disabled")

    def start_summarize(self):
        """Validate the entered URL, reset the display and start the worker thread."""
        url = self.url_entry.get().strip()
        if not url:
            messagebox.showerror("Error", "Please enter a website URL")
            return

        # Default to https:// when no scheme was typed, and show the final URL
        if not url.startswith(('http://', 'https://')):
            url = 'https://' + url
            self.url_entry.delete(0, tk.END)
            self.url_entry.insert(0, url)

        self._replace_results()

        self.status_label.config(text="Summarizing website... This may take a moment.")
        self.summarize_button.config(state="disabled")

        # Summarize off the UI thread so the window stays responsive
        worker = threading.Thread(target=self.run_summarization, args=(url,), daemon=True)
        worker.start()

    def run_summarization(self, url):
        """Thread target: run ``summarize`` with this app's callbacks."""
        summarize(url, self.handle_result, self.handle_error)

    def handle_result(self, result):
        """Success callback: schedule the UI update on the Tk event loop."""
        self.root.after(0, self._update_ui, result, True)

    def handle_error(self, error_message):
        """Failure callback: schedule the UI update on the Tk event loop."""
        self.root.after(0, self._update_ui, error_message, False)

    def clear_all(self):
        """Clear all inputs and results"""
        self.url_entry.delete(0, tk.END)
        self._replace_results()
        self.status_label.config(text="")
        # Leave the cursor in the URL field, ready for the next address
        self.url_entry.focus()

    def _update_ui(self, text, is_success):
        """Show ``text`` in the results pane, set the status and re-enable Summarize."""
        self._replace_results(text)
        outcome = "Summary complete!" if is_success else "Error occurred."
        self.status_label.config(text=outcome)
        self.summarize_button.config(state="normal")


In [None]:
# If running directly (not in Colab): create the Tk root window, build the
# summarizer UI inside it, and enter the Tk event loop.
if __name__ == "__main__":
    root = tk.Tk()
    app = WebsiteSummarizerApp(root)
    root.mainloop()

In [None]:
# For Google Colab, add a special method to run the app

# API Key configuration in Google Colab
# from google.colab import userdata
# api_key=userdata.get('GOOGLE_API_KEY_1')

# def run_app_in_colab():
#     # This is needed because Colab runs in a different environment
#     import nest_asyncio
#     nest_asyncio.apply()
    
#     # Initialize and run the app
#     root = tk.Tk()
#     app = WebsiteSummarizerApp(root)
#     root.mainloop()


# For Colab usage, uncomment the line below:
# run_app_in_colab()
