In [1]:
import google.generativeai as genai
import requests
from bs4 import BeautifulSoup

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
import os

# Keep the credential out of the notebook source: take it from the
# GOOGLE_API_KEY environment variable when it is set. The original
# placeholder remains as the fallback so the cell still runs unchanged
# when the variable is absent.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY", "your_api_key"))

In [3]:
def extract_text_from_url(url, timeout=10):
    """Download a web page and return its text collapsed onto a single line.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get`` so a stalled host cannot block the
            notebook indefinitely.

    Returns:
        The page text with ``script``, ``style``, ``img``, ``video`` and
        ``iframe`` elements removed and every run of whitespace replaced by
        a single space.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of summarising an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")

    # Drop elements that carry no readable text (code, styling, media embeds).
    for element in soup(["script", "style", "img", "video", "iframe"]):
        element.decompose()

    # Splitting on any whitespace and re-joining flattens newlines and
    # indentation into single spaces.
    text = soup.get_text(separator="\n")
    return " ".join(text.split())

In [4]:
def summarize_with_gemini(text, model_name="models/gemini-1.5-flash"):
    """Ask a Gemini model for a detailed, bullet-pointed summary of ``text``.

    Args:
        text: Webpage content to summarise; it is embedded verbatim at the
            end of the prompt.
        model_name: Identifier handed to ``genai.GenerativeModel``. Defaults
            to the model this notebook was written against, so existing
            calls behave as before.

    Returns:
        The ``text`` attribute of the response returned by
        ``generate_content``.
    """
    model = genai.GenerativeModel(model_name)
    # The prompt body is kept flush-left on purpose: the triple-quoted
    # f-string preserves leading whitespace exactly as written.
    prompt = f"""
Please generate a **detailed summary** of the following webpage content.
Make sure it covers all key points, uses **bullet points**, includes **subheadings** where relevant,
and avoids any reference to images, videos, or links.

Text content:
{text}
"""
    response = model.generate_content(prompt)
    return response.text


In [5]:
from IPython.display import Markdown, display

def display_summary(url):
    """Fetch ``url``, summarise its text with Gemini, and render the result.

    Args:
        url: Address of the page to summarise.

    The summary is shown as rendered Markdown in the cell output; nothing is
    returned.
    """
    # The notebook defines no `summarize` function, so the previous call
    # raised NameError. Chain the two helpers that do exist instead.
    page_text = extract_text_from_url(url)
    summary = summarize_with_gemini(page_text)
    display(Markdown(summary))


In [6]:
# Prompt for the page address, extract its text, and ask Gemini for a summary.
# `url` and `summary` are module-level on purpose: later cells reuse them when
# saving the summary, logging the run, and searching for a keyword.
url = input("Enter the url for the website: ")
text = extract_text_from_url(url)
summary = summarize_with_gemini(text)

In [7]:
from IPython.display import display, Markdown

def pretty_display(summary):
    """Render a summary as Markdown, one heading and bullet list per section.

    Sections are the chunks of ``summary`` separated by blank lines. In each
    section the first line becomes an upper-cased heading (colons removed)
    and every following non-empty line becomes a bullet, with leading ``*``
    markers stripped.

    Args:
        summary: Summary text to display.
    """
    display(Markdown("## 📄 **SUMMARY OF THE PAGE**"))

    for section in summary.split("\n\n"):
        section = section.strip()
        # str.split always yields at least one element, so checking the
        # resulting list can never detect an empty section. Test the text
        # itself, otherwise blank sections render as an empty heading.
        if not section:
            continue

        lines = section.split("\n")

        heading = lines[0].strip().replace(":", "").upper()
        display(Markdown(f"### 🟩 **{heading}**"))  # "###" renders as a larger heading

        for line in lines[1:]:
            clean_line = line.strip().lstrip("*").strip()
            if clean_line:
                display(Markdown(f"- {clean_line}"))


In [8]:
# pretty_display(summary)

In [9]:
import os


In [10]:
def save_summary_to_markdown(summary, filename="summary.md"):
    """Write ``summary`` to a Markdown file under a fixed title line.

    Args:
        summary: Text to store below the ``# Website Summary`` heading.
        filename: Destination path; an existing file is overwritten.

    Prints the absolute path of the file that was written.
    """
    with open(filename, "w", encoding="utf-8") as md_file:
        md_file.write("# Website Summary\n\n")
        md_file.write(summary)
    # Report the path actually written; the message previously always
    # showed "summary.md", whatever `filename` was.
    print("✅ Summary saved at:", os.path.abspath(filename))

In [11]:
# Persist the generated summary; with no filename given it goes to "summary.md".
save_summary_to_markdown(summary)

✅ Summary saved at: c:\Users\HP\OneDrive\Desktop\Project\prompt_eng\summarizer\summary.md


In [21]:
from urllib.parse import urljoin

def extract_clean_text_from_url(url, timeout=10):
    """Download a web page and return its text, one non-empty line per row.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get`` so a stalled host cannot block the
            notebook indefinitely.

    Returns:
        The page text with ``script`` and ``style`` elements removed, each
        line stripped and blank lines dropped, joined with newlines.
        ``None`` if the page could not be fetched; the error is printed
        rather than raised.
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Treat 4xx/5xx answers as failures instead of searching an error page.
        response.raise_for_status()
    except requests.RequestException as e:
        # Only network/HTTP problems are reported and swallowed here;
        # unrelated programming errors are no longer hidden.
        print(f"Error fetching webpage: {e}")
        return None

    soup = BeautifulSoup(response.text, "html.parser")

    # Remove scripts/styles so their source does not appear in the text.
    for tag in soup(["script", "style"]):
        tag.decompose()

    # Keep the line structure: callers search this text line by line.
    text = soup.get_text(separator="\n")
    stripped_lines = (line.strip() for line in text.splitlines())
    return "\n".join(line for line in stripped_lines if line)


In [22]:
def search_keyword(keyword, summary_file_path, source_url):
    """Report where ``keyword`` occurs: the saved summary first, then the page.

    The comparison is case-insensitive. When the summary contains the
    keyword, every matching summary line is printed. Otherwise the text of
    ``source_url`` is fetched and at most five matching lines are printed.

    Args:
        keyword: Term to look for.
        summary_file_path: Path of the saved summary file.
        source_url: Page to fall back to when the summary has no match.

    Returns:
        None. All results and failure notices are printed.
    """
    try:
        with open(summary_file_path, "r", encoding="utf-8") as handle:
            summary = handle.read()
    except FileNotFoundError:
        print("⚠️ Summary file not found.")
        return

    needle = keyword.lower()

    def lines_containing(blob):
        # Lines of `blob` that mention the keyword, ignoring case.
        return [row for row in blob.split("\n") if needle in row.lower()]

    # Summary hit: list every matching line and stop.
    if needle in summary.lower():
        print(f"\n✅ Keyword '{keyword}' found in summary.\n")
        for row in lines_containing(summary):
            print("•", row.strip())
        return

    print(f"\n⚠️ Keyword '{keyword}' not found in summary. Now searching the web...\n")
    webpage_text = extract_clean_text_from_url(source_url)

    if not webpage_text:
        print("❌ Failed to extract text from the web.")
        return

    page_hits = lines_containing(webpage_text)

    if not page_hits:
        print("❌ No such keyword found on the webpage either.")
        return

    print(f"🔎 Found keyword '{keyword}' on the webpage. Here's a mini summary:\n")
    for row in page_hits[:5]:  # Show top 5 relevant lines
        print("•", row.strip())

In [23]:
import json
import os
from datetime import datetime

def log_summary(source_url, summary_file_path, summary_text=None, model_used="Gemini",
                history_file="history.json"):
    """Append a record of one summarisation run to a JSON history file.

    Args:
        source_url: Page that was summarised.
        summary_file_path: Where the summary was saved; only the base name
            is recorded.
        summary_text: Optional summary text. When given, its first 200
            characters are stored as a preview.
        model_used: Label stored under ``used_model``.
        history_file: JSON file to update. Defaults to ``history.json`` in
            the current working directory.

    The file holds a dict whose ``"summaries"`` key is a list of entries.
    If the file is missing, is not valid JSON, or does not contain a dict at
    the top level, a fresh history is started.
    """
    preview_length = 200

    history = {}
    if os.path.exists(history_file):
        try:
            with open(history_file, "r", encoding="utf-8") as f:
                loaded = json.load(f)
        except json.JSONDecodeError:
            loaded = None
        # Anything other than a dict cannot hold the "summaries" list.
        if isinstance(loaded, dict):
            history = loaded

    summary_entry = {
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "source_url": source_url,
        "summary_file": os.path.basename(summary_file_path),
        "used_model": model_used
    }

    if summary_text:
        preview = summary_text[:preview_length]
        # Only mark the preview as cut off when text was actually dropped.
        if len(summary_text) > preview_length:
            preview += "..."
        summary_entry["summary_preview"] = preview

    history.setdefault("summaries", []).append(summary_entry)

    with open(history_file, "w", encoding="utf-8") as f:
        json.dump(history, f, indent=4)

    print(f"✅ Summary logged to '{history_file}'")


# Record this run in the history file. No summary text is passed, so the
# entry is stored without a preview.
log_summary(url, "summary.md")



✅ Summary logged to 'history.json'


In [24]:
# Ask which keyword to look for and point the search at the saved summary file.
user_keyword = input("🔍 Enter the keyword to search: ")
summary_file_path = "summary.md"

In [31]:
# Check the saved summary first; search_keyword falls back to fetching `url`
# when the keyword does not appear there.
search_keyword(user_keyword, summary_file_path, url)


⚠️ Keyword 'artificial' not found in summary. Now searching the web...

🔎 Found keyword 'artificial' on the webpage. Here's a mini summary:

• generative artificial intelligence
• artificial intelligence
• and artificial intelligence assistant based on large language models.
• Artificial intelligence content detection
• Reflection (artificial intelligence)
