In [5]:
"""
Website Summarizer using Ollama
AI-powered web scraping and summarization tool
"""

import os
import requests
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from IPython import get_ipython

# Browser-like request headers; per the original note, sites may block
# requests that do not look like they come from a browser.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    ),
}

class Website:
    """A fetched web page reduced to its title and cleaned body text.

    Attributes (all set in ``__init__``):
        url: The address that was requested.
        title: Text of the ``<title>`` element, or "No title found".
        text: Text of ``<body>`` with script/style/img/input elements removed,
            pieces joined by newlines; "No body content found." when the
            document has no ``<body>``.
    """

    def __init__(self, url, timeout=30):
        """Download ``url`` and extract its title and body text.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds to wait on the server before giving up. Without
                a timeout, ``requests`` waits indefinitely on a stalled host.

        Raises:
            requests.exceptions.RequestException: If the request fails or
                times out.
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(response.content, 'html.parser')

        # get_text() always yields a str, whereas .string is None for an empty
        # <title> or one with nested markup; fall back when nothing is left.
        title_text = soup.title.get_text(strip=True) if soup.title else ""
        self.title = title_text or "No title found"

        if soup.body:
            # Drop elements that carry no readable content before extracting.
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = "No body content found."

# System message sent to the model: its role and the required output format.
# Built from adjacent string literals so that source-code indentation does not
# end up inside the prompt text (backslash continuations would embed it).
system_prompt = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a short summary, ignoring text that might be navigation related. "
    "Respond in markdown."
)

def user_prompt_for(website):
    """Build the user message asking for a summary of ``website``.

    Args:
        website: Object exposing ``title`` and ``text`` string attributes.

    Returns:
        A string made of a title line, the summarization instructions, a
        blank line, and then the page text.
    """
    # Adjacent literals keep the source indentation out of the prompt; with
    # backslash continuations the leading spaces of each line were embedded.
    instructions = (
        "\nThe contents of this website is as follows; "
        "please provide a short summary of this website in markdown. "
        "If it includes news or announcements, then summarize these too.\n\n"
    )
    opening = f"You are looking at a website titled '{website.title}'"
    return opening + instructions + website.text

def messages_for(website):
    """Return the chat payload for ``website``: a system message, then a user message."""
    system_message = dict(role="system", content=system_prompt)
    user_message = dict(role="user", content=user_prompt_for(website))
    return [system_message, user_message]

def summarize(url, model="llama3.2"):
    """Scrape ``url`` and return the model's summary of it.

    Failures are reported through the return value rather than raised: a
    missing ``ollama`` package or any error while scraping or chatting
    produces a string starting with "Error".
    """
    # Imported lazily so a missing package yields a message, not a crash.
    try:
        import ollama
    except ImportError:
        print("Error: ollama package not installed. Install with: pip install ollama")
        return "Error: Ollama not available"

    try:
        page = Website(url)
        reply = ollama.chat(model=model, messages=messages_for(page))
        return reply['message']['content']
    except Exception as err:
        return f"Error summarizing website: {err!s}"

def display_summary(url, model="llama3.2"):
    """Summarize ``url`` and render the result as Markdown (for Jupyter)."""
    rendered = Markdown(summarize(url, model))
    display(rendered)

def print_summary(url, model="llama3.2"):
    """Summarize ``url`` and write the result to the console (plain Python)."""
    print(summarize(url, model))

def is_running_in_jupyter():
    """Return True only when running under a Jupyter (ZMQ) kernel.

    Returns False when IPython is not installed, when no interactive shell
    is active, or when the active shell is not a ``ZMQInteractiveShell``.
    """
    # Import locally and guard it: the previous ``except NameError`` could
    # never fire once ``get_ipython`` had been imported at module level.
    try:
        from IPython import get_ipython
    except ImportError:
        return False

    shell = get_ipython()  # None outside of an IPython session
    if shell is None:
        return False
    return type(shell).__name__ == "ZMQInteractiveShell"

def summarize_and_show(url, model="llama3.2"):
    """Summarize ``url`` and show it with the renderer that fits the environment."""
    # Markdown display inside Jupyter, plain print everywhere else.
    show = display_summary if is_running_in_jupyter() else print_summary
    show(url, model)

def stop_model(model_name="llama3.2"):
    """
    Gracefully stop the specified Ollama model to free up system memory.

    Ollama loads models into RAM, and depending on your system specs,
    leaving them running after you're done can consume several gigabytes
    of memory unnecessarily. Calling this after a summarization task asks
    the Ollama CLI to stop the model.

    It's especially useful if you're working on a laptop, switching between
    models, or using large models that might lead to memory pressure during
    multiple runs.

    Args:
        model_name: Name of the model passed to ``ollama stop``.

    Returns:
        None. If the ``ollama`` executable cannot be found, a message is
        printed instead of raising, so cleanup never crashes the caller.
    """
    import subprocess

    try:
        # Argument list (no shell) so the model name is never shell-parsed.
        subprocess.run(["ollama", "stop", model_name])
    except FileNotFoundError:
        # The CLI is not on PATH, so there is nothing to stop.
        print("Error: ollama command not found; no model was stopped.")

if __name__ == "__main__":
    # Demo run: summarize one page, then unload the model that was used.
    url = "https://cnn.com"
    model = "llama3.2"  # single source of truth for both summarize and stop
    print(f"Summarizing {url}...")
    try:
        summarize_and_show(url, model)
    finally:
        # Unload the model even if displaying the summary raised.
        stop_model(model)

Summarizing https://cnn.com...


This appears to be the main page of CNN.com, a news website that provides up-to-date coverage on national and international news, politics, business, entertainment, sports, and more.

The page is divided into several sections:

1. **Headlines**: A list of recent and breaking news headlines.
2. **Top Stories**: A selection of the most important stories of the day, with links to full articles.
3. **Politics**: News and analysis on politics and government from around the world.
4. **Business**: Financial news, market updates, and business analysis.
5. **Entertainment**: News and reviews on movies, TV shows, music, and celebrities.
6. **Sports**: Sports news, scores, and highlights from around the world.
7. **Weather**: Current weather conditions, forecasts, and severe weather alerts.
8. **International**: News and analysis on international affairs and global events.
9. **Health**: News and analysis on health issues, medical breakthroughs, and wellness topics.
10. **Science**: News and analysis on scientific discoveries, space exploration, and emerging technologies.
11. **Technology**: News and analysis on technology trends, innovation, and cybersecurity.
12. **Travel**: Travel news, destination guides, and travel tips.

The page also features various sections such as:

* **CNN Polls**: A selection of current polls and surveys on various topics.
* **Facts First**: A section that debunks common myths and misinformation.
* **Calculators**: Financial calculators and tools for business and personal finance.
* **Markets**: Live market updates, news, and analysis.
* **Videos**: A collection of videos on various topics, including news, sports, entertainment, and more.

The page also includes several interactive features, such as:

* **Today's Crossword**: A daily crossword puzzle to test your smarts.
* **Jumble Crossword**: A scrambled word game to challenge your vocabulary.
* **Photo Shuffle**: A random photo gallery to explore.
* **Sudoblock**: A tile-matching puzzle game.

Overall, this page provides a comprehensive overview of the news and information available on CNN.com.

In [2]:
import subprocess

# Model to stop via the Ollama CLI.
model_name = "llama3.2"

# Optional reset: stop the model cleanly. The call stays the last expression
# so the notebook still shows the returned CompletedProcess.
stop_command = ["ollama", "stop", model_name]
subprocess.run(stop_command)

CompletedProcess(args=['ollama', 'stop', 'llama3.2'], returncode=0)