In [14]:
import os
from typing import Optional
from urllib.parse import urljoin

import gradio as gr
import ollama
import requests
from bs4 import BeautifulSoup
from IPython.display import Markdown, display

In [15]:
class Website:
    """Snapshot of a web page: its title, visible body text and icon URL.

    Constructing an instance performs an HTTP GET on ``url`` and parses the
    response body with BeautifulSoup's ``html.parser``.
    """

    url: str  # address that was passed to the constructor
    title: str  # text of the <title> element, or "No title found"
    text: str  # body text, newline separated, without script/style/img/input
    logo: Optional[str]  # absolute URL of the first icon <link>, or None

    def __init__(self, url, timeout=10):
        """Download ``url`` and extract title, body text and logo.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds handed to ``requests.get``; without a timeout
                an unresponsive host would block the caller indefinitely.

        Raises:
            requests.HTTPError: via ``raise_for_status`` when the server
                answers with an error status.
            requests.RequestException: other failures raised by
                ``requests.get`` (including timeouts).
        """
        self.url = url
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # `.string` is None for an empty <title> or one with nested tags,
        # so collect the text instead and fall back when nothing is left.
        title_text = soup.title.get_text(strip=True) if soup.title else ""
        self.title = title_text or "No title found"

        if soup.body:
            # Drop elements that carry no readable content before extracting text.
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""

        # First <link> whose rel value mentions "icon" (e.g. "icon", "shortcut icon").
        icon_link = soup.find("link", rel=lambda x: x and "icon" in x.lower())
        href = icon_link.get("href") if icon_link else None
        self.logo = self._resolve_logo(url, href)

    @staticmethod
    def _resolve_logo(base_url, href):
        """Return ``href`` as an absolute URL against ``base_url``, or None if it is empty."""
        if not href:
            return None
        # urljoin leaves absolute URLs untouched and resolves relative and
        # protocol-relative ones, so no manual "starts with http" test is needed.
        return urljoin(base_url, href)

In [10]:
def message_llama(prompt):
    """Send ``prompt`` to the Ollama chat model and return the reply text.

    The conversation is the module-level ``system_message`` as the system
    turn followed by ``prompt`` as the user turn; the model is chosen by
    the module-level ``MODEL``.
    """
    system_turn = {"role": "system", "content": system_message}
    user_turn = {"role": "user", "content": prompt}
    reply = ollama.chat(model=MODEL, messages=[system_turn, user_turn])
    return reply["message"]["content"]

In [16]:
# Model name passed to ollama.chat.
MODEL = "llama3.2"

# System prompt used as the first turn of every chat request in this notebook.
system_message = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a short summary, ignoring text that might be navigation related. "
    "Respond in markdown."
)

In [17]:
def user_prompt_for(website):
    """Compose the user-turn prompt for ``website``.

    The prompt names the page by ``website.title``, states the summarizing
    instructions, and ends with the page content from ``website.text``.
    """
    heading = f"You are looking at a website titled '{website.title}'.\n"
    instructions = (
        "The contents of this website is as follows; please provide a short summary "
        "of this website in markdown. If it includes news or announcements, "
        "summarize these too.\n\n"
    )
    return heading + instructions + website.text

In [12]:
def messages_for(website):
    """Return the two-turn chat payload (system, then user) for ``website``.

    The system turn carries the module-level ``system_message``; the user
    turn carries the prompt produced by ``user_prompt_for(website)``.
    """
    system_turn = {"role": "system", "content": system_message}
    user_turn = {"role": "user", "content": user_prompt_for(website)}
    return [system_turn, user_turn]

In [18]:
def summarize(url):
    """Fetch ``url`` and have the chat model summarize it.

    Returns:
        A ``(logo, summary)`` tuple: the ``logo`` attribute of the fetched
        ``Website`` (which may be None) and the text content of the model's
        reply.
    """
    site = Website(url)
    chat_messages = messages_for(site)
    reply = ollama.chat(model=MODEL, messages=chat_messages)
    logo = site.logo  # None when the page declares no icon
    return logo, reply['message']['content']

In [19]:
def display_summary(url):
    """Gradio callback: summarize the page the user typed into the textbox.

    The raw input is cleaned before use: surrounding whitespace is removed
    and ``https://`` is prepended when the text contains no scheme, so an
    entry such as ``example.com`` is fetched instead of failing inside
    ``requests``.

    Args:
        url: Text entered by the user.

    Returns:
        The ``(logo, summary)`` pair produced by ``summarize``.

    Raises:
        gr.Error: when the input is empty, so the UI shows a readable message.
    """
    url = (url or "").strip()
    if not url:
        raise gr.Error("Please enter a website URL.")
    if "://" not in url:
        # Scheme-less input is the common case for hand-typed addresses.
        url = f"https://{url}"
    logo, summary = summarize(url)
    return logo, summary

In [21]:
# Build the UI: one URL textbox in, the site logo and a markdown summary out.
demo = gr.Interface(
    fn=display_summary,
    title="Website Analyzer",
    description="Enter a website URL to get a summary and its logo.",
    inputs=gr.Textbox(label="Website URL"),
    outputs=[
        gr.Image(label="Website Logo"),
        gr.Markdown(label="Summary"),
    ],
)
# share=True also publishes the app on a public Gradio link; inbrowser=True
# asks Gradio to open the app in a browser tab.
demo.launch(inbrowser=True, share=True)

* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://879e9b9276ea2e098d.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


