In [None]:
import os
from dotenv import load_dotenv
from IPython.display import Markdown, display
from openai import OpenAI
from bs4 import BeautifulSoup
import requests


# Browser-like request headers used when fetching a web page with requests.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    ),
}

# Read variables from a .env file into the environment before any client
# is constructed.
load_dotenv()
openai = OpenAI()

# Base URL of the locally served endpoint (presumably Ollama's
# OpenAI-compatible API -- confirm against your local setup).
OLLAMA_BASE_URL = "http://localhost:11434/v1"

# Second client aimed at the local endpoint; the api_key value looks like a
# placeholder rather than a real secret.
ollama = OpenAI(api_key="ollama", base_url=OLLAMA_BASE_URL)

# System prompt for the product-extraction model.
# The instructions are kept mutually consistent: one sort key (price,
# ascending), one statement about currency display, and a summary that is
# explicitly allowed after the table.
system_prompt = """You are an AI product extraction assistant.

Your task is to analyze raw scraped e-commerce website content and extract structured product listings that match a specified product name provided by the user.

You will be given:

1. The scraped website content (HTML or text)
2. A target product name

Instructions:

* Identify all products in the content that match or are closely related to the target product name.

* Ignore text that may be navigation related, including menus, headers, footers, category links, breadcrumbs, filters, login sections, advertisements, or pagination elements.

* Focus only on actual product listing information.

* Extract at least 10 relevant product listings where available.

* For each product extract:

  * Product Name
  * Product Description
  * Product Price

Data Handling Rules:

* Ignore listings that do not contain a visible price.
* Normalize each price to a numeric value for comparison and sorting, and keep track of its currency.
* If a price is given as a range, use the higher value.
* Ignore advertisements or unrelated products.

Sorting Rules:
* Sort the final list strictly by price, in ascending order (lowest price at the top).


Output Format:

Respond in markdown.
Do not wrap the markdown in a code block - respond just with the markdown.

Return the result as a Markdown table with the following columns:

| Product Name | Description | Price |

After the table, provide a short summary of the most recommended product based on price.

* The table must contain at least 10 products where available.
* Prices must be shown as numeric values with currency.
* Do not include any commentary other than the table and the summary that follows it.
"""
# Lead-in text placed at the start of the user message; the scraped content
# and product name are appended to it when the messages are built.
user_prompt = "\n Here is the website content and the product name\n\n"
def fetch_website_contents(url, max_chars=2_000, timeout=30):
    """Fetch a web page and return its title plus visible body text.

    Args:
        url: Address of the page to download.
        max_chars: Maximum length of the returned string; the combined
            title and text are cut off at this many characters.
        timeout: Seconds to wait for the server before requests gives up.
            Without a timeout the call can block indefinitely.

    Returns:
        The page title, a blank line, and the body text with script,
        style, img and input elements removed, truncated to ``max_chars``.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    soup = BeautifulSoup(response.content, "html.parser")

    # soup.title.string is None when <title> is empty or wraps nested tags,
    # so check the string itself and not only the tag.
    title = "No title found"
    if soup.title and soup.title.string:
        title = soup.title.string

    text = ""
    if soup.body:
        # Drop elements that carry no readable product text.
        for irrelevant in soup.body(["script", "style", "img", "input"]):
            irrelevant.decompose()
        text = soup.body.get_text(separator="\n", strip=True)

    return (title + "\n\n" + text)[:max_chars]

def messages_for(website, product_name):
    """Build the chat messages for one product-extraction request.

    Args:
        website: Scraped page text to search for product listings.
        product_name: Name of the product the listings should match.

    Returns:
        A two-element list of chat messages: the system prompt, followed by
        a user message holding the lead-in text, the target product name and
        the website content.
    """
    # Label and separate the two inputs; plain concatenation would glue the
    # product name onto the last word of the scraped text.
    user_content = (
        f"{user_prompt}"
        f"Target product name: {product_name}\n\n"
        f"Website content:\n{website}"
    )
    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_content},
    ]


def summarize(url, product_name, model="llama3.2"):
    """Scrape a page and ask the chat model for matching product listings.

    Args:
        url: Address of the page to scrape.
        product_name: Name of the product to look for in the page.
        model: Name of the model passed to the chat completions call.
            Defaults to "llama3.2", the value previously hard-coded here.

    Returns:
        The content of the first choice in the model's response.
    """
    website = fetch_website_contents(url)
    response = ollama.chat.completions.create(
        model=model,
        messages=messages_for(website, product_name),
    )
    return response.choices[0].message.content

def display_summary(url, product_name):
    """Render the product summary for ``url`` as Markdown in the notebook.

    Args:
        url: Address of the page to scrape.
        product_name: Name of the product to look for in the page.
    """
    display(Markdown(summarize(url, product_name)))


# Example run: look for "static bikes" on the given page and show the result.
display_summary(url="https://amazon.com", product_name="static bikes")