
# 🧠 Multi-Product Competitor Intelligence Summarizer using Web Scraping + LLM

This notebook scrapes product pages with Selenium, collects the page text for each product, and summarizes key features and comparison insights using both a local Ollama model (Llama 3.2) and OpenAI (`gpt-4o-mini`).
    

In [0]:
# imports

import os
import requests
from dotenv import load_dotenv
from openai import OpenAI

# Load environment variables from a file called .env
# (override=True asks python-dotenv to let .env values replace existing ones).

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Check the key and print a hint for the most likely problem.
# Whitespace is tested before the prefix: a key with leading spaces or tabs
# would otherwise fail the prefix test first and get the wrong diagnosis.

if not api_key:
    print("No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!")
elif api_key.strip() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook")
elif not api_key.startswith("sk-proj-"):
    print("An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook")
else:
    print("API key found and looks good so far!")


In [0]:
# System prompt sent as the system message by summarize_with_openai.
# To experiment, append an instruction such as "Respond in markdown in Spanish."

system_prompt = (
    "Summarize the following product information for comparison."
)

In [0]:

# 📦 Install required packages (run once)
!pip install selenium bs4 requests


In [0]:
# Module-level OpenAI client; summarize_with_openai sends its requests through it.
# NOTE(review): no key is passed here, so the client presumably reads
# OPENAI_API_KEY from the environment — confirm against the openai client docs.
openai = OpenAI()

In [0]:
def summarize_with_openai(text, model="gpt-4o-mini"):
    """Summarize ``text`` with an OpenAI chat model.

    The module-level ``system_prompt`` is sent as the system message and
    ``text`` as the user message, through the module-level ``openai`` client.

    Args:
        text: Product information to summarize.
        model: Name of the chat model passed to the API.

    Returns:
        The message content of the first choice in the response.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": text},
    ]
    completion = openai.chat.completions.create(
        model=model,
        messages=conversation,
        temperature=0.7,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content




In [0]:

# ⚙️ Selenium setup (headless)
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import time

def scrape_text_from_url(url, wait_seconds=3):
    """Return the visible text of the page at ``url`` using headless Chrome.

    Args:
        url: Address of the page to load.
        wait_seconds: Fixed pause, in seconds, between navigating to the page
            and reading it (presumably to let dynamic content render).
            Defaults to 3.

    Returns:
        The text of the page's ``<body>`` element with leading and trailing
        whitespace removed.
    """
    options = Options()
    options.add_argument("--headless=new")
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(url)
        time.sleep(wait_seconds)

        # You can tune this selector depending on the site
        body = driver.find_element(By.TAG_NAME, 'body')
        return body.text.strip()
    finally:
        # Always shut the browser down, even when navigation or the element
        # lookup raises, so a failed scrape does not leave Chrome running.
        driver.quit()



In [0]:

# 🧠 LLM Prompting using Ollama (local llama3)
import subprocess

def summarize_with_ollama(text, model="llama3.2"):
    """Summarize ``text`` with a local model by invoking the ``ollama`` CLI.

    Args:
        text: Product description to summarize.
        model: Name of the model passed to ``ollama run``. Defaults to
            "llama3.2".

    Returns:
        The command's stdout with surrounding whitespace stripped, or a string
        starting with "Error running ollama: " when the command exits with a
        non-zero status or the ``ollama`` executable cannot be found.
    """
    prompt = f"Summarize the following product description:\n\n{text}\n\nSummary:"
    try:
        result = subprocess.run(
            ["ollama", "run", model],
            input=prompt,
            capture_output=True, text=True, check=True, encoding="utf-8"
        )
    except subprocess.CalledProcessError as e:
        return f"Error running ollama: {e.stderr}"
    except FileNotFoundError as e:
        # Raised when the ollama executable is not installed / not on PATH;
        # report it the same way as a failed run instead of crashing the cell.
        return f"Error running ollama: {e}"
    return result.stdout.strip()


In [0]:

# 🔁 Scrape every product page listed below
# Maps a display name to the product page that will be scraped for it.
product_urls = {
    "iPhone 15 Pro": "https://www.apple.com/in/iphone-15-pro/",
    "Samsung S24 Ultra": "https://www.samsung.com/in/smartphones/galaxy-s24-ultra/",
}

# Scraped page text, keyed by the same display names as product_urls.
product_texts = {}
for name, url in product_urls.items():
    print(f"Scraping {name} ...")
    page_text = scrape_text_from_url(url)
    product_texts[name] = page_text


In [0]:

# 📄 Print both summaries for each product, one product after another
for name, text in product_texts.items():
    # Each header is printed before its summarizer runs, so the header
    # appears ahead of anything the summarizer itself prints.
    print(f"\n🔹 {name} Summary with Ollama:")
    ollama_summary = summarize_with_ollama(text)
    print(ollama_summary)

    print(f"\n🔹 {name} Summary with OpenAI GPT:")
    openai_summary = summarize_with_openai(text)
    print(openai_summary)

    # Divider line between products.
    print("=" * 100)

