A demonstration of leveraging MCP capabilities with SLMs to perform simple tasks. I am deliberately building everything from scratch so that the complete process stays visible. The task at hand is to select the best restaurant in Sydney on the basis of reviews and ratings.
P.S. This notebook is not complete and is under active development.

In [None]:
# Uncomment the line below to install the required dependencies
# !pip install ollama selenium beautifulsoup4 webdriver-manager requests

In [None]:
import ollama
import time
import platform
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
from bs4 import BeautifulSoup

# Import necessary Service objects and managers (handle Safari separately)
try:
    from selenium.webdriver.chrome.service import Service as ChromeService
    from webdriver_manager.chrome import ChromeDriverManager
except ImportError:
    ChromeService = None
    ChromeDriverManager = None

try:
    from selenium.webdriver.firefox.service import Service as FirefoxService
    from webdriver_manager.firefox import GeckoDriverManager
except ImportError:
    FirefoxService = None
    GeckoDriverManager = None

try:
    from selenium.webdriver.safari.service import Service as SafariService
except ImportError:
    SafariService = None

In [None]:
# --- Configuration ---
# Name of the model passed to ollama.chat(). It must be a non-empty string
# naming a model available to your local Ollama server; "llama3.2" is only a
# placeholder default -- replace it with the model you have pulled.
OLLAMA_MODEL = "llama3.2"  # Enter your model name
SEARCH_ENGINE_URL = "https://html.duckduckgo.com/html/"  # HTML version of DuckDuckGo
SEARCH_QUERY = "best restaurants Sydney reviews ratings"
BROWSER_CHOICE = 'safari'  # Change to your desired browser ('chrome', 'firefox', or 'safari')
MAX_LINKS_TO_SEARCH = 3  # Number of website links to deep-search
WAIT_TIMEOUT = 10  # Seconds to wait for elements
RUN_HEADLESS = True  # Set to False to see the browser window

# --- CSS Selectors for HTML DuckDuckGo ---
DDG_RESULT_BLOCK_SELECTOR = "div.result"  # One block per search result
DDG_LINK_SELECTOR = "a.result__a"  # Links inside the result blocks

In [None]:
# --- Utility functions ---

def setup_driver(browser_choice, headless=True):
    """Create a Selenium WebDriver for the requested browser.

    Args:
        browser_choice: 'chrome', 'firefox' or 'safari' (case-insensitive).
        headless: Whether to request headless mode. For Safari the request
            is not honoured: a warning is printed and the browser runs with
            a visible window.

    Returns:
        A ``(driver, effective_headless)`` tuple, where the second item
        reports the headless setting that is actually in effect.

    Raises:
        ImportError: The components for the chosen browser failed to import.
        SystemError: Safari was requested on a platform other than macOS.
        ValueError: ``browser_choice`` is not one of the supported browsers.

    Any exception raised during setup is printed and then re-raised.
    """
    print(f"Setting up Selenium WebDriver for {browser_choice}...")
    headless_in_effect = headless
    try:
        browser = browser_choice.lower()

        if browser == 'chrome':
            if not (ChromeService and ChromeDriverManager):
                raise ImportError("Chrome components not installed. Run 'pip install selenium webdriver-manager'")
            chrome_opts = webdriver.ChromeOptions()
            # The headless flag (when requested) goes first, followed by the
            # two flags that are always applied.
            chrome_flags = ['--no-sandbox', '--disable-dev-shm-usage']
            if headless:
                chrome_flags.insert(0, '--headless')
            for flag in chrome_flags:
                chrome_opts.add_argument(flag)
            chrome_service = ChromeService(ChromeDriverManager().install())
            driver = webdriver.Chrome(service=chrome_service, options=chrome_opts)

        elif browser == 'firefox':
            if not (FirefoxService and GeckoDriverManager):
                raise ImportError("Firefox components not installed. Run 'pip install selenium webdriver-manager'")
            firefox_opts = webdriver.FirefoxOptions()
            if headless:
                firefox_opts.add_argument('--headless')
            firefox_service = FirefoxService(GeckoDriverManager().install())
            driver = webdriver.Firefox(service=firefox_service, options=firefox_opts)

        elif browser == 'safari':
            if platform.system() != 'Darwin':
                raise SystemError("Safari automation is only supported on macOS.")
            if not SafariService:
                raise ImportError("Safari components could not be imported.")
            if headless:
                print("Warning: Safari does not reliably support headless mode via Selenium. Running with visible window.")
                headless_in_effect = False
            print("INFO: Ensure 'Allow Remote Automation' is enabled in Safari's Develop menu.")
            driver = webdriver.Safari(options=webdriver.SafariOptions())

        else:
            raise ValueError("Unsupported browser choice. Use 'chrome', 'firefox', or 'safari'.")

        print(f"WebDriver setup complete (Headless: {headless_in_effect}).")
        return driver, headless_in_effect

    except Exception as e:
        # Report the failure here, but let the caller decide how to react.
        print(f"Error setting up WebDriver: {e}")
        raise

def perform_duckduckgo_search(driver, query):
    """Submit ``query`` on the HTML DuckDuckGo page and return the results HTML.

    Args:
        driver: Selenium WebDriver used to load the page and type the query.
        query: Search text typed into the search box.

    Returns:
        The page source once a result block is present, or ``None`` when a
        wait times out or any other error occurs (the error is printed).
    """
    print(f"Performing search for: '{query}' on {SEARCH_ENGINE_URL}")
    page_html = None
    try:
        driver.get(SEARCH_ENGINE_URL)
        time.sleep(1)  # Let the page load

        wait = WebDriverWait(driver, WAIT_TIMEOUT)

        # In the HTML version, the search box has name "q"
        query_input = wait.until(EC.presence_of_element_located((By.NAME, 'q')))
        for keys in (query, Keys.RETURN):
            query_input.send_keys(keys)
        print("Search submitted. Waiting for results...")

        # The first result block appearing means the results page has rendered.
        wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, DDG_RESULT_BLOCK_SELECTOR))
        )
        print("Search results loaded.")
        page_html = driver.page_source
    except TimeoutException:
        print("Error: Timed out waiting for DuckDuckGo elements.")
    except Exception as e:
        print(f"An error occurred during search: {e}")
    return page_html

def _resolve_ddg_href(href):
    """Return a directly loadable URL for a DuckDuckGo result href.

    The HTML results page typically wraps each destination in a redirect
    link of the form ``//duckduckgo.com/l/?uddg=<percent-encoded target>``.
    Such links are unwrapped to the target; protocol-relative links get an
    ``https:`` scheme; anything else is returned unchanged.
    """
    from urllib.parse import parse_qs, urlparse

    absolute = 'https:' + href if href.startswith('//') else href
    parsed = urlparse(absolute)
    host = parsed.netloc.lower()
    is_ddg_host = host == 'duckduckgo.com' or host.endswith('.duckduckgo.com')
    if is_ddg_host and parsed.path.rstrip('/') == '/l':
        # parse_qs already percent-decodes the value.
        targets = parse_qs(parsed.query).get('uddg')
        if targets:
            return targets[0]
    return absolute


def scrape_search_result_links(html_content):
    """Scrape the first few result links from the HTML version of DuckDuckGo.

    Args:
        html_content: HTML source of the results page. ``None`` or an empty
            string yields an empty list.

    Returns:
        A list of at most ``MAX_LINKS_TO_SEARCH`` destination URLs, in page
        order, with DuckDuckGo redirect links resolved to their targets.
    """
    if not html_content:
        return []

    links = []
    soup = BeautifulSoup(html_content, 'html.parser')
    result_blocks = soup.select(DDG_RESULT_BLOCK_SELECTOR)
    print(f"Found {len(result_blocks)} result blocks.")
    for block in result_blocks:
        # Check the limit before appending so a limit of 0 yields no links.
        if len(links) >= MAX_LINKS_TO_SEARCH:
            break
        link_tag = block.select_one(DDG_LINK_SELECTOR)
        if not link_tag or not link_tag.has_attr('href'):
            continue
        href = link_tag['href']
        if not href:
            continue
        url = _resolve_ddg_href(href)
        links.append(url)
        print(f"Scraped link: {url}")
    return links

def deep_search_on_site(driver, url):
    """Visit ``url`` and return the visible text of its ``<body>``.

    Args:
        driver: Selenium WebDriver used to load the page.
        url: Address of the page to load.

    Returns:
        The body text with whitespace-stripped fragments joined by single
        spaces, or ``""`` when the page has no ``<body>`` or any error occurs
        (the error is printed, not raised).
    """
    print(f"\nDeep searching site: {url}")
    try:
        driver.get(url)
        time.sleep(2)  # Allow time for page load; adjust as needed
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        # A simple heuristic: get the text of the <body>
        body = soup.find('body')
        if body is None:
            print("No <body> found.")
            return ""
        # get_text() also returns the source of inline scripts and styles,
        # which is not page content; drop those elements first.
        for non_content in body.find_all(['script', 'style', 'noscript']):
            non_content.decompose()
        text = body.get_text(separator=' ', strip=True)
        print(f"Extracted {len(text)} characters from {url}")
        return text
    except Exception as e:
        # Best-effort: a failing site must not abort the whole run.
        print(f"Error during deep search of {url}: {e}")
        return ""

def analyze_deep_results_with_ollama(deep_results, query):
    """Ask the Ollama model which scraped website looks most promising.

    Args:
        deep_results: Sequence of dicts with ``'url'`` and ``'content'`` keys.
        query: The search query; quoted in the context header sent to the model.

    Returns:
        The model's reply text, a fixed message when ``deep_results`` is
        empty, or an ``"Error analyzing results: ..."`` string when the call
        to Ollama fails (the error is printed, not raised).
    """
    print(f"\nSending deep search results from {len(deep_results)} websites to Ollama...")
    if not deep_results:
        return "No deep search results to analyze."

    # One section per website; only the first 1000 characters of each page
    # are passed on to limit the prompt size.
    site_sections = [
        f"Website {position} ({entry['url']}):\n" + entry['content'][:1000] + "\n\n"
        for position, entry in enumerate(deep_results, start=1)
    ]
    context = f"Deep search results for query '{query}':\n\n" + "".join(site_sections)

    prompt = (
        f"\n{context}\n"
        "Based on the content extracted from the above websites, which one appears to offer the best detailed information on good restaurants in Sydney? \n"
        "Please explain your reasoning and identify the website (by its URL) that seems most promising.\n"
    )

    try:
        system_message = {'role': 'system', 'content': 'You are an AI assistant analyzing deep search results for restaurant information.'}
        user_message = {'role': 'user', 'content': prompt}
        response = ollama.chat(model=OLLAMA_MODEL, messages=[system_message, user_message])
        print("Ollama analysis complete.")
        return response['message']['content']
    except Exception as e:
        print(f"Error communicating with Ollama: {e}")
        return f"Error analyzing results: {e}"


In [None]:
# --- Main Execution ---
# Pipeline: start a browser, search DuckDuckGo, collect the top result links,
# extract each page's text, then ask the Ollama model to compare the sites.
# The browser is always closed in the ``finally`` clause.
if __name__ == "__main__":
    driver = None
    try:
        # 1. Setup WebDriver (the effective-headless flag is not needed here)
        driver, _ = setup_driver(BROWSER_CHOICE, RUN_HEADLESS)

        # 2. Perform DuckDuckGo Search (HTML version)
        html = perform_duckduckgo_search(driver, SEARCH_QUERY)
        if not html:
            print("Failed to retrieve search page content.")
            # Raise SystemExit directly instead of calling exit(): that helper
            # is installed by the `site` module for interactive use and may be
            # missing or overridden (e.g. by IPython/Jupyter), in which case
            # execution would not reliably stop here.
            raise SystemExit(1)

        # 3. Scrape the first few result links
        links = scrape_search_result_links(html)
        if not links:
            print("No result links found.")
            raise SystemExit(1)

        # 4. Deep search each website and collect content
        deep_results = []
        for url in links:
            content = deep_search_on_site(driver, url)
            if content:
                deep_results.append({'url': url, 'content': content})
            else:
                print(f"Skipping {url} due to lack of content.")

        if not deep_results:
            print("No deep search results obtained from the websites.")
        else:
            # 5. Analyze the aggregated deep search content with Ollama
            analysis_result = analyze_deep_results_with_ollama(deep_results, SEARCH_QUERY)
            print("\n" + "="*20 + " FINAL DEEP SEARCH ANALYSIS " + "="*20)
            print(analysis_result)
            print("="*80)

    except Exception as e:
        # SystemExit is not an Exception subclass, so the early exits above
        # pass through this handler untouched.
        print(f"An unexpected error occurred: {e}")
    finally:
        if driver:
            print("\nClosing the browser...")
            driver.quit()
            print("Browser closed.")