In [3]:
import requests
from bs4 import BeautifulSoup
from elasticsearch import Elasticsearch
from sentence_transformers import SentenceTransformer
from urllib.parse import urlparse
import re
import time
from pprint import pprint 
from tqdm import tqdm
from typing import Optional, Callable

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
from config import (
    ELASTICSEARCH_URL,
    INDEX_NAME_HUGGINGFACE,
    INDEX_NAME_GEMINI,
    INDEX_NAME_OPENAI,
    SITEMAP_URL,
    HUGGINGFACE_EMBEDDING_MODEL_NAME,
    GEMINI_EMBEDDING_MODEL_NAME,
    OPENAI_EMBEDDING_MODEL_NAME
)

**Connect To Elastic Search**

In [5]:
es = Elasticsearch(ELASTICSEARCH_URL)
print("Connected to Elasticsearch!")

client_info = es.info()
pprint(client_info.body)

Connected to Elasticsearch!
{'cluster_name': 'docker-cluster',
 'cluster_uuid': 'T1HeaWnRTOqX_BBgREVVbA',
 'name': '64c49e436740',
 'tagline': 'You Know, for Search',
 'version': {'build_date': '2025-10-21T10:06:21.288851013Z',
             'build_flavor': 'default',
             'build_hash': '25d88452371273dd27356c98598287b669a03eae',
             'build_snapshot': False,
             'build_type': 'docker',
             'lucene_version': '10.3.1',
             'minimum_index_compatibility_version': '8.0.0',
             'minimum_wire_compatibility_version': '8.19.0',
             'number': '9.2.0'}}


**Query List**
# 
Since the doc is not public, I could not write script to read it. That's why I am using static list. In the future, we will 
use script to read data from a url.

In [30]:
query_list = [
    "What is baby botox and how does it work?",
    "Find sculptra providers in my area.",
    "What are fat dissolving injections for the chin?",
    "Where can I find botox treatments locally?",
    "What is lemon bottle fat dissolving?",
    "What is the cost of botox for neck pain treatment?",
    "lip flip services in Cork",
    "What are polynucleotides and how do they function?",
    "Provide details on Sculptra treatments available in Ireland.",
    "Where to get botox in Dublin",
    "Find Sculptra providers in my area.",
    "Find clinics offering Sculptra treatments in Ireland.",
    "Describe what Sculptra is and how it works.",
    "What is the cost of lip fillers in Ireland?",
    "What are effective neck tightening treatments?",
    "What is the cost of a thread lift in Ireland?",
    "Where can I find botox deals in Dublin?",
    "What are the benefits of eyebrow botox?",
    "What are the benefits of chin filler?",
    "Where can I get botox in Cork?",
    "Where can I get botox treatments in Waterford?",
    "What exactly is Sculptra?",
    "Explain Nefertiti Botox.",
    "How much does jawline filler cost",
    "What is facial cork used for",
    "What is blepharoplasty?",
    "Lip filler services in Galway",
    "How much does Sculptra cost in Dublin?",
    "Show me lip flip before and after results.",
    "Where can I find botox for 3 areas at 150?",
    "What are the best Sculptra clinics in Ireland?",
    "Find a blepharoplasty surgeon in Cork",
    "What are the best methods for under chin fat removal?",
    "Find beauty salons in Greystones.",
    "What are the benefits of filler under eyes?",
    "What is a profilo facial treatment",
    "What are the options for non-surgical rhinoplasty in Ireland?",
    "What is a Nefertiti neck lift procedure?",
    "What are the benefits of lip filler",
    "What are effective body fat reduction treatments?",
    "Where can I get botox in dublin",
    "Find clinics offering Sculptra in Cork.",
    "What are polynucleotides for under eyes",
    "Find Botox clinics in Dublin.",
    "Are there any botox offers available",
    "What is gummy smile botox treatment",
    "I need to find information about botox services in Dublin.",
    "lip flip procedure in cork",
    "Where to find botox services in Dublin City Centre?",
    "What is Sculptra?"
]

len(query_list)

50

Crate Index utils function

In [6]:
def create_index(index_name: str):
    if es.indices.exists(index=index_name):
        print(f"Index '{index_name}' already exists.")
        return

    mapping = {
        "mappings": {
            "properties": {
                "url": {"type": "keyword"},
                "title": {"type": "text"},
                "content": {"type": "text"},
                "embedding": {"type": "dense_vector", "dims": 384}
            }
        }
    }

    es.indices.create(index=index_name, body=mapping)
    print(f"Index '{index_name}' created successfully.")

Extract page content utils function

In [7]:
def extract_page_text(url):
    try:
        res = requests.get(url, timeout=10)
        res.raise_for_status()
        soup = BeautifulSoup(res.text, "html.parser")

        # Remove unwanted elements
        for tag in soup(["script", "style", "noscript"]):
            tag.extract()

        title = soup.title.string.strip() if soup.title else "Untitled"
        text = re.sub(r"\s+", " ", soup.get_text(separator=" ", strip=True))
        return title, text

    except Exception as e:
        print(f"Error fetching {url}: {e}")
        return None, None

create_knn_query utils func

In [35]:
# The search query structure for k-NN
def create_knn_query(query_vector, k_value=5):
    """
    Creates an Elasticsearch k-NN search body.
    query_vector: The vector embedding of the user's query text.
    k_value: The number of nearest neighbors to return.
    """
    return {
        "knn": {
            "field": "embedding",  # Replace with your vector field name
            "query_vector": query_vector,
            "k": k_value,
            "num_candidates": 100  # Number of candidates to consider (tuning parameter)
        },
        # You may also include "fields" to limit the data returned for each hit
        "fields": ["url", "_id"]
    }

get_embedder model utils function

In [8]:
def get_embedder(model_type: str, api_key: Optional[str] = None) -> Callable[[str], list[float]]:
    """
    Initializes the embedding model and returns a function to encode text.
    
    Args:
        model_type: 'HuggingFace', 'Gemini', or 'OpenAI'.
        api_key: The necessary API key for the chosen service (if applicable).
        
    Returns:
        A callable function that takes a text string and returns a list of floats (embedding).
    """
    model_type = model_type.lower()

    if model_type == "huggingface":
        # Requires: pip install sentence-transformers
        # The specific model (e.g., 'all-MiniLM-L6-v2') can be passed as an argument
        try:
            from sentence_transformers import SentenceTransformer
        except ImportError:
            raise ImportError("HuggingFace embedder requires 'sentence-transformers'. Please install it.")
        
        # Initialize the model (e.g., a common sentence-transformer model)
        hf_model = SentenceTransformer(HUGGINGFACE_EMBEDDING_MODEL_NAME) 
        # Return a lambda that calls the encode method and converts to list
        return lambda content: hf_model.encode(content).tolist()

    elif model_type == "gemini":
        # Requires: pip install google-genai
        try:
            from google import genai
        except ImportError:
            raise ImportError("Gemini embedder requires 'google-genai'. Please install it.")
        
        if not api_key:
             raise ValueError("Gemini embedder requires an API key.")

        client = genai.Client(api_key=api_key)
        
        def gemini_embed(content: str) -> list[float]:
            # Use the 'embedding-001' model for general embeddings
            response = client.models.embed_content(
                model=GEMINI_EMBEDDING_MODEL_NAME,
                content=content,
                task_type='RETRIEVAL_DOCUMENT'
            )
            return response['embedding']

        return gemini_embed

    elif model_type == "openai":
        # Requires: pip install openai
        try:
            from openai import OpenAI
        except ImportError:
            raise ImportError("OpenAI embedder requires 'openai'. Please install it.")
        
        if not api_key:
             raise ValueError("OpenAI embedder requires an API key.")
             
        client = OpenAI(api_key=api_key)

        def openai_embed(content: str) -> list[float]:
            # Use the standard text-embedding-3-small model
            response = client.embeddings.create(
                input=content,
                model=OPENAI_EMBEDDING_MODEL_NAME
            )
            # The API returns a list of embeddings, we take the first (and only) one
            return response.data[0].embedding

        return openai_embed

    else:
        raise ValueError(f"Unknown model type: {model_type}. Must be 'HuggingFace', 'Gemini', or 'OpenAI'.")

scrape sitemap and index pages utils func

In [None]:
def scrape_and_index(model_type: str, index_name: str, api_key: Optional[str] = None):
    '''
    Args:
        model_type: 'HuggingFace', 'Gemini', or 'OpenAI'.
        index_name: str
        api_key: The necessary API key for the chosen service (if applicable).
    Return: None
    '''
    # 1. Get Embedder Function
    try:
        # Get the callable function that performs the embedding
        embed_content = get_embedder(model_type, api_key=api_key) 
    except (ImportError, ValueError) as e:
        print(f"FATAL ERROR: Could not initialize embedder. {e}")
        return

    # 2. Scrape Sitemap
    print(f"Fetching sitemap from: {SITEMAP_URL}")
    sitemap_xml = requests.get(SITEMAP_URL).text
    soup = BeautifulSoup(sitemap_xml, "xml")

    # Filter URLs to the specific path
    urls = [loc.text for loc in soup.find_all("loc") if "/en-ie/" in loc.text]
    
    print(f"Found **{len(urls)}** URLs to crawl.")
    pprint(urls[:5])
    print("...")

    # 3. Process and Index Pages
    total_urls = len(urls)
    operations = []
    count_operation = 0
    batch_size = 50
    for url in tqdm(urls, total=total_urls, desc="Fetching and Indexing pages..."):
        count_operation += 1
        operations.append({"index": {"_index": index_name}})
        try:
            # Scrape content
            title, content = extract_page_text(url)
            
            # Skip if content is empty
            if not content:
                print(f"Skipped: Empty content for {url}")
                continue

            # Generate Embedding
            # Now we use the returned callable function 'embed_content'
            full_text = str(title) + str(content)
            embedding = embed_content(full_text)

            # Create Document
            doc = {
                "url": url,
                "title": title,
                "content": content,
                "embedding": embedding.tolist()
            }

            operations.append(doc)

            if count_operation >= batch_size:
                es.bulk(operations=operations)
                count_operation = 0
                operations.clear()

            # Index Document
            es.index(index=index_name, document=doc)
            # print(f"Indexed: {url}")

        except Exception as e:
            print(f"Failed to process/index {url}. Error: {e}")
    
    if operations:
        es.bulk(operations=operations)


**Scrape the pages from the sitemap and use HuggingFace Embedding Model**

In [19]:
create_index(index_name=INDEX_NAME_HUGGINGFACE)
scrape_and_index(model_type='HuggingFace', index_name=INDEX_NAME_HUGGINGFACE)

Index 'huggingface' created successfully.


'(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: e1d14493-30ed-4c55-8e51-b827a3f7db86)')' thrown while requesting HEAD https://huggingface.co/api/resolve-cache/models/sentence-transformers/all-MiniLM-L6-v2/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/tokenizer_config.json
Retrying in 1s [Retry 1/5].


Fetching sitemap from: https://www.sisuclinic.com/sitemap.xml
Found **141** URLs to crawl.
['https://www.sisuclinic.com/en-ie/treatments',
 'https://www.sisuclinic.com/en-ie/providers',
 'https://www.sisuclinic.com/en-ie/faqs',
 'https://www.sisuclinic.com/en-ie/locations',
 'https://www.sisuclinic.com/en-ie/about-us']
...


Fetching and Indexing pages...:   1%|          | 1/141 [00:02<05:17,  2.27s/it]

Indexed: https://www.sisuclinic.com/en-ie/treatments


Fetching and Indexing pages...:   1%|▏         | 2/141 [00:03<03:41,  1.59s/it]

Indexed: https://www.sisuclinic.com/en-ie/providers


Fetching and Indexing pages...:   2%|▏         | 3/141 [00:04<03:30,  1.52s/it]

Indexed: https://www.sisuclinic.com/en-ie/faqs


Fetching and Indexing pages...:   3%|▎         | 4/141 [00:05<03:04,  1.35s/it]

Indexed: https://www.sisuclinic.com/en-ie/locations


Fetching and Indexing pages...:   4%|▎         | 5/141 [00:07<02:53,  1.28s/it]

Indexed: https://www.sisuclinic.com/en-ie/about-us


Fetching and Indexing pages...:   4%|▍         | 6/141 [00:08<03:10,  1.41s/it]

Indexed: https://www.sisuclinic.com/en-ie/pricing


Fetching and Indexing pages...:   5%|▍         | 7/141 [00:10<03:06,  1.39s/it]

Indexed: https://www.sisuclinic.com/en-ie/results


Fetching and Indexing pages...:   6%|▌         | 8/141 [00:11<02:51,  1.29s/it]

Indexed: https://www.sisuclinic.com/en-ie/accessibility


Fetching and Indexing pages...:   6%|▋         | 9/141 [00:12<03:00,  1.37s/it]

Indexed: https://www.sisuclinic.com/en-ie/contact-us


Fetching and Indexing pages...:   7%|▋         | 10/141 [00:13<02:44,  1.26s/it]

Indexed: https://www.sisuclinic.com/en-ie/get-the-look


Fetching and Indexing pages...:   8%|▊         | 11/141 [00:14<02:36,  1.20s/it]

Indexed: https://www.sisuclinic.com/en-ie/pre-post-treatment-care


Fetching and Indexing pages...:   9%|▊         | 12/141 [00:15<02:34,  1.20s/it]

Indexed: https://www.sisuclinic.com/en-ie/blogs


Fetching and Indexing pages...:   9%|▉         | 13/141 [00:17<02:31,  1.18s/it]

Indexed: https://www.sisuclinic.com/en-ie/press


Fetching and Indexing pages...:  10%|▉         | 14/141 [00:18<02:45,  1.31s/it]

Indexed: https://www.sisuclinic.com/en-ie/shop


Fetching and Indexing pages...:  11%|█         | 15/141 [00:19<02:43,  1.30s/it]

Indexed: https://www.sisuclinic.com/en-ie/digital-gift-voucher


Fetching and Indexing pages...:  11%|█▏        | 16/141 [00:21<03:04,  1.48s/it]

Indexed: https://www.sisuclinic.com/en-ie/sisu-body-quiz


Fetching and Indexing pages...:  12%|█▏        | 17/141 [00:23<02:50,  1.37s/it]

Indexed: https://www.sisuclinic.com/en-ie/sisu-facial-quiz


Fetching and Indexing pages...:  13%|█▎        | 18/141 [00:24<02:41,  1.31s/it]

Indexed: https://www.sisuclinic.com/en-ie/reviews


Fetching and Indexing pages...:  13%|█▎        | 19/141 [00:25<02:29,  1.23s/it]

Indexed: https://www.sisuclinic.com/en-ie/botox-dysport-fl


Fetching and Indexing pages...:  14%|█▍        | 20/141 [00:26<02:23,  1.19s/it]

Indexed: https://www.sisuclinic.com/en-ie/polynucleotides


Fetching and Indexing pages...:  15%|█▍        | 21/141 [00:27<02:19,  1.16s/it]

Indexed: https://www.sisuclinic.com/en-ie/our-clinics/botox-dublin


Fetching and Indexing pages...:  16%|█▌        | 22/141 [00:28<02:13,  1.12s/it]

Indexed: https://www.sisuclinic.com/en-ie/our-clinics/botox-cork


Fetching and Indexing pages...:  16%|█▋        | 23/141 [00:29<02:08,  1.09s/it]

Indexed: https://www.sisuclinic.com/en-ie/our-clinics/botox-galway


Fetching and Indexing pages...:  17%|█▋        | 24/141 [00:30<02:03,  1.06s/it]

Indexed: https://www.sisuclinic.com/en-ie/our-clinics/botox-killarney


Fetching and Indexing pages...:  18%|█▊        | 25/141 [00:31<02:06,  1.09s/it]

Indexed: https://www.sisuclinic.com/en-ie/our-clinics/botox-limerick


Fetching and Indexing pages...:  18%|█▊        | 26/141 [00:32<02:11,  1.14s/it]

Indexed: https://www.sisuclinic.com/en-ie/our-clinics/botox-naas


Fetching and Indexing pages...:  19%|█▉        | 27/141 [00:33<02:01,  1.06s/it]

Indexed: https://www.sisuclinic.com/en-ie/our-clinics/botox-waterford


Fetching and Indexing pages...:  20%|█▉        | 28/141 [00:34<01:55,  1.02s/it]

Indexed: https://www.sisuclinic.com/en-ie/our-clinics/botox-belfast


Fetching and Indexing pages...:  21%|██        | 29/141 [00:35<02:03,  1.10s/it]

Indexed: https://www.sisuclinic.com/en-ie/our-clinics/botox-hampstead


Fetching and Indexing pages...:  21%|██▏       | 30/141 [00:36<01:59,  1.08s/it]

Indexed: https://www.sisuclinic.com/en-ie/our-clinics/botox-mayfair


Fetching and Indexing pages...:  22%|██▏       | 31/141 [00:37<01:53,  1.03s/it]

Indexed: https://www.sisuclinic.com/en-ie/our-clinics/botox-richmond


Fetching and Indexing pages...:  23%|██▎       | 32/141 [00:39<01:59,  1.09s/it]

Indexed: https://www.sisuclinic.com/en-ie/our-clinics/botox-dysport-flatiron


Fetching and Indexing pages...:  23%|██▎       | 33/141 [00:40<01:52,  1.04s/it]

Indexed: https://www.sisuclinic.com/en-ie/our-clinics/botox-dysport-soho


Fetching and Indexing pages...:  24%|██▍       | 34/141 [00:41<01:50,  1.04s/it]

Indexed: https://www.sisuclinic.com/en-ie/our-clinics/botox-dysport-fort-lauderdale


Fetching and Indexing pages...:  25%|██▍       | 35/141 [00:42<02:01,  1.15s/it]

Indexed: https://www.sisuclinic.com/en-ie/our-clinics/botox-dysport-houston


Fetching and Indexing pages...:  26%|██▌       | 36/141 [00:43<01:55,  1.10s/it]

Indexed: https://www.sisuclinic.com/en-ie/our-clinics/botox-dysport-miami


Fetching and Indexing pages...:  26%|██▌       | 37/141 [00:44<01:52,  1.09s/it]

Indexed: https://www.sisuclinic.com/en-ie/careers


Fetching and Indexing pages...:  27%|██▋       | 38/141 [00:46<02:20,  1.36s/it]

Indexed: https://www.sisuclinic.com/en-ie/polynucleotides-for-eyes


Fetching and Indexing pages...:  28%|██▊       | 39/141 [00:47<02:17,  1.35s/it]

Indexed: https://www.sisuclinic.com/en-ie/bridal-package


Fetching and Indexing pages...:  28%|██▊       | 40/141 [00:48<02:06,  1.25s/it]

Indexed: https://www.sisuclinic.com/en-ie/polynucleotides-hair-loss-treatment


Fetching and Indexing pages...:  29%|██▉       | 41/141 [00:52<03:06,  1.86s/it]

Indexed: https://www.sisuclinic.com/en-ie/cheek-filler


Fetching and Indexing pages...:  30%|██▉       | 42/141 [00:53<02:43,  1.65s/it]

Indexed: https://www.sisuclinic.com/en-ie/exclusive-packages


Fetching and Indexing pages...:  30%|███       | 43/141 [00:54<02:33,  1.57s/it]

Indexed: https://www.sisuclinic.com/en-ie/botox-dysport-nyc


Fetching and Indexing pages...:  31%|███       | 44/141 [00:56<02:34,  1.59s/it]

Indexed: https://www.sisuclinic.com/en-ie/sisu-vision-ai


Fetching and Indexing pages...:  32%|███▏      | 45/141 [00:58<02:34,  1.61s/it]

Indexed: https://www.sisuclinic.com/en-ie/blepharoplasty


Fetching and Indexing pages...:  33%|███▎      | 46/141 [00:58<02:13,  1.41s/it]

Indexed: https://www.sisuclinic.com/en-ie/locations/ballincollig


Fetching and Indexing pages...:  33%|███▎      | 47/141 [00:59<02:00,  1.28s/it]

Indexed: https://www.sisuclinic.com/en-ie/locations/blackrock-dublin


Fetching and Indexing pages...:  34%|███▍      | 48/141 [01:00<01:52,  1.21s/it]

Indexed: https://www.sisuclinic.com/en-ie/locations/castleknock-dublin


Fetching and Indexing pages...:  35%|███▍      | 49/141 [01:02<01:54,  1.24s/it]

Indexed: https://www.sisuclinic.com/en-ie/locations/cork-city-centre


Fetching and Indexing pages...:  35%|███▌      | 50/141 [01:03<01:57,  1.29s/it]

Indexed: https://www.sisuclinic.com/en-ie/locations/douglas-village-shopping-centre-cork-city


Fetching and Indexing pages...:  36%|███▌      | 51/141 [01:04<01:56,  1.29s/it]

Indexed: https://www.sisuclinic.com/en-ie/locations/dublin-city-centre


Fetching and Indexing pages...:  37%|███▋      | 52/141 [01:05<01:43,  1.17s/it]

Indexed: https://www.sisuclinic.com/en-ie/locations/galway-city-centre


Fetching and Indexing pages...:  38%|███▊      | 53/141 [01:07<01:50,  1.26s/it]

Indexed: https://www.sisuclinic.com/en-ie/locations/greystones-wicklow


Fetching and Indexing pages...:  38%|███▊      | 54/141 [01:08<01:50,  1.27s/it]

Indexed: https://www.sisuclinic.com/en-ie/locations/harvey-nichols-dundrum-dublin


Fetching and Indexing pages...:  39%|███▉      | 55/141 [01:09<01:45,  1.22s/it]

Indexed: https://www.sisuclinic.com/en-ie/locations/killarney-kerry


Fetching and Indexing pages...:  40%|███▉      | 56/141 [01:11<01:51,  1.31s/it]

Indexed: https://www.sisuclinic.com/en-ie/locations/limerick-city-centre


Fetching and Indexing pages...:  40%|████      | 57/141 [01:12<01:51,  1.33s/it]

Indexed: https://www.sisuclinic.com/en-ie/locations/naas-kildare


Fetching and Indexing pages...:  41%|████      | 58/141 [01:13<01:46,  1.29s/it]

Indexed: https://www.sisuclinic.com/en-ie/locations/ranelagh-dublin


Fetching and Indexing pages...:  42%|████▏     | 59/141 [01:16<02:08,  1.57s/it]

Indexed: https://www.sisuclinic.com/en-ie/locations/sutton-dublin


Fetching and Indexing pages...:  43%|████▎     | 60/141 [01:17<01:55,  1.42s/it]

Indexed: https://www.sisuclinic.com/en-ie/locations/terenure-dublin


Fetching and Indexing pages...:  43%|████▎     | 61/141 [01:18<01:50,  1.38s/it]

Indexed: https://www.sisuclinic.com/en-ie/locations/waterford-city-centre


Fetching and Indexing pages...:  44%|████▍     | 62/141 [01:19<01:47,  1.37s/it]

Indexed: https://www.sisuclinic.com/en-ie/treatments/anti-wrinkle


Fetching and Indexing pages...:  45%|████▍     | 63/141 [01:21<01:55,  1.48s/it]

Indexed: https://www.sisuclinic.com/en-ie/treatments/anti-wrinkle-treatment-for-men


Fetching and Indexing pages...:  45%|████▌     | 64/141 [01:22<01:54,  1.49s/it]

Indexed: https://www.sisuclinic.com/en-ie/treatments/body-fat-reduction


Fetching and Indexing pages...:  46%|████▌     | 65/141 [01:24<01:46,  1.41s/it]

Indexed: https://www.sisuclinic.com/en-ie/treatments/brow-lift


Fetching and Indexing pages...:  47%|████▋     | 66/141 [01:25<01:43,  1.38s/it]

Indexed: https://www.sisuclinic.com/en-ie/treatments/chin-fat-reduction


Fetching and Indexing pages...:  48%|████▊     | 67/141 [01:26<01:32,  1.25s/it]

Indexed: https://www.sisuclinic.com/en-ie/treatments/chin-filler


Fetching and Indexing pages...:  48%|████▊     | 68/141 [01:28<01:50,  1.52s/it]

Indexed: https://www.sisuclinic.com/en-ie/treatments/decolletage


Fetching and Indexing pages...:  49%|████▉     | 69/141 [01:29<01:37,  1.36s/it]

Indexed: https://www.sisuclinic.com/en-ie/treatments/dermal-filler


Fetching and Indexing pages...:  50%|████▉     | 70/141 [01:30<01:30,  1.28s/it]

Indexed: https://www.sisuclinic.com/en-ie/treatments/gummy-smile


Fetching and Indexing pages...:  50%|█████     | 71/141 [01:31<01:23,  1.19s/it]

Indexed: https://www.sisuclinic.com/en-ie/treatments/hyperhidrosis


Fetching and Indexing pages...:  51%|█████     | 72/141 [01:32<01:21,  1.18s/it]

Indexed: https://www.sisuclinic.com/en-ie/treatments/jaw-sculpting


Fetching and Indexing pages...:  52%|█████▏    | 73/141 [01:33<01:16,  1.12s/it]

Indexed: https://www.sisuclinic.com/en-ie/treatments/lip-fillers


Fetching and Indexing pages...:  52%|█████▏    | 74/141 [01:35<01:24,  1.26s/it]

Indexed: https://www.sisuclinic.com/en-ie/treatments/lip-flip


Fetching and Indexing pages...:  53%|█████▎    | 75/141 [01:36<01:24,  1.29s/it]

Indexed: https://www.sisuclinic.com/en-ie/treatments/migraines


Fetching and Indexing pages...:  54%|█████▍    | 76/141 [01:37<01:18,  1.21s/it]

Indexed: https://www.sisuclinic.com/en-ie/treatments/neck-lift


Fetching and Indexing pages...:  55%|█████▍    | 77/141 [01:39<01:20,  1.26s/it]

Indexed: https://www.sisuclinic.com/en-ie/treatments/non-surgical-rhinoplasty


Fetching and Indexing pages...:  55%|█████▌    | 78/141 [01:39<01:10,  1.12s/it]

Error fetching https://www.sisuclinic.com/en-ie/treatments/polynucleotides: 'NoneType' object has no attribute 'strip'
Skipped: Empty content for https://www.sisuclinic.com/en-ie/treatments/polynucleotides


Fetching and Indexing pages...:  56%|█████▌    | 79/141 [01:41<01:22,  1.33s/it]

Indexed: https://www.sisuclinic.com/en-ie/treatments/polynucleotides-for-eyes


Fetching and Indexing pages...:  57%|█████▋    | 80/141 [01:43<01:21,  1.33s/it]

Indexed: https://www.sisuclinic.com/en-ie/treatments/profhilo-body


Fetching and Indexing pages...:  57%|█████▋    | 81/141 [01:44<01:16,  1.28s/it]

Indexed: https://www.sisuclinic.com/en-ie/treatments/profhilo-face


Fetching and Indexing pages...:  58%|█████▊    | 82/141 [01:46<01:33,  1.59s/it]

Indexed: https://www.sisuclinic.com/en-ie/treatments/sculptra-r


Fetching and Indexing pages...:  59%|█████▉    | 83/141 [01:47<01:26,  1.49s/it]

Indexed: https://www.sisuclinic.com/en-ie/treatments/sisu-balance


Fetching and Indexing pages...:  60%|█████▉    | 84/141 [01:48<01:15,  1.33s/it]

Indexed: https://www.sisuclinic.com/en-ie/treatments/sisu-glow


Fetching and Indexing pages...:  60%|██████    | 85/141 [01:50<01:26,  1.55s/it]

Indexed: https://www.sisuclinic.com/en-ie/treatments/sisu-harmonize


Fetching and Indexing pages...:  61%|██████    | 86/141 [01:52<01:21,  1.48s/it]

Indexed: https://www.sisuclinic.com/en-ie/treatments/sisu-liquid-facelift


Fetching and Indexing pages...:  62%|██████▏   | 87/141 [01:55<01:47,  2.00s/it]

Indexed: https://www.sisuclinic.com/en-ie/treatments/sisu-platinum


Fetching and Indexing pages...:  62%|██████▏   | 88/141 [01:56<01:32,  1.75s/it]

Indexed: https://www.sisuclinic.com/en-ie/treatments/sisu-rejuvenate


Fetching and Indexing pages...:  63%|██████▎   | 89/141 [01:57<01:19,  1.53s/it]

Indexed: https://www.sisuclinic.com/en-ie/treatments/sisu-revitalize


Fetching and Indexing pages...:  64%|██████▍   | 90/141 [01:58<01:09,  1.35s/it]

Indexed: https://www.sisuclinic.com/en-ie/treatments/sisu-shine


Fetching and Indexing pages...:  65%|██████▍   | 91/141 [01:59<01:01,  1.23s/it]

Indexed: https://www.sisuclinic.com/en-ie/treatments/sisu-signature


Fetching and Indexing pages...:  65%|██████▌   | 92/141 [02:00<01:01,  1.25s/it]

Indexed: https://www.sisuclinic.com/en-ie/treatments/teeth-grinding


Fetching and Indexing pages...:  66%|██████▌   | 93/141 [02:01<00:58,  1.23s/it]

Indexed: https://www.sisuclinic.com/en-ie/treatments/the-taylor-package


Fetching and Indexing pages...:  67%|██████▋   | 94/141 [02:03<00:57,  1.23s/it]

Indexed: https://www.sisuclinic.com/en-ie/treatments/thread-lift


Fetching and Indexing pages...:  67%|██████▋   | 95/141 [02:04<00:55,  1.22s/it]

Indexed: https://www.sisuclinic.com/en-ie/treatments/traptox


Fetching and Indexing pages...:  68%|██████▊   | 96/141 [02:05<00:51,  1.15s/it]

Indexed: https://www.sisuclinic.com/en-ie/treatments/under-eye-filler


Fetching and Indexing pages...:  69%|██████▉   | 97/141 [02:06<00:47,  1.08s/it]

Indexed: https://www.sisuclinic.com/en-ie/blogs/a-friendly-guide-to-starting-your-aesthetic-journey-with-sisu-clinic


Fetching and Indexing pages...:  70%|██████▉   | 98/141 [02:07<00:44,  1.02s/it]

Indexed: https://www.sisuclinic.com/en-ie/blogs/aesthetic-treatments-for-brides-and-grooms-the-ins-the-outs-and-everything-you-need-to-know


Fetching and Indexing pages...:  70%|███████   | 99/141 [02:07<00:40,  1.03it/s]

Indexed: https://www.sisuclinic.com/en-ie/blogs/aesthetic-treatments-for-men-breaking-down-the-stigma-ie


Fetching and Indexing pages...:  71%|███████   | 100/141 [02:08<00:40,  1.02it/s]

Failed to process/index https://www.sisuclinic.com/en-ie/blogs/all-you-need-to-know-about-our-masseter-treatment. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')


Fetching and Indexing pages...:  72%|███████▏  | 101/141 [02:09<00:38,  1.05it/s]

Failed to process/index https://www.sisuclinic.com/en-ie/blogs/all-you-need-to-know-about-trapezius-aka-swan-neck. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')


Fetching and Indexing pages...:  72%|███████▏  | 102/141 [02:10<00:37,  1.04it/s]

Failed to process/index https://www.sisuclinic.com/en-ie/blogs/best-botox-in-dundrum. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')


Fetching and Indexing pages...:  73%|███████▎  | 103/141 [02:12<00:41,  1.09s/it]

Failed to process/index https://www.sisuclinic.com/en-ie/blogs/bridal-anti-wrinkle-how-to-get-a-picture-perfect-wedding-day. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')


Fetching and Indexing pages...:  74%|███████▍  | 104/141 [02:13<00:39,  1.08s/it]

Failed to process/index https://www.sisuclinic.com/en-ie/blogs/can-aesthetic-medicine-help-treat-acne-all-you-need-to-know. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')


Fetching and Indexing pages...:  74%|███████▍  | 105/141 [02:14<00:36,  1.01s/it]

Failed to process/index https://www.sisuclinic.com/en-ie/blogs/can-you-exercise-after-anti-wrinkle-treatment. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')


Fetching and Indexing pages...:  75%|███████▌  | 106/141 [02:15<00:37,  1.06s/it]

Failed to process/index https://www.sisuclinic.com/en-ie/blogs/can-you-get-anti-wrinkle-whilst-pregnant. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')


Fetching and Indexing pages...:  76%|███████▌  | 107/141 [02:16<00:34,  1.00s/it]

Failed to process/index https://www.sisuclinic.com/en-ie/blogs/everything-you-need-to-know-about-our-lip-filler-treatment. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')


Fetching and Indexing pages...:  77%|███████▋  | 108/141 [02:17<00:31,  1.04it/s]

Failed to process/index https://www.sisuclinic.com/en-ie/blogs/from-lips-to-jawline-the-best-places-to-get-dermal-fillers-what-they-do. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')


Fetching and Indexing pages...:  77%|███████▋  | 109/141 [02:18<00:31,  1.02it/s]

Failed to process/index https://www.sisuclinic.com/en-ie/blogs/how-long-does-lip-filler-swelling-last-all-you-need-to-know. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')


Fetching and Indexing pages...:  78%|███████▊  | 110/141 [02:18<00:29,  1.07it/s]

Failed to process/index https://www.sisuclinic.com/en-ie/blogs/how-men-can-start-a-self-care-routine. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')


Fetching and Indexing pages...:  79%|███████▊  | 111/141 [02:19<00:27,  1.07it/s]

Failed to process/index https://www.sisuclinic.com/en-ie/blogs/how-to-balance-facial-asymmetry. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')


Fetching and Indexing pages...:  79%|███████▉  | 112/141 [02:20<00:25,  1.13it/s]

Failed to process/index https://www.sisuclinic.com/en-ie/blogs/how-to-fix-hollow-sunken-baggy-under-eyes. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')


Fetching and Indexing pages...:  80%|████████  | 113/141 [02:21<00:26,  1.07it/s]

Failed to process/index https://www.sisuclinic.com/en-ie/blogs/how-to-improve-jawline-definition. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')


Fetching and Indexing pages...:  81%|████████  | 114/141 [02:22<00:25,  1.04it/s]

Failed to process/index https://www.sisuclinic.com/en-ie/blogs/how-to-increase-collagen-production-in-skin. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')


Fetching and Indexing pages...:  82%|████████▏ | 115/141 [02:23<00:24,  1.08it/s]

Failed to process/index https://www.sisuclinic.com/en-ie/blogs/how-to-treat-excessive-sweatingg. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')


Fetching and Indexing pages...:  82%|████████▏ | 116/141 [02:24<00:23,  1.06it/s]

Failed to process/index https://www.sisuclinic.com/en-ie/blogs/introducing-sisu-vision-ai-see-your-treatment-results-before-you-even-book. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')


Fetching and Indexing pages...:  83%|████████▎ | 117/141 [02:25<00:21,  1.10it/s]

Failed to process/index https://www.sisuclinic.com/en-ie/blogs/kim-k-jennifer-aniston-obsessed-with-salmon-sperm-facials-power-of-polynucleotides. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')


Fetching and Indexing pages...:  84%|████████▎ | 118/141 [02:26<00:20,  1.10it/s]

Failed to process/index https://www.sisuclinic.com/en-ie/blogs/kim-kardashian-and-neck-anti-wrinkle-treatment-the-aesthetic-revolution. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')


Fetching and Indexing pages...:  84%|████████▍ | 119/141 [02:27<00:20,  1.07it/s]

Failed to process/index https://www.sisuclinic.com/en-ie/blogs/lemon-bottle-revolutionary-fat-dissolver-or-is-it-social-media-hype. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')


Fetching and Indexing pages...:  85%|████████▌ | 120/141 [02:28<00:19,  1.06it/s]

Failed to process/index https://www.sisuclinic.com/en-ie/blogs/lip-fillers-sisu-clinics-nationwide. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')


Fetching and Indexing pages...:  86%|████████▌ | 121/141 [02:29<00:18,  1.06it/s]

Failed to process/index https://www.sisuclinic.com/en-ie/blogs/lip-flip-gummy-smile-what-is-the-difference. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')


Fetching and Indexing pages...:  87%|████████▋ | 122/141 [02:30<00:18,  1.01it/s]

Failed to process/index https://www.sisuclinic.com/en-ie/blogs/lip-flip-vs-lip-filler-which-one-gives-the-best-results. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')


Fetching and Indexing pages...:  87%|████████▋ | 123/141 [02:31<00:17,  1.05it/s]

Failed to process/index https://www.sisuclinic.com/en-ie/blogs/lying-down-after-anti-wrinkle-is-it-a-good-idea. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')


Fetching and Indexing pages...:  88%|████████▊ | 124/141 [02:32<00:17,  1.01s/it]

Failed to process/index https://www.sisuclinic.com/en-ie/blogs/meet-laila-our-newest-24-7-365-employee-at-sisu-clinic. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')


Fetching and Indexing pages...:  89%|████████▊ | 125/141 [02:33<00:15,  1.03it/s]

Failed to process/index https://www.sisuclinic.com/en-ie/blogs/preventative-anti-wrinkle-treatments-all-you-need-to-know. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')


Fetching and Indexing pages...:  89%|████████▉ | 126/141 [02:33<00:13,  1.07it/s]

Failed to process/index https://www.sisuclinic.com/en-ie/blogs/sculptra-collagen-injections-in-ireland-restore-youthful-volume-naturally. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')


Fetching and Indexing pages...:  90%|█████████ | 127/141 [02:34<00:13,  1.05it/s]

Failed to process/index https://www.sisuclinic.com/en-ie/blogs/should-i-get-preventative-anti-wrinkle-treatment. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')


Fetching and Indexing pages...:  91%|█████████ | 128/141 [02:35<00:11,  1.09it/s]

Failed to process/index https://www.sisuclinic.com/en-ie/blogs/tech-driven-care-how-sisu-uses-smart-tech-to-make-every-patient-feel-special. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')


Fetching and Indexing pages...:  91%|█████████▏| 129/141 [02:36<00:10,  1.11it/s]

Failed to process/index https://www.sisuclinic.com/en-ie/blogs/the-non-surgical-facelift-how-it-works-and-what-to-expect-us. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')


Fetching and Indexing pages...:  92%|█████████▏| 130/141 [02:37<00:09,  1.13it/s]

Failed to process/index https://www.sisuclinic.com/en-ie/blogs/treatment-for-a-gummy-smile. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')


Fetching and Indexing pages...:  93%|█████████▎| 131/141 [02:38<00:09,  1.08it/s]

Failed to process/index https://www.sisuclinic.com/en-ie/blogs/treatment-for-migraines-how-does-it-work. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')


Fetching and Indexing pages...:  94%|█████████▎| 132/141 [02:39<00:08,  1.06it/s]

Failed to process/index https://www.sisuclinic.com/en-ie/blogs/treatment-for-tmj-how-does-it-work. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')


Fetching and Indexing pages...:  94%|█████████▍| 133/141 [02:40<00:07,  1.09it/s]

Failed to process/index https://www.sisuclinic.com/en-ie/blogs/understanding-the-difference-between-anti-wrinkle-and-dermal-fillers. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')


Fetching and Indexing pages...:  95%|█████████▌| 134/141 [02:41<00:06,  1.09it/s]

Failed to process/index https://www.sisuclinic.com/en-ie/blogs/what-are-thread-lifts. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')


Fetching and Indexing pages...:  96%|█████████▌| 135/141 [02:42<00:06,  1.09s/it]

Failed to process/index https://www.sisuclinic.com/en-ie/blogs/what-is-a-non-surgical-nose-job. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')


Fetching and Indexing pages...:  96%|█████████▋| 136/141 [02:43<00:05,  1.02s/it]

Failed to process/index https://www.sisuclinic.com/en-ie/blogs/what-is-my-skin-type. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')


Fetching and Indexing pages...:  97%|█████████▋| 137/141 [02:44<00:03,  1.01it/s]

Failed to process/index https://www.sisuclinic.com/en-ie/blogs/what-is-our-sculptra-r-treatment-all-you-need-to-know. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')


Fetching and Indexing pages...:  98%|█████████▊| 138/141 [02:45<00:02,  1.02it/s]

Failed to process/index https://www.sisuclinic.com/en-ie/blogs/what-is-the-difference-between-anti-wrinkle-and-profhilo. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')


Fetching and Indexing pages...:  99%|█████████▊| 139/141 [02:46<00:01,  1.06it/s]

Failed to process/index https://www.sisuclinic.com/en-ie/blogs/what-to-do-before-and-after-anti-wrinkle-treatment. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')


Fetching and Indexing pages...:  99%|█████████▉| 140/141 [02:47<00:00,  1.02it/s]

Failed to process/index https://www.sisuclinic.com/en-ie/blogs/why-men-get-anti-wrinkle-treatment. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')


Fetching and Indexing pages...: 100%|██████████| 141/141 [02:48<00:00,  1.19s/it]

Failed to process/index https://www.sisuclinic.com/en-ie/aesthetic-treatments/exosomes. Error: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')





BadRequestError: BadRequestError(400, 'illegal_argument_exception', 'Malformed action/metadata line [57], expected field [create], [delete], [index] or [update] but found [url]')

In [20]:
count = es.count(index=INDEX_NAME_HUGGINGFACE)
count["count"]

148

In [31]:
len(query_list)

50

In [36]:
embedding_model = get_embedder(model_type="HuggingFace")
embedded_query = embedding_model("What is baby botox and how does it work?")
query_body = create_knn_query(query_vector=embedded_query, k_value=5)

In [37]:
response = es.search(
    index=INDEX_NAME_HUGGINGFACE,
    body=query_body
)

response

ObjectApiResponse({'took': 1413, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 5, 'relation': 'eq'}, 'max_score': 0.76685464, 'hits': [{'_index': 'huggingface', '_id': 'TggxfpoBAdziKzBU8AY2', '_score': 0.76685464, '_source': {'url': 'https://www.sisuclinic.com/en-ie/our-clinics/botox-dysport-miami', 'title': 'Botoxâ\x84¢:Dysportâ\x84¢ Miami - Sisu Clinic', 'content': "Botoxâ\x84¢:Dysportâ\x84¢ Miami - Sisu Clinic Black Friday is live! Save up to 30% on select treatments. Try our AI Makeover now and preview your refreshed look for free. Try Now Ireland United States Ireland United Kingdom Treatments Patient Results Get The Look Locations Meet Your Provider Pricing Blogs Shop Book Today choose region Republic Of Ireland United States of America United Kingdom learn more View Anti-Wrinkle View Get The Look Offers View All Treatments View Pricing View All Locations View Patients Results Meet Your Provider Have a questio

In [42]:
hits = response["hits"]["hits"]
for hit in hits:
    print(hit["_score"])

0.76685464
0.76685464
0.7471477
0.7471477
0.7464458
