**Connect to Elasticsearch**

In [21]:
from pprint import pprint 
from elasticsearch import Elasticsearch

# Client pointed at the node on localhost:9200 over plain HTTP; no credentials are passed.
es = Elasticsearch("http://localhost:9200")

# Ask the cluster for its metadata (name, UUID, version - see the printed body).
# NOTE(review): presumably this raises if the node is unreachable, so the
# success message below is only reached on a working connection - confirm.
client_info = es.info()

print("Successfully connected to Elasticsearch!")
pprint(client_info.body)

Successfully connected to Elasticsearch!
{'cluster_name': 'docker-cluster',
 'cluster_uuid': 'T1HeaWnRTOqX_BBgREVVbA',
 'name': '64c49e436740',
 'tagline': 'You Know, for Search',
 'version': {'build_date': '2025-10-21T10:06:21.288851013Z',
             'build_flavor': 'default',
             'build_hash': '25d88452371273dd27356c98598287b669a03eae',
             'build_snapshot': False,
             'build_type': 'docker',
             'lucene_version': '10.3.1',
             'minimum_index_compatibility_version': '8.0.0',
             'minimum_wire_compatibility_version': '8.19.0',
             'number': '9.2.0'}}


**Preparing the index**
We add a new field of type `dense_vector` to store the embeddings.

In [None]:
# Start from a clean slate: drop any earlier copy of the index (a missing index
# is tolerated), then recreate it with the vector field mapped up front.
es.indices.delete(index="my_index", ignore_unavailable=True)

# Only the embedding field is mapped explicitly here.
embedding_mappings = {
    "properties": {
        "embedding": {"type": "dense_vector"},
    }
}
es.indices.create(index="my_index", mappings=embedding_mappings)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_index'})

![all-MiniLM-L6-v2 model card](images/all-MiniLM-L6-v2_model.png)

I chose the all-MiniLM-L6-v2 model for its speed, compact size, and versatility as a general-purpose model. It produces 384-dimensional embeddings and truncates input longer than 256 word pieces (tokens, not words). The model is very popular in the community, with almost 50M downloads in one month.

To download and utilize this model, Hugging Face offers a Python package called sentence-transformers. This framework simplifies the process of computing dense vector representations.

In [23]:
from sentence_transformers import SentenceTransformer


# Load the pretrained checkpoint by name. The recorded output below shows a
# request to huggingface.co being made during this call.
model = SentenceTransformer("all-MiniLM-L6-v2")
# Bare expression so the notebook displays the model's module structure.
model

'(ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=10)"), '(Request ID: fbf94424-d4ab-4850-9f67-0725d67f97d8)')' thrown while requesting HEAD https://huggingface.co/api/resolve-cache/models/sentence-transformers/all-MiniLM-L6-v2/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/modules.json
Retrying in 1s [Retry 1/5].


SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False, 'architecture': 'BertModel'})
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)

In [24]:
import torch

# Use the GPU when CUDA is available, otherwise stay on the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
device

device(type='cpu')

In [25]:
# Move the model to the selected device and rebind the name to the result.
model = model.to(device)
# Display the model again after the move.
model

SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False, 'architecture': 'BertModel'})
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)

**Load documents**

In [26]:
import json 

# Read the sample corpus. The context manager closes the file handle as soon as
# parsing finishes, and the explicit encoding keeps the read independent of the
# platform's default locale.
with open("data/data.json", encoding="utf-8") as data_file:
    documents = json.load(data_file)

# Peek at the first two records to confirm the expected fields are present.
documents[:2]

[{'title': 'Sample Title 1',
  'text': 'This is the first sample document text.',
  'created_on': '2024-09-22'},
 {'title': 'Sample Title 2',
  'text': 'Elasticsearch makes searching and analyzing large amounts of data fast and efficient.',
  'created_on': '2024-10-05'}]

**Embed documents**

In [35]:
from tqdm import tqdm 
from pprint import pprint 

def get_embedding(text):
    """Return the embedding the sentence-transformers ``model`` produces for ``text``.

    Args:
        text: Input handed straight through to ``model.encode``.

    Returns:
        Whatever ``model.encode`` returns for that input, unchanged.
    """
    return model.encode(text)

# using bulk api: entries alternate between an action line and the document
# source that the action applies to.
operations = []

for document in tqdm(documents, total=len(documents)):
    operations.append({"index": {"_index": "my_index"}}) # action
    operations.append({
        **document,
        'embedding': get_embedding(document['text'])
    }) # source

# refresh="wait_for" makes the call return only once the new documents are
# visible to search, so a query issued right after this cell cannot race the
# index refresh when the notebook is run top to bottom.
response = es.bulk(operations=operations, refresh="wait_for")
pprint(response.body)

# The response body carries a top-level "errors" flag. Fail loudly when it is
# set so later cells do not silently run against a partially indexed corpus.
if response.body["errors"]:
    raise RuntimeError(
        "Bulk indexing reported item-level failures; inspect the response printed above."
    )

100%|██████████| 5/5 [00:00<00:00, 27.83it/s]


{'errors': False,
 'items': [{'index': {'_id': '1nWvRZoBkw1BrtJkeaZj',
                      '_index': 'my_index',
                      '_primary_term': 1,
                      '_seq_no': 0,
                      '_shards': {'failed': 0, 'successful': 1, 'total': 2},
                      '_version': 1,
                      'result': 'created',
                      'status': 201}},
           {'index': {'_id': '13WvRZoBkw1BrtJkeaZj',
                      '_index': 'my_index',
                      '_primary_term': 1,
                      '_seq_no': 1,
                      '_shards': {'failed': 0, 'successful': 1, 'total': 2},
                      '_version': 1,
                      'result': 'created',
                      'status': 201}},
           {'index': {'_id': '2HWvRZoBkw1BrtJkeaZj',
                      '_index': 'my_index',
                      '_primary_term': 1,
                      '_seq_no': 2,
                      '_shards': {'failed': 0, 'successful': 1, '

We indexed all documents with an additional `embedding` field. Let's retrieve the documents to verify that the text was converted to a dense vector.

In [40]:
# Fetch the indexed documents back, limited to the fields needed to confirm
# that each one carries an embedding next to its text. Request fields are
# passed as keyword arguments (the same style as the indices.create call in
# this notebook) instead of a raw `body` dict; `source` is the client's
# keyword for the `_source` filter.
response = es.search(
    index="my_index",
    source=["embedding", "text", "title"],
    query={"match_all": {}},
)
pprint(response["hits"]["hits"])

[{'_id': '1nWvRZoBkw1BrtJkeaZj',
  '_index': 'my_index',
  '_score': 1.0,
  '_source': {'embedding': [-0.043552242,
                            0.064408414,
                            -0.0050801663,
                            0.03445185,
                            0.04063344,
                            0.014603244,
                            -0.019641688,
                            0.049041085,
                            0.035828885,
                            0.011970674,
                            0.04181135,
                            0.082541056,
                            -0.00032649384,
                            -0.037260294,
                            -0.009786697,
                            0.03912472,
                            0.03093672,
                            -0.07445886,
                            0.061509013,
                            0.06640519,
                            0.06550293,
                            0.096725576,
                      

In [41]:
# Read back the index mapping to see how each field ended up being mapped.
# Note: this rebinds `response`, replacing whatever an earlier cell stored there.
response = es.indices.get_mapping(index="my_index")
pprint(response.body)

{'my_index': {'mappings': {'properties': {'created_on': {'type': 'date'},
                                          'embedding': {'dims': 384,
                                                        'index': True,
                                                        'index_options': {'ef_construction': 100,
                                                                          'm': 16,
                                                                          'rescore_vector': {'oversample': 3.0},
                                                                          'type': 'bbq_hnsw'},
                                                        'similarity': 'cosine',
                                                        'type': 'dense_vector'},
                                          'text': {'fields': {'keyword': {'ignore_above': 256,
                                                                          'type': 'keyword'}},
                                              