In [None]:
!pip install -qU elasticsearch sentence-transformers

In [1]:
# get the Elasticsearch client
from elasticsearch import Elasticsearch, exceptions
from getpass import getpass
import time

# Prompt for the deployment credentials; getpass keeps the typed values out of
# the rendered notebook output.
ELASTIC_CLOUD_ID = getpass("Elastic Cloud ID: ")
ELASTIC_API_KEY = getpass("Elastic Api Key: ")

# Build the Elasticsearch client that the cells below use for every API call.
client = Elasticsearch(
    cloud_id=ELASTIC_CLOUD_ID,
    api_key=ELASTIC_API_KEY,
)

Elastic Cloud ID:  ········
Elastic Api Key:  ········


In [None]:
# Remove any previously downloaded/deployed copy of the ELSER model so the
# steps below start from a clean state. A missing model is not an error here.
try:
    client.ml.delete_trained_model(model_id=".elser_model_2", force=True)
except exceptions.NotFoundError:
    print("Model doesn't exist, but We will proceed with creating one")
else:
    print("Model deleted successfully, We will proceed with creating one")

# Create the ELSER model configuration. Elasticsearch automatically downloads
# the model if it doesn't exist yet.
client.ml.put_trained_model(
    model_id=".elser_model_2",
    input={"field_names": ["text_field"]},
)

# Poll every 5 seconds until Elasticsearch reports the model as fully defined.
# The wait is bounded so that a stalled or failed download raises a clear
# error instead of hanging the notebook forever.
definition_deadline = time.monotonic() + 600  # allow up to 10 minutes
while True:
    status = client.ml.get_trained_models(
        model_id=".elser_model_2",
        include="definition_status",
    )
    if status["trained_model_configs"][0]["fully_defined"]:
        break
    if time.monotonic() >= definition_deadline:
        raise TimeoutError(
            "Timed out waiting for .elser_model_2 to become fully defined"
        )
    time.sleep(5)

# Start the model deployment with a single allocation. wait_for="starting"
# asks the API to return once the deployment has begun starting; the loop
# that follows polls until it is actually ready.
client.ml.start_trained_model_deployment(
    model_id=".elser_model_2",
    number_of_allocations=1,
    wait_for="starting",
)

# Poll every 5 seconds until the deployment state is "started". The wait is
# bounded so a deployment that never comes up raises a clear error instead of
# looping forever.
deployment_deadline = time.monotonic() + 600  # allow up to 10 minutes
while True:
    status = client.ml.get_trained_models_stats(
        model_id=".elser_model_2",
    )
    # Defensive lookup: if the stats entry has no "deployment_stats" section,
    # treat that as "not started" rather than failing with a KeyError.
    deployment_stats = status["trained_model_stats"][0].get("deployment_stats", {})
    if deployment_stats.get("state") == "started":
        print("ELSER Model has been successfully deployed.")
        break
    if time.monotonic() >= deployment_deadline:
        raise TimeoutError(
            "Timed out waiting for the .elser_model_2 deployment to start"
        )
    print("ELSER Model is currently being deployed.")
    time.sleep(5)

In [4]:
import json
from urllib.request import urlopen
from sentence_transformers import SentenceTransformer

# Start from a clean slate: drop the "books" index if it already exists
# (ignore_unavailable avoids an error when it does not) so this cell can be
# re-run safely.
client.indices.delete(index="books", ignore_unavailable=True)

# Map title_vector as an indexed dense vector compared with cosine similarity.
# dims must equal the length of the embeddings stored in this field; 384 is
# assumed to match the sentence-transformers model used below.
mappings = {
    "properties": {
        "title_vector": {
            "type": "dense_vector",
            "dims": 384,
            "index": True,  # a real boolean rather than the string "true"
            "similarity": "cosine",
        }
    }
}
client.indices.create(index="books", mappings=mappings)

# Download the sample book data set. The context manager closes the HTTP
# response once the body has been parsed, and the timeout keeps a stalled
# connection from blocking the cell indefinitely.
url = "https://raw.githubusercontent.com/elastic/elasticsearch-labs/main/notebooks/search/data.json"
with urlopen(url, timeout=30) as response:
    books = json.load(response)

# Sentence-transformers model used to embed each book title.
model = SentenceTransformer("all-MiniLM-L6-v2")

# The bulk API expects an action line followed by the document itself, so the
# operations list alternates {"index": ...} entries with book documents.
operations = []
for book in books:
    # Store the title embedding on the document as a plain list of floats.
    book["title_vector"] = model.encode(book["title"]).tolist()
    operations.extend(({"index": {"_index": "books"}}, book))

# refresh=True makes the new documents searchable as soon as the call returns.
client.bulk(index="books", operations=operations, refresh=True)

ObjectApiResponse({'errors': False, 'took': 69, 'items': [{'index': {'_index': 'books', '_id': 'd8CPPI0BUo9hijNs3Yig', '_version': 1, 'result': 'created', 'forced_refresh': True, '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 0, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'books', '_id': 'eMCPPI0BUo9hijNs3Yig', '_version': 1, 'result': 'created', 'forced_refresh': True, '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 1, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'books', '_id': 'ecCPPI0BUo9hijNs3Yig', '_version': 1, 'result': 'created', 'forced_refresh': True, '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 2, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'books', '_id': 'esCPPI0BUo9hijNs3Yig', '_version': 1, 'result': 'created', 'forced_refresh': True, '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 3, '_primary_term': 1, 'status': 201}}, {'index': {'_index': 'books', '_id