See blog here: https://discuss.elastic.co/t/dec-18th-2023-en-the-most-magical-time-of-the-year-using-semantic-search-to-find-the-most-festive-harry-potter-moments/347615 

In [1]:
import eland as ed
import pandas as pd
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
import configparser
import re
import pandas as pd 
from json import loads

from getpass import getpass  # For securely getting user input

# Prompt the user to enter their Elastic Cloud ID and API Key securely
ELASTIC_CLOUD_ID = getpass("Elastic Cloud ID: ")
ELASTIC_API_KEY = getpass("Elastic API Key: ")

# Create an Elasticsearch client using the provided credentials
client = Elasticsearch(
    cloud_id=ELASTIC_CLOUD_ID,  # cloud id can be found under deployment management
    api_key=ELASTIC_API_KEY # API keys can be generated under management / security
)


client.info()

ObjectApiResponse({'name': 'instance-0000000000', 'cluster_name': 'fdcc4e10e5a34385884a3eda9350099a', 'cluster_uuid': '1v8os-EZTPmrZoF6uXeWKA', 'version': {'number': '8.9.0', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '8aa461beb06aa0417a231c345a1b8c38fb498a0d', 'build_date': '2023-07-19T14:43:58.555259655Z', 'build_snapshot': False, 'lucene_version': '9.7.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'})

### Getting the books as text file and processing them into the right format

In [12]:
hp_books = pd.read_csv("data/Harry_Potter_all_books_preprocessed.txt", sep=".", header=None)
hp_books = hp_books.T

hp_books.rename(columns = {0:'text_field'}, inplace = True)

docs = hp_books.to_json(orient = "records")
hp_books = loads(docs)

hp_books[0:5]

index = "hp_books"
settings = {}
mappings = {
    "_meta" : {
        "created_by" : "Iulia Feroli"
    },
    "properties" : {
        "text_field" : {
            "type" : "text"
        }
    }
}

client.indices.create(index=index, settings=settings, mappings=mappings)
response = bulk(client = client, index = index, actions = iter(hp_books), stats_only = True )

In [3]:
#test search
index = "hp_books"
response = client.search(index = index, query={
    "match" : {
        "text_field" : "Christmas"
    }
})

print("We get back {total} results, here are the top ones:".format(total=response["hits"]['total']['value']))
for hit in response["hits"]["hits"]:
    print(hit['_source']['text_field'])

We get back 203 results, here are the top ones:
meeting before Christmas 
Merry Christmas said George 
See you at Christmas 
Come on Hermione its Christmas 
So Ive come for Christmas 
A Very Merry Christmas to you 
Christmas is a time for family 
Flaming Christmas puddings followed the turkey 
Christmas morning dawned cold and white 
Merry Christmas !See ?said Ron quietly 


### Connecting to Elastic and creating a new enriched data index

In [27]:
#making sure the models are here - we will use the trxt classifier and elser at the same time
from elasticsearch.client import MlClient
models = MlClient.get_trained_models(client)
for model in models["trained_model_configs"]:
    print(model["model_id"])

.elser_model_1
distilbert-base-uncased-finetuned-sst-2-english
lang_ident_model_1
sentence-transformers__msmarco-minilm-l-12-v3


In [67]:
client.indices.create(
  index="hp_books_enriched",
  mappings={
    "properties": {
      "text_field": {
        "type": "text"
       },
      "sentiment": {
          "properties": {
            "model_id": {
              "type": "text",
              "fields": {
                "keyword": {
                  "type": "keyword",
                  "ignore_above": 256
                }
              }
            },
            "predicted_value": {
              "type": "text",
              "fields": {
                "keyword": {
                  "type": "keyword",
                  "ignore_above": 256
                }
              }
            },
            "prediction_probability": {
              "type": "float"
            }
          }
        },
      "ml.tokens": { 
        "type": "rank_features" 
      }
      }
    })

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'hp_books_enriched'})

We can run both models in a single pipeline this time

In [68]:
#creating a pipeline
client.ingest.put_pipeline(
    id="sentiment_and_elser", 
    processors=[
    {
      "inference": {
        "model_id": "distilbert-base-uncased-finetuned-sst-2-english",
        "target_field" : "sentiment",
        "field_map": {
          "Sentence": "text_field"
        }
      }
    },
    {
      "inference": {
        "model_id": ".elser_model_1",
        "target_field": "ml",
        "field_map": {
          "Sentence": "text_field"
        },
        "inference_config": {
          "text_expansion": {
            "results_field": "tokens"
          }
        }
      }
    }
  ]
)
client.reindex(body={
      "source": {
          "index": "hp_books"},
      "dest": {"index": "hp_books_enriched", "pipeline" : "sentiment_and_elser"}
    }, wait_for_completion=False)

  client.reindex(body={


ObjectApiResponse({'task': 'JqYuDbWsRueybLrxY3c9Cg:76870757'})

### Tesing our new fields

In [107]:
result = client.search(
    index='hp_books_enriched', 
    size=5,
    query={
        "text_expansion": {
            "ml.tokens": {
                "model_id":".elser_model_1",
                "model_text":"christmas"
            }
        }
    },
    request_timeout=30
)

for element in result["hits"]["hits"]:
        print("{}".format(element["_source"]["text_field"]))

Most unfortunate that it should happen on Christmas Day 
Christmas morning dawned cold and white 
What a jolly holiday its going to be 
Td invite you for Christmas but 
We can do all our Christmas shopping there !said Hermione 


  result = client.search(


In [106]:
result = client.search(
    index='hp_books_enriched', 
    size=5,
    query={
        "bool": {
            "should": [{
                "text_expansion": {
                    "ml.tokens": {
                        "model_id":".elser_model_1",
                        "model_text":"celebrating the christmas holidays"
                    }
                },
            }],
            "must":[
            {
                "match" : {
                    "sentiment.predicted_value": "NEGATIVE"
                }
            }]}})
    

for element in result["hits"]["hits"]:
        print(element["_source"]["text_field"])

Most unfortunate that it should happen on Christmas Day 
I have a lot to do before the holidays 
I wouldnt fancy having to go and tell the Irish theyve got to stop celebrating 
Christmas morning dawned cold and white 
Surely you want to go home for the holidays ?No said Riddle at once 


In [89]:
query={
    "match" : {
      "sentiment.predicted_value": "POSITIVE"
    },
    "match" : {
        "text_field": "Christmas"
    }
  }

response = client.search(index = "hp_books_enriched",query=query, sort="sentiment.prediction_probability:desc")

print("The most positive sentences in the series:")
for hit in response["hits"]["hits"]:
    print(hit['_source']["text_field"] )

The most positive sentences in the series:
Merry Christmas !he said happily raising his tankard 
A powerful and delicious smell of cooking pervaded the corridors and by Christmas Eve it had grown so strong that even Scabbers poked his nose out of the shelter of Rons pocket to sniff hopefully at the air 
Merry Christmas !See ?said Ron quietly 
Christmas spirit was definitely thin on the ground in the Gryffindor common room that morning 
Most unfortunate that it should happen on Christmas Day 
Merry Christmas !said Dumbledore as Harry Ron and Hermione approached the table 
What could possibly do that to a ghost ?people asked each other what terrible power could harm someone who was already dead ?There was almost a stampede to book seats on the Hogwarts Express so that students could go home for Christmas 
Thick streamers of holly and mistletoe were strung along the corridors mysterious lights shone from inside every suit of armor and the Great Hall was filled with its usual twelve Christ