## Named Entity Recognition

Want to learn in much more detail what this is all about? Read:
[https://towardsdatascience.com/named-entity-recognition-with-bert-in-pytorch-a454405e0b6a](https://towardsdatascience.com/named-entity-recognition-with-bert-in-pytorch-a454405e0b6a)

In [None]:
! pip install -q python_dotenv
! pip install -q torch==2.2.1 torchvision==0.17.1 torchaudio==2.2.1
! pip install -q seqeval==1.2.2
! pip install -q transformers==4.39.1
! pip install -q eland==8.12.1 sentence-transformers==2.6.1 requests==2.31.0


## RESTART YOUR ENVIRONMENT.  

This may be as simple as closing and relaunching Visual Studio.

In [None]:
# Name of the pretrained NER model, without any organization prefix.
model_id = "distilbert-base-cased-finetuned-conll03-english"
# Identifier used for the same model in the Elasticsearch endpoint paths.
es_model_id = "elastic__" + model_id

In [None]:
import os
from dotenv import load_dotenv
# Read the connection settings from the local .env file; override=True lets
# values from the file replace variables already set in the environment.
load_dotenv(".env", override=True)

# Every setting is required: a missing variable raises KeyError right here.
domain = os.environ["ELASTIC_DOMAIN"]
port = os.environ["ELASTIC_PORT"]
protocol = os.environ["ELASTIC_PROTOCOL"]
user = os.environ["ELASTIC_USER"]
password = os.environ["ELASTIC_PASSWORD"]

# Base URL with embedded basic-auth credentials, reused by the later cells.
es_url = f"{protocol}://{user}:{password}@{domain}:{port}"

# Show the target for a sanity check, but mask the password so it is not
# written into the notebook output (which is saved along with the file).
print(f"{protocol}://{user}:***@{domain}:{port}")

In [None]:
import elasticsearch
from pathlib import Path
from eland.ml.pytorch import PyTorchModel
from eland.ml.pytorch.transformers import TransformerModel
import requests

# Load a Hugging Face transformers model directly from the model hub
tm = TransformerModel(model_id=f"elastic/{model_id}", task_type="ner")


# Export the model in a TorchScript representation which Elasticsearch uses
tmp_path = "models"
Path(tmp_path).mkdir(parents=True, exist_ok=True)
model_path, config, vocab_path = tm.save(tmp_path)

# NOTE(review): `e` is not read anywhere in the visible cells; kept only in
# case a later cell relies on the name existing.
e = None

# Import model into Elasticsearch.
# `request_timeout` is the 8.x client's name for the per-request timeout
# (the older `timeout` keyword is deprecated); uploads can take minutes.
es = elasticsearch.Elasticsearch(es_url, request_timeout=300)  # 5 minute timeout
ptm = PyTorchModel(es, tm.elasticsearch_model_id())
try:
    ptm.import_model(model_path=model_path, config_path=None, vocab_path=vocab_path, config=config)
except Exception as error:
    # Only Elasticsearch API errors carry `.meta` and `.message`; read them
    # defensively so any other failure (connection error, bad file path, ...)
    # is reported below instead of raising AttributeError inside this handler.
    status = getattr(getattr(error, "meta", None), "status", None)
    message = getattr(error, "message", None)
    if status == 400 and message == "resource_already_exists_exception":
        print("Done -- the model was already loaded")
    else:
        print("An error occurred:", str(error))


def deploy_model(model_id, es_url):
    """Request the start of a trained-model deployment and print the outcome.

    Sends a POST to the model's ``deployment/_start`` endpoint. Any status
    code other than 200 is reported as an error together with the response
    text.

    Args:
        model_id: Identifier of the trained model, inserted into the URL path.
        es_url: Base URL that the endpoint path is appended to.
    """
    start_endpoint = "{}/_ml/trained_models/{}/deployment/_start".format(es_url, model_id)
    reply = requests.post(start_endpoint)
    if reply.status_code != 200:
        print("Error deploying model: ", reply.text)
        return
    print("Model Deployed")

# Request a deployment start for the model id defined earlier in the notebook.
deploy_model(es_model_id,es_url)


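Now you need to deploy the model. The `deploy_model` call above attempts this through the REST API; if it reported an error, you can deploy the model manually in Kibana: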

* If you are running in Elastic Cloud, make sure you have at least 1 ML node configured
* In Kibana go to Stack Management > Machine Learning 
* Synchronize the saved objects

![deploy](img/sync.jpg)

* Refresh the page and go to the Trained Models tab.
* Start the model; the default settings will be fine.


In [None]:
# Sample text for the NER model: it is sent for inference below and reused as
# the "message" of the test document in the pipeline simulations.
lyrics = """
My name is Jonas
I'm carryin' the wheel
Thanks for all you've shown us
But this is how we feel """

import requests
import json

def use_hosted_ml_post(json_payload, url):
    """POST a JSON payload to the cluster and print the outcome.

    On a 200 response the decoded JSON body is pretty-printed; any other
    status code is reported together with the raw response text.

    Args:
        json_payload: JSON-serializable object sent as the request body.
        url: Path that is appended to the module-level ``es_url``.
    """
    body = json.dumps(json_payload)
    reply = requests.post(
        es_url + url,
        data=body,
        headers={'Content-Type': 'application/json'},
    )
    if reply.status_code != 200:
        print("Error posting JSON:", reply.text)
        return
    print("JSON posted successfully to your URL")
    print(json.dumps(reply.json(), indent=4))

# Inference request body: a single document whose "text_field" is the lyrics.
payload = {"docs": [{"text_field": lyrics}]}

# Path (relative to es_url) of the model deployment's inference endpoint.
# NOTE(review): newer Elasticsearch documentation lists this endpoint as
# `/_ml/trained_models/<id>/_infer` -- confirm which form your cluster expects.
url = "/_ml/trained_models/{}/deployment/_infer".format(es_model_id)
use_hosted_ml_post(payload, url)

In [None]:
# Ingest processor that runs the model on each document; field_map passes the
# document's "message" field to the model under the name "text_field".
inference = {
    "inference": {
        "model_id": es_model_id,
        "field_map": {"message": "text_field"},
    },
}

# Register the pipeline with the inference processor as its only step.
es.ingest.put_pipeline(id="week5_ner", processors=[inference])

In [None]:
# Test input for pipeline simulation: one document whose message is the lyrics.
docs = [{"_source": {"message": lyrics}}]

import json
# pretty printing JSON objects
def json_pretty(input_object):
    """Print ``input_object`` serialized as JSON with a four-space indent."""
    formatted = json.dumps(input_object, indent=4)
    print(formatted)

# Dry-run the pipeline against the sample documents and print the response body.
json_pretty(
    es.ingest.simulate(id="week5_ner", docs=docs).body
)

In [None]:
# Painless script processor that copies the model's predicted_value into a
# top-level "message_ner" field. The script text is sent verbatim.
# NOTE(review): `msg` and `predicted` are declared in the script but never read.
script_extract = {
    "script": {
        "lang": "painless",
        "source": """
String msg = ctx['message'];
String predicted = ctx['ml']['inference']['predicted_value'];
ctx['message_ner'] = ctx['ml']['inference']['predicted_value'];
""",
    },
}

# Register the pipeline again: inference first, then the extraction script.
es.ingest.put_pipeline(id="week5_ner", processors=[inference, script_extract])

In [None]:
# Simulate again to see the effect of the added script processor.
json_pretty(
    es.ingest.simulate(id="week5_ner", docs=docs).body
)

In [None]:
# Remove processor that drops the "ml" field from the document; it is
# configured to tolerate a missing field and to ignore its own failures.
remove = {
    "remove": {
        "field": ["ml"],
        "ignore_missing": True,
        "ignore_failure": True,
    },
}

# Final pipeline: inference, then extraction, then removal of the "ml" field.
es.ingest.put_pipeline(
    id="week5_ner",
    processors=[inference, script_extract, remove],
)
json_pretty(
    es.ingest.simulate(id="week5_ner", docs=docs).body
)
