# Elasticsearch

> **Doporučené video**
>
> https://www.youtube.com/watch?v=UWR9G-U88X0
>
> https://www.youtube.com/watch?v=C3tlMqaNSaI

https://www.elastic.co

```shell
# Create a dedicated network and start a single Elasticsearch container on it.
docker network create elastic
docker run --name es01 --net elastic -p 9200:9200 -p 9300:9300 -it docker.elastic.co/elasticsearch/elasticsearch:8.2.0

# Alternative to the command above (not in addition to it: the container name es01
# can only be used once, so remove the first container with `docker rm -f es01` first).
# -m limits container memory; 262144 is the vm.max_map_count value, not a memory size.
docker run --name es01 -m 1g -p 9200:9200 -p 9300:9300 -it docker.elastic.co/elasticsearch/elasticsearch:8.2.0
```


https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/overview.html

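Před spuštěním Elasticsearch v Dockeru je na hostitelském počítači (Linux) nutné zvýšit limit `vm.max_map_count`:

```shell
sysctl -w vm.max_map_count=262144
```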

Definiční soubor (docker-compose.yaml) pro sestavu služeb (Elasticsearch + Kibana) je níže. Komunikace s Elasticsearch je nezabezpečená (http, nikoliv https).

```yaml
# Elasticsearch + Kibana stack talking plain HTTP (security features disabled).
version: "3.4"

services:
  # Elasticsearch node named es01, bootstrapped as its own initial master.
  es01:
    image: docker.elastic.co/elasticsearch/elasticsearch:8.2.0
    ports:
      - "9200:9200"
    environment:
      ELASTIC_PASSWORD: example
      KIBANA_PASSWORD: example
      node.name: es01
      cluster.initial_master_nodes: es01
      xpack.security.enabled: "false"
    volumes:
      - ./certs:/usr/share/elasticsearch/config/certs
      - ./esdata01:/usr/share/elasticsearch/data
    deploy:
      resources:
        limits:
          # NOTE(review): 256M looks very tight for Elasticsearch - confirm it starts reliably.
          memory: 256M

  # Kibana UI, pointed at es01 over HTTP.
  kibana:
    image: docker.elastic.co/kibana/kibana:8.2.0
    ports:
      - "5601:5601"
    environment:
      SERVERNAME: kibana
      ELASTICSEARCH_HOSTS: "http://es01:9200"
      ELASTICSEARCH_USERNAME: kibana_system
      ELASTICSEARCH_PASSWORD: example
    volumes:
      - ./certs:/usr/share/kibana/config/certs
    deploy:
      resources:
        limits:
          memory: 256M
```

Pokud je nutná zabezpečená komunikace, což je doporučeno pro deployment, a není zde alternativní řešení, jako je např. proxy (nginx), je nutné vygenerovat certifikáty. Skript níže takové generování provádí v rámci docker služby do specifického adresáře, který je sdílen formou volume a může být poskytnut do služby Elasticsearch. Jako image pro službu je použit elasticsearch:8.2.0, který toto umožňuje a je použit pro spuštění služby Elasticsearch. Díky tomu nedochází ke stažení dalšího image kontejneru (šetří se prostředky na úložiště pro počítač hostující docker).

```yaml
# One-off certificate generation for the es01 node.
version: "3.4"

services:
  # Helper service: runs elasticsearch-certutil from the Elasticsearch image and
  # leaves the generated CA and node certificate in ./certs on the host.
  setup:
    image: docker.elastic.co/elasticsearch/elasticsearch:8.2.0
    volumes:
      # Host directory that receives the generated files.
      - ./certs:/usr/share/elasticsearch/config/certs
    # Run as uid 0; the script below chowns the output to root:root.
    user: "0"
    # Script steps, in order: print the environment, create a PEM CA and unzip it,
    # write instances.yml (es01 with DNS names es01/localhost and IP 127.0.0.1),
    # issue a certificate signed by that CA and unzip it, then set owner and modes
    # (750 for directories, 640 for files).
    # NOTE(review): both `find` calls start at `.`, not at config/certs - confirm
    # that changing modes below the whole working directory is intended.
    command: >
      bash -c '
        printenv
        
        echo "Creating CA";
        bin/elasticsearch-certutil ca --silent --pem -out config/certs/ca.zip;
        unzip config/certs/ca.zip -d config/certs;

        echo "Creating certs";
        echo -ne \
        "instances:\n"\
        "  - name: es01\n"\
        "    dns:\n"\
        "      - es01\n"\
        "      - localhost\n"\
        "    ip:\n"\
        "      - 127.0.0.1\n"\
        > config/certs/instances.yml;
        bin/elasticsearch-certutil cert --silent --pem -out config/certs/certs.zip --in config/certs/instances.yml --ca-cert config/certs/ca/ca.crt --ca-key config/certs/ca/ca.key;
        unzip config/certs/certs.zip -d config/certs;
 
        echo "Setting file permissions 1"
        chown -R root:root config/certs;
        echo "Setting file permissions 2"
        find . -type d -exec chmod 750 \{\} \;;
        echo "Setting file permissions 3"
        find . -type f -exec chmod 640 \{\} \;;
      '

```

## Single node setup

```yaml
# Single-node Elasticsearch + Kibana stack over plain HTTP.
version: "3.4"

services:
  # Elasticsearch started with discovery.type=single-node and security disabled.
  es01:
    image: docker.elastic.co/elasticsearch/elasticsearch:8.2.0
    ports:
      - "9200:9200"
    environment:
      ELASTIC_PASSWORD: example
      KIBANA_PASSWORD: example
      node.name: es01
      xpack.security.enabled: "false"
      discovery.type: single-node
    volumes:
      - ./certs:/usr/share/elasticsearch/config/certs
      - ./esdata01:/usr/share/elasticsearch/data
    deploy:
      resources:
        limits:
          # NOTE(review): 256M looks very tight for Elasticsearch - confirm it starts reliably.
          memory: 256M
    # Unlimited memlock for the container.
    # NOTE(review): bootstrap.memory_lock is not set in this file - confirm this is still needed.
    ulimits:
      memlock:
        soft: -1
        hard: -1

  # Kibana UI, pointed at es01 over HTTP.
  kibana:
    image: docker.elastic.co/kibana/kibana:8.2.0
    ports:
      - "5601:5601"
    environment:
      SERVERNAME: kibana
      ELASTICSEARCH_HOSTS: "http://es01:9200"
      ELASTICSEARCH_USERNAME: kibana_system
      ELASTICSEARCH_PASSWORD: example
    volumes:
      - ./certs:/usr/share/kibana/config/certs
    deploy:
      resources:
        limits:
          memory: 256M
```

## HTTP

https://www.elastic.co/guide/en/elasticsearch/reference/current/docs.html

In [12]:
# Base URL of the Elasticsearch HTTP endpoint; the requests-based cells build their URLs from it.
esHost = 'http://192.168.1.100:9200'

In [15]:
import requests

# Fetch the definition of `test-index` (aliases, mappings, settings) and print the raw body.
index_url = f'{esHost}/test-index'
response = requests.get(index_url)
print(response.text)

{"test-index":{"aliases":{},"mappings":{"properties":{"author":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}},"text":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}},"timestamp":{"type":"date"}}},"settings":{"index":{"routing":{"allocation":{"include":{"_tier_preference":"data_content"}}},"number_of_shards":"1","provided_name":"test-index","creation_date":"1654106349497","number_of_replicas":"1","uuid":"p1Xk7raWTOm6Jvc4NuTaFw","version":{"created":"8020099"}}}}}


In [17]:
import requests

# Create `school-index` with an empty PUT and print the raw acknowledgement.
index_url = f'{esHost}/school-index'
response = requests.put(index_url)
print(response.text)

{"acknowledged":true,"shards_acknowledged":true,"index":"school-index"}


In [33]:
import requests

# Document to store in `school-index`. The timestamp is formatted via str(), so it
# has a space (not 'T') between date and time.
# NOTE(review): `datetime` is not imported in this cell - assumes an earlier cell
# already ran `from datetime import datetime`.
doc = {
    '_type': 'book',
    'text': 'Elasticsearch: cool. bonsai cool.',
    'timestamp': f'{datetime.now()}',
}

# POST without an id and print the raw response.
doc_endpoint = f'{esHost}/school-index/_doc'
response = requests.post(doc_endpoint, json=doc)
print(response.text)

{"_index":"school-index","_id":"PZrBIIEBmQJwmuCeW7Au","_version":1,"result":"created","_shards":{"total":2,"successful":1,"failed":0},"_seq_no":0,"_primary_term":1}


In [34]:
import json

# Decode the body of the previous response into a dict; the next cell reads `_id` from it.
body_text = response.text
responseJson = json.loads(body_text)
print(responseJson)

{'_index': 'school-index', '_id': 'PZrBIIEBmQJwmuCeW7Au', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1}


In [35]:
import requests

# Read the stored document back by the id taken from `responseJson` and print the decoded JSON.
doc_url = f'{esHost}/school-index/_doc/{responseJson["_id"]}'
response = requests.get(doc_url)
print(response.json())

{'_index': 'school-index', '_id': 'PZrBIIEBmQJwmuCeW7Au', '_version': 1, '_seq_no': 0, '_primary_term': 1, 'found': True, '_source': {'_type': 'book', 'text': 'Elasticsearch: cool. bonsai cool.', 'timestamp': '2022-06-01 19:32:31.642219'}}


## Instalace knihovny pro Python

In [1]:
# Install the `elasticsearch` client package together with its `async` extra.
!pip install elasticsearch[async]

Collecting elasticsearch[async]
  Downloading elasticsearch-8.2.2-py3-none-any.whl (378 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 378.7/378.7 kB 3.3 MB/s eta 0:00:00
Collecting elastic-transport<9,>=8
  Downloading elastic_transport-8.1.2-py3-none-any.whl (59 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 59.3/59.3 kB 5.3 MB/s eta 0:00:00
Installing collected packages: elastic-transport, elasticsearch
Successfully installed elastic-transport-8.1.2 elasticsearch-8.2.2


## CRUD ?

### Connection

In [None]:
from datetime import datetime
from elasticsearch import Elasticsearch

# Sample document for the CRUD examples.
doc = {
    'author': 'kimchy',
    'text': 'Elasticsearch: cool. bonsai cool.',
    'timestamp': datetime.now(),
}

# Client bound to the plain-HTTP endpoint; the CRUD cells call it as `es`.
es = Elasticsearch('http://192.168.1.100:9200')

### Create / Update

In [18]:
from datetime import datetime
from elasticsearch import Elasticsearch

es = Elasticsearch('http://192.168.1.100:9200')

# Document body; note it is passed to index() through the `document=` keyword.
doc = {
    '_type': 'book',
    'author': 'kimchy',
    'text': 'Elasticsearch: cool. bonsai cool.',
    'timestamp': datetime.now(),
}

# Store `doc` under id 1 in `test-index` and print the reported result.
resp = es.index(index="test-index", id=1, document=doc)
outcome = resp['result']
print(outcome)



created


**Pure update**

In [38]:
# Replacement field values for the document with id 1.
doc = {
    'author': 'author_name',
    'text': 'Interensting modified content...',
    'timestamp': datetime.now(),
}

# Watch out: update() takes the body as `doc=`, not `document=` as index() does.
resp = es.update(index="test-index", id=1, doc=doc)
outcome = resp['result']
print(outcome)

updated


### Read

In [10]:
# Fetch the document with id 1 directly and show its stored source.
resp = es.get(index="test-index", id=1)
print(resp['_source'])

# Refresh the index before searching (presumably so recent writes are visible to the search).
es.indices.refresh(index="test-index")

# Match-all search: print the total hit count, then id and a one-line summary per hit.
resp = es.search(index="test-index", query={"match_all": {}})
found = resp['hits']
print("Got %d Hits:" % found['total']['value'])
for hit in found['hits']:
    print('id:', hit["_id"])
    print("%(timestamp)s %(author)s: %(text)s" % hit["_source"])
    

{'author': 'kimchy', 'text': 'Elasticsearch: cool. bonsai cool.', 'timestamp': '2022-06-01T18:08:13.830525'}
Got 1 Hits:
id: 1
2022-06-01T18:08:13.830525 kimchy: Elasticsearch: cool. bonsai cool.


### Delete

In [11]:
# Delete the document with id 1 from `test-index` and print the whole response.
resp = es.delete(index="test-index", id=1)
print(resp)


{'_index': 'test-index', '_id': '1', '_version': 3, 'result': 'deleted', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 2, '_primary_term': 1}


## Full

https://www.ludekvesely.cz/serial-elasticsearch-3-prvni-kroky-ukladani-a-zobrazeni-dat/

### Smazání indexu

In [50]:
import requests

# Delete the `products` index and print the raw acknowledgement.
index_url = f'{esHost}/products'
response = requests.delete(index_url)
print(response.text)

{"acknowledged":true}


### Vytvoření indexu s explicitním mapováním

In [52]:
import requests

# Index definition: one shard, no replicas, and an explicit mapping for three fields.
doc = {
    "settings": {
        "index": {
            "number_of_shards": 1,
            "number_of_replicas": 0,
        },
    },
    "mappings": {
        "properties": {
            "age": {"type": "integer"},
            "email": {"type": "keyword"},
            "name": {"type": "text"},
        },
    },
}

# Create `products` with that definition and print the raw acknowledgement.
index_url = f'{esHost}/products'
response = requests.put(index_url, json=doc)
print(response.text)

{"acknowledged":true,"shards_acknowledged":true,"index":"products"}


### Vlastnosti indexu

In [53]:
import requests

# Fetch the full definition of `products` (aliases, mappings, settings) and print the raw body.
index_url = f'{esHost}/products'
response = requests.get(index_url)
print(response.text)

{"products":{"aliases":{},"mappings":{"properties":{"age":{"type":"integer"},"email":{"type":"keyword"},"name":{"type":"text"}}},"settings":{"index":{"routing":{"allocation":{"include":{"_tier_preference":"data_content"}}},"number_of_shards":"1","provided_name":"products","creation_date":"1654113299808","number_of_replicas":"0","uuid":"KG-OghRWQ5mPSXZoVHafYw","version":{"created":"8020099"}}}}}


In [60]:
import requests

# Fetch only the mapping of `products` and print the raw body.
mapping_url = f'{esHost}/products/_mapping'
response = requests.get(mapping_url)
print(response.text)

{"products":{"mappings":{"properties":{"age":{"type":"integer"},"email":{"type":"keyword"},"name":{"type":"text"}}}}}


### Vložení dokumentu do indexu

Explicitní index?

In [55]:
import requests

# Document whose value types follow the explicit mapping (integer age, string email and name).
doc = {
    "age": 26,
    "email": "someone@somewhere.world",
    "name": "Someone John",
}

# POST without an id and print the raw response.
doc_endpoint = f'{esHost}/products/_doc'
response = requests.post(doc_endpoint, json=doc)
print(response.text)

{"_index":"products","_id":"PprZIIEBmQJwmuCeJLDf","_version":1,"result":"created","_shards":{"total":1,"successful":1,"failed":0},"_seq_no":0,"_primary_term":1}


In [56]:
import requests

# Same fields, but with value types that differ from the mapping: `age` is sent as a
# string and `name` as a number (apparently to show how the index handles them).
doc = {
    "age": "26",
    "email": "some@somewhere.world",
    "name": 4785,
}

# POST without an id and print the raw response.
doc_endpoint = f'{esHost}/products/_doc'
response = requests.post(doc_endpoint, json=doc)
print(response.text)

{"_index":"products","_id":"P5rZIIEBmQJwmuCexrDy","_version":1,"result":"created","_shards":{"total":1,"successful":1,"failed":0},"_seq_no":1,"_primary_term":1}


In [59]:
import requests

# Read back the document created by the previous POST and print the raw body.
# The id is kept in `doc_id` rather than `id`, so the builtin id() is not shadowed
# in the notebook namespace.
doc_id = response.json()['_id']
response = requests.get(f'{esHost}/products/_doc/{doc_id}')
print(response.text)

{"_index":"products","_id":"P5rZIIEBmQJwmuCexrDy","_version":2,"_seq_no":2,"_primary_term":1,"found":true,"_source":{"age": "26", "email": "some@somewhere.world", "name": 4785}}


## Analyze

In [61]:
import requests

# Request body for the analyze endpoint: run the built-in `standard` analyzer over a sample sentence.
doc = {
    "analyzer": "standard",
    "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone.",
}

# Send it to the analyze endpoint and print the produced tokens as raw JSON.
analyze_url = f'{esHost}/_analyze'
response = requests.post(analyze_url, json=doc)
print(response.text)

{"tokens":[{"token":"the","start_offset":0,"end_offset":3,"type":"<ALPHANUM>","position":0},{"token":"2","start_offset":4,"end_offset":5,"type":"<NUM>","position":1},{"token":"quick","start_offset":6,"end_offset":11,"type":"<ALPHANUM>","position":2},{"token":"brown","start_offset":12,"end_offset":17,"type":"<ALPHANUM>","position":3},{"token":"foxes","start_offset":18,"end_offset":23,"type":"<ALPHANUM>","position":4},{"token":"jumped","start_offset":24,"end_offset":30,"type":"<ALPHANUM>","position":5},{"token":"over","start_offset":31,"end_offset":35,"type":"<ALPHANUM>","position":6},{"token":"the","start_offset":36,"end_offset":39,"type":"<ALPHANUM>","position":7},{"token":"lazy","start_offset":40,"end_offset":44,"type":"<ALPHANUM>","position":8},{"token":"dog's","start_offset":45,"end_offset":50,"type":"<ALPHANUM>","position":9},{"token":"bone","start_offset":51,"end_offset":55,"type":"<ALPHANUM>","position":10}]}


In [62]:
import requests

# Fetch the definition of `products` and print it as a decoded dict.
index_url = f'{esHost}/products'
response = requests.get(index_url)
responseJson = response.json()
print(responseJson)

{'products': {'aliases': {}, 'mappings': {'properties': {'age': {'type': 'integer'}, 'email': {'type': 'keyword'}, 'name': {'type': 'text'}}}, 'settings': {'index': {'routing': {'allocation': {'include': {'_tier_preference': 'data_content'}}}, 'number_of_shards': '1', 'provided_name': 'products', 'creation_date': '1654113299808', 'number_of_replicas': '0', 'uuid': 'KG-OghRWQ5mPSXZoVHafYw', 'version': {'created': '8020099'}}}}}


In [72]:
import requests

# Drop the existing `products` index so it can be recreated with custom analysis settings.
index_url = f'{esHost}/products'
response = requests.delete(index_url)
print(response.text)

# Settings defining `my_english_analyzer`: a `standard`-type analyzer with a
# maximum token length of 5 and the `_english_` stop-word list.
doc = {
    "settings": {
        "analysis": {
            "analyzer": {
                "my_english_analyzer": {
                    "type": "standard",
                    "max_token_length": 5,
                    "stopwords": "_english_",
                },
            },
        },
    },
}

# Recreate the index with those settings and print the decoded acknowledgement.
response = requests.put(index_url, json=doc)
responseJson = response.json()
print(responseJson)

{"acknowledged":true}
{'acknowledged': True, 'shards_acknowledged': True, 'index': 'products'}


In [73]:
import requests

# Run the index-level analyzer `my_english_analyzer` over the same sample sentence.
doc = {
    "analyzer": "my_english_analyzer",
    "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone.",
}

# The analyzer is defined on `products`, so the request goes to that index's analyze endpoint.
analyze_url = f'{esHost}/products/_analyze'
response = requests.post(analyze_url, json=doc)
responseJson = response.json()
print(responseJson)

{'tokens': [{'token': '2', 'start_offset': 4, 'end_offset': 5, 'type': '<NUM>', 'position': 1}, {'token': 'quick', 'start_offset': 6, 'end_offset': 11, 'type': '<ALPHANUM>', 'position': 2}, {'token': 'brown', 'start_offset': 12, 'end_offset': 17, 'type': '<ALPHANUM>', 'position': 3}, {'token': 'foxes', 'start_offset': 18, 'end_offset': 23, 'type': '<ALPHANUM>', 'position': 4}, {'token': 'jumpe', 'start_offset': 24, 'end_offset': 29, 'type': '<ALPHANUM>', 'position': 5}, {'token': 'd', 'start_offset': 29, 'end_offset': 30, 'type': '<ALPHANUM>', 'position': 6}, {'token': 'over', 'start_offset': 31, 'end_offset': 35, 'type': '<ALPHANUM>', 'position': 7}, {'token': 'lazy', 'start_offset': 40, 'end_offset': 44, 'type': '<ALPHANUM>', 'position': 9}, {'token': "dog's", 'start_offset': 45, 'end_offset': 50, 'type': '<ALPHANUM>', 'position': 10}, {'token': 'bone', 'start_offset': 51, 'end_offset': 55, 'type': '<ALPHANUM>', 'position': 11}]}


### Pipeline

https://www.elastic.co/guide/en/elasticsearch/reference/current/ingest.html

# Tests

# Single-node Elasticsearch + Kibana stack over plain HTTP (security disabled).
version: "3.4"

services:
  es01:
    image: docker.elastic.co/elasticsearch/elasticsearch:8.2.0
    ports:
      - "9200:9200"
    environment:
      - ELASTIC_PASSWORD=example
      - KIBANA_PASSWORD=example

      - node.name=es01
      # cluster.initial_master_nodes is deliberately not set: Elasticsearch refuses
      # to start when it is combined with discovery.type=single-node.
      - xpack.security.enabled=false
      - discovery.type=single-node

    volumes:
      - ./certs:/usr/share/elasticsearch/config/certs
      - ./esdata01:/usr/share/elasticsearch/data

    deploy:
      resources:
        limits:
          # NOTE(review): 256M looks very tight for Elasticsearch - confirm it starts reliably.
          memory: 256M
    # Unlimited memlock for the container.
    ulimits:
      memlock:
        soft: -1
        hard: -1

  # Kibana UI, pointed at es01 over HTTP.
  kibana:
    image: docker.elastic.co/kibana/kibana:8.2.0
    volumes:
      - ./certs:/usr/share/kibana/config/certs
    ports:
      - "5601:5601"
    deploy:
      resources:
        limits:
          memory: 256M
    environment:
      - SERVERNAME=kibana
      - ELASTICSEARCH_HOSTS=http://es01:9200
      - ELASTICSEARCH_USERNAME=kibana_system
      - ELASTICSEARCH_PASSWORD=example