# Elasticsearch

Image: https://hub.docker.com/r/bitnami/elasticsearch

Ports: 9200, 9201, 9300
Mount: -v /path/to/elasticsearch-data-persistence:/bitnami/elasticsearch/data

curl -XPUT -H "Content-Type: application/json" http://localhost:9200/_cluster/settings -d '{ "transient": { "cluster.routing.allocation.disk.threshold_enabled": false } }'

curl -XPUT -H "Content-Type: application/json" http://localhost:9200/_all/_settings -d '{"index.blocks.read_only_allow_delete": null}'


http://localhost:9200/?pretty

Array mapping: https://stackoverflow.com/questions/67581594/elasticsearch-mapping-for-array

Habr guide: https://habr.com/ru/articles/280488/

# Kibana

Image: https://github.com/bitnami/containers/tree/main/bitnami/kibana


docker network create kibana_network

docker run -d -p 9200:9200 --name elasticsearch --net=kibana_network bitnami/elasticsearch

docker run -d -p 5601:5601 --name kibana --net=kibana_network -e KIBANA_ELASTICSEARCH_URL=elasticsearch bitnami/kibana


docker run -v /path/to/kibana-persistence:/bitnami/kibana bitnami/kibana:latest

# Containers

## elasticsearch

```bash
docker run -d -p 9200:9200 -v "C:\Users\sergey.astakhov\Desktop\volumes\elastic_search_volume":/bitnami/elasticsearch/data --name elasticsearch --net=kibana_network bitnami/elasticsearch
```

## kibana

```bash
docker run -d -p 5601:5601 -v "C:\Users\sergey.astakhov\Desktop\volumes\kibana_volume":/bitnami/kibana --name kibana --net=kibana_network -e KIBANA_ELASTICSEARCH_URL=elasticsearch bitnami/kibana
```

All orders: http://localhost:9200/order/_search?pretty

All masters: http://localhost:9200/master/_search?pretty

Dashboard: http://localhost:5601/app/dashboards#/view/bdcf6af3-3542-4936-94fd-b0f04e3dcc86?_g=(filters:!(),refreshInterval:(pause:!t,value:60000),time:(from:'2023-12-15T00:00:00.000Z',to:'2024-02-21T00:00:00.000Z'))

In [156]:
# %%html
# <iframe src="http://localhost:5601/app/dashboards#/view/bdcf6af3-3542-4936-94fd-b0f04e3dcc86?embed=true&_g=(refreshInterval:(pause:!t,value:60000),time:(from:'2023-12-15T00:00:00.000Z',to:'2024-02-21T00:00:00.000Z'))&_a=()" height="600" width="800"></iframe>

In [4]:
# import requests

In [5]:
# r = requests.get('http://127.0.0.1:5601/api/kibana/dashboards/export?dashboard=bdcf6af3-3542-4936-94fd-b0f04e3dcc86')

In [6]:
# import json
# with open('../dashboard.json', 'w') as fp:
#     json.dump(r.json(), fp)

In [7]:
# r = requests.put('http://localhost:9200/_cluster/settings', json={ "transient": { "cluster.routing.allocation.disk.threshold_enabled": False } })

In [8]:
# r.json()

In [6]:
# r = requests.put('http://localhost:9200/_all/_settings', json={"index.blocks.read_only_allow_delete": None})

In [11]:
# r.json()

In [3]:
import warnings
# Silence every warning by default so the notebook output stays readable ...
warnings.filterwarnings('ignore')
# ... but turn DeprecationWarning into an exception. filterwarnings() puts each
# new rule at the front of the filter list, so this later, more specific rule
# is matched before the blanket 'ignore' registered above.
warnings.filterwarnings('error', category=DeprecationWarning)

In [4]:
import json
from elasticsearch import Elasticsearch

In [5]:
# Connection details of the Elasticsearch node published on the local machine
# (plain HTTP, port 9200).
es_node = {"host": "127.0.0.1", "scheme": "http", "port": 9200}
client = Elasticsearch([es_node])

# Last expression of the cell: the notebook displays whether the node answered.
client.ping()

True

# Index 1 - orders

In [69]:
indexName = "order"

# Drop a previously created index so the cells that follow can recreate it
# from scratch.
order_index_present = client.indices.exists(index=indexName)
if order_index_present:
    client.indices.delete(index=indexName)

In [70]:
# Index settings defining "custom_analyzer": standard tokenizer followed by
# lowercasing, Russian stop-word removal and Russian snowball stemming.
_russian_stopwords = {"type": "stop", "stopwords": "_russian_"}
_russian_stemmer = {"type": "snowball", "language": "russian"}

_custom_analyzer = {
    "type": "custom",
    "tokenizer": "standard",
    # The two named filters refer to the definitions registered under
    # "filter" in analyzer_settings below.
    "filter": ["lowercase", "ru_stop", "snow_ru_stemmer"],
}

analyzer_settings = {
    "settings": {
        "analysis": {
            "filter": {
                "ru_stop": _russian_stopwords,
                "snow_ru_stemmer": _russian_stemmer,
            },
            "analyzer": {"custom_analyzer": _custom_analyzer},
        }
    }
}

In [71]:
def _scalar(es_type):
    """Return the mapping of a field that only declares its type."""
    return {"type": es_type}


def _russian_text(with_keyword=False, fielddata=False):
    """Return the mapping of a text field analyzed with "custom_analyzer".

    with_keyword adds a "keyword" sub-field that holds the raw value.
    fielddata sets "fielddata": True on the text field itself.
    """
    field = {"type": "text"}
    if fielddata:
        field["fielddata"] = True
    if with_keyword:
        # The sub-field is declared without an analyzer of its own.
        field["fields"] = {"keyword": {"type": "keyword"}}
    field["analyzer"] = "custom_analyzer"
    return field


# Field definitions for the "order" index.
mappings_order = {
    "mappings": {
        "properties": {
            "order_customer_desc": _russian_text(),
            "order_customer_id": _scalar("long"),
            "order_date": _scalar("date"),
            "order_details_desc": _russian_text(),
            "order_due_date": _scalar("date"),
            "order_fact_completion_date": _scalar("date"),
            "order_id": _scalar("long"),
            "order_master_id": _scalar("long"),
            "order_parts": _russian_text(with_keyword=True, fielddata=True),
            "order_price": _scalar("float"),
            "repair_types": _russian_text(with_keyword=True),
        }
    }
}


# Request body for index creation: the shared analysis settings plus the
# mappings of the "order" index, merged into a single dict.
order_index_body = {}
order_index_body.update(analyzer_settings)
order_index_body.update(mappings_order)

# Last expression of the cell: the notebook displays the creation response.
client.indices.create(index=indexName, body=order_index_body)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'order'})

In [72]:
# Load the pre-generated order documents. The file is opened explicitly as
# UTF-8 (the JSON interchange encoding) rather than with the platform default,
# which may be a legacy code page that fails on or garbles the Cyrillic
# values stored in the documents.
with open('../orders.json', 'r', encoding='utf-8') as f:
    dataStore = json.load(f)

# Index the documents one by one. Every entry must provide the target "index",
# the document "id" and the document "body".
failed = 0
for position, data in enumerate(dataStore):
    try:
        client.index(
            index=data["index"],
            id=data["id"],
            body=data["body"]
        )
    except Exception as e:
        # Best-effort load: report the failure on its own line and carry on,
        # but count it so the summary below does not claim a clean insert.
        failed += 1
        print("document #{} not indexed: {}".format(position, e))

if failed:
    print("index 'order': {} of {} documents failed to insert".format(failed, len(dataStore)))
else:
    print("index 'order' data inserted")

index 'order' data inserted


# Query 1 - orders

In [73]:
indexName = "order"

# Inner buckets: group the orders of each month by the "keyword" sub-field of
# order_parts.
parts_by_keyword = {"terms": {"field": "order_parts.keyword"}}

# Outer buckets: one per calendar month of order_date.
monthly_histogram = {
    "field": "order_date",
    "calendar_interval": "month",
    "format": "yyyy-MM-dd",  # control the output format
}

searchBody = {
    "_source": True,
    "aggs": {
        "over_months": {
            "date_histogram": monthly_histogram,
            "aggs": {"over_parts": parts_by_keyword},
        }
    },
}

result = client.search(index=indexName, body=searchBody)

In [74]:
# Display the per-month buckets of the "over_months" aggregation.
result['aggregations']['over_months']['buckets']

[{'key_as_string': '2023-12-01',
  'key': 1701388800000,
  'doc_count': 13,
  'over_parts': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'usb-разъем', 'doc_count': 5},
    {'key': 'дисплей', 'doc_count': 4},
    {'key': 'корпус', 'doc_count': 4},
    {'key': 'аккумулятор', 'doc_count': 3}]}},
 {'key_as_string': '2024-01-01',
  'key': 1704067200000,
  'doc_count': 48,
  'over_parts': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'usb-разъем', 'doc_count': 23},
    {'key': 'дисплей', 'doc_count': 22},
    {'key': 'корпус', 'doc_count': 18},
    {'key': 'аккумулятор', 'doc_count': 16}]}},
 {'key_as_string': '2024-02-01',
  'key': 1706745600000,
  'doc_count': 37,
  'over_parts': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'дисплей', 'doc_count': 19},
    {'key': 'usb-разъем', 'doc_count': 14},
    {'key': 'аккумулятор', 'doc_count': 14},
    {'key': 'корпус', 'doc_

In [76]:
indexName = "order"

# Same monthly histogram as the keyword-based query, but the nested terms
# aggregation targets the text field "order_parts" itself instead of its
# "keyword" sub-field.
terms_on_text_field = {"field": "order_parts"}
per_month = {
    "date_histogram": {
        "field": "order_date",
        "calendar_interval": "month",
        "format": "yyyy-MM-dd",  # control the output format
    },
    "aggs": {"over_parts": {"terms": terms_on_text_field}},
}

searchBody = {"_source": True, "aggs": {"over_months": per_month}}

result = client.search(index=indexName, body=searchBody)

In [77]:
# Display the per-month buckets of the "over_months" aggregation.
result['aggregations']['over_months']['buckets']

[{'key_as_string': '2023-12-01',
  'key': 1701388800000,
  'doc_count': 13,
  'over_parts': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'usb', 'doc_count': 5},
    {'key': 'разъ', 'doc_count': 5},
    {'key': 'диспл', 'doc_count': 4},
    {'key': 'корпус', 'doc_count': 4},
    {'key': 'аккумулятор', 'doc_count': 3}]}},
 {'key_as_string': '2024-01-01',
  'key': 1704067200000,
  'doc_count': 48,
  'over_parts': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'usb', 'doc_count': 23},
    {'key': 'разъ', 'doc_count': 23},
    {'key': 'диспл', 'doc_count': 22},
    {'key': 'корпус', 'doc_count': 18},
    {'key': 'аккумулятор', 'doc_count': 16}]}},
 {'key_as_string': '2024-02-01',
  'key': 1706745600000,
  'doc_count': 37,
  'over_parts': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'диспл', 'doc_count': 19},
    {'key': 'usb', 'doc_count': 14},
    {'key': 'аккумулятор

# Index 2 - masters

In [56]:
indexName = "master"

# Drop a previously created index so the cells that follow can recreate it
# from scratch.
master_index_present = client.indices.exists(index=indexName)
if master_index_present:
    client.indices.delete(index=indexName)

In [57]:
# Field definitions for the "master" index, assembled one field at a time.
_master_fields = {}

# Description: text analyzed with "custom_analyzer".
_master_fields["master_desc"] = {"type": "text", "analyzer": "custom_analyzer"}

# Feedbacks: analyzed text plus a "keyword" sub-field.
_master_fields["master_feedbacks"] = {
    "type": "text",
    "fields": {"keyword": {"type": "keyword"}},
    "analyzer": "custom_analyzer",
}

# Numeric identifier of the master.
_master_fields["master_id"] = {"type": "long"}

mappings_master = {"mappings": {"properties": _master_fields}}

# Request body for index creation: the shared analysis settings plus the
# mappings of the "master" index, merged into a single dict.
master_index_body = {}
master_index_body.update(analyzer_settings)
master_index_body.update(mappings_master)

# Last expression of the cell: the notebook displays the creation response.
client.indices.create(index=indexName, body=master_index_body)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'master'})

In [58]:
# Load the pre-generated master documents. The file is opened explicitly as
# UTF-8 (the JSON interchange encoding) rather than with the platform default,
# which may be a legacy code page that fails on or garbles the Cyrillic
# values stored in the documents.
with open('../masters.json', 'r', encoding='utf-8') as f:
    dataStore = json.load(f)

# Index the documents one by one. Every entry must provide the target "index",
# the document "id" and the document "body".
failed = 0
for position, data in enumerate(dataStore):
    try:
        client.index(
            index=data["index"],
            id=data["id"],
            body=data["body"]
        )
    except Exception as e:
        # Best-effort load: report the failure on its own line and carry on,
        # but count it so the summary below does not claim a clean insert.
        failed += 1
        print("document #{} not indexed: {}".format(position, e))

if failed:
    print("index 'master': {} of {} documents failed to insert".format(failed, len(dataStore)))
else:
    print("index 'master' data inserted")

index 'master' data inserted


# Query 2 - masters

In [63]:
indexName = "master"

# Full-text match against the analyzed description field.
description_match = {"match": {"master_desc": "стажа"}}

searchBody = {
    # True returns the whole stored document for every hit.
    "_source": True,
    "query": description_match,
}

result = client.search(index=indexName, body=searchBody)

In [64]:
# Display the "hits" section of the search response.
result['hits']

{'total': {'value': 8, 'relation': 'eq'},
 'max_score': 0.8435577,
 'hits': [{'_index': 'master',
   '_type': '_doc',
   '_id': '22653',
   '_score': 0.8435577,
   '_source': {'master_id': 22653,
    'master_desc': 'Акулина Рудольфовна Никитина, Стаж Работы: 15 Л./Г..',
    'master_feedbacks': ['ворчливый, аккуратный.',
     'ворчливый, медлительный.']}},
  {'_index': 'master',
   '_type': '_doc',
   '_id': '300714',
   '_score': 0.8435577,
   '_source': {'master_id': 300714,
    'master_desc': 'Вероника Петровна Силина, Стаж Работы: 12 Л./Г..',
    'master_feedbacks': ['ворчливый, медлительный.',
     'ворчливый, аккуратный.']}},
  {'_index': 'master',
   '_type': '_doc',
   '_id': '405064',
   '_score': 0.8435577,
   '_source': {'master_id': 405064,
    'master_desc': 'Хохлов Олег Харлампьевич, Стаж Работы: 3 Л./Г..',
    'master_feedbacks': ['ворчливый, медлительный.',
     'ворчливый, аккуратный.',
     'ворчливый, аккуратный.']}},
  {'_index': 'master',
   '_type': '_doc',
   '_id

In [None]:
# analyzer snowball filter

In [None]:
# analyzer for 2 fields

In [None]:
# kibana console

In [None]:
# Kibana Console request: orders per calendar month, split by order_parts.keyword.
GET order/_search
{
  "aggs": {
    "over_months": {
      "date_histogram": {
        "field": "order_date",
        "calendar_interval": "month",
        "format": "yyyy-MM-dd" 
      },
        "aggs":{
            "over_parts": {
              "terms": {
                "field": "order_parts.keyword"
              }
            }
        }
    }
  }
}

In [None]:
# Kibana Console request: full-text match on master_desc in the "master" index.
GET master/_search
{
  "query": {
    "match": {
      "master_desc": "стажа"
    }
  }
}