In [392]:
# Notebook banner (plain string: there is nothing to interpolate).
print("ELK Upgrade Index Query Verification Logic")

ELK Upgrade Index Query Verification Logic


In [393]:
from elasticsearch import Elasticsearch
import os
import json
import pandas as pd
from dotenv import load_dotenv
import warnings
warnings.filterwarnings("ignore")

In [394]:
# Requires python-dotenv: pip install python-dotenv
# Searches for a .env file in the local folder and loads its variables.
load_dotenv()

True

In [395]:
def get_headers():
    ''' Build the HTTP headers sent with every Elasticsearch request.

    The Authorization value is read from the BASIC_AUTH environment variable.
    When that variable is unset or empty the header is left out entirely,
    instead of sending the literal string "None" as a credential.

    Returns:
        dict: header name -> header value.
    '''
    headers = {'Content-type': 'application/json'}
    basic_auth = os.getenv('BASIC_AUTH')
    if basic_auth:
        headers['Authorization'] = basic_auth
    # A 'Connection': 'close' header was tried here and is left disabled.
    return headers

In [396]:
# Endpoints of the two clusters being compared: the source cluster the
# indices come from and the target cluster they were moved to.
source_es_host = "http://localhost:9201"
target_es_host = "http://localhost:9202"

In [397]:
def get_es_instance(host):
    ''' Create an Elasticsearch client for the given host URL.

    The client sends the headers from get_headers(), is configured with
    timeout=5 and has certificate verification switched off.
    '''
    return Elasticsearch(
        hosts=f"{host}",
        headers=get_headers(),
        timeout=5,
        verify_certs=False,
    )

# Source cluster
es_obj_s_client = get_es_instance(source_es_host)
# Target cluster
es_obj_t_client = get_es_instance(target_es_host)

In [398]:
# Fetch the health summary of the source cluster and pretty-print it as JSON.
# `resp` is reused by the following cell.
resp = es_obj_s_client.cluster.health()
print(json.dumps(resp, indent=2))

{
  "cluster_name": "docker-cluster",
  "status": "yellow",
  "timed_out": false,
  "number_of_nodes": 1,
  "number_of_data_nodes": 1,
  "active_primary_shards": 127,
  "active_shards": 127,
  "relocating_shards": 0,
  "initializing_shards": 0,
  "unassigned_shards": 127,
  "delayed_unassigned_shards": 0,
  "number_of_pending_tasks": 0,
  "number_of_in_flight_fetch": 0,
  "task_max_waiting_in_queue_millis": 0,
  "active_shards_percent_as_number": 50.0
}


In [399]:
# Render the health response currently held in `resp` as a one-row table.
df = pd.DataFrame([resp])
df.head(10)

Unnamed: 0,cluster_name,status,timed_out,number_of_nodes,number_of_data_nodes,active_primary_shards,active_shards,relocating_shards,initializing_shards,unassigned_shards,delayed_unassigned_shards,number_of_pending_tasks,number_of_in_flight_fetch,task_max_waiting_in_queue_millis,active_shards_percent_as_number
0,docker-cluster,yellow,False,1,1,127,127,0,0,127,0,0,0,0,50.0


In [400]:
# Fetch the health summary of the target cluster and pretty-print it as JSON.
# This rebinds `resp`, which the following cell reads.
resp = es_obj_t_client.cluster.health()
print(json.dumps(resp, indent=2))

{
  "cluster_name": "docker-elasticsearch",
  "status": "yellow",
  "timed_out": false,
  "number_of_nodes": 1,
  "number_of_data_nodes": 1,
  "active_primary_shards": 81,
  "active_shards": 81,
  "relocating_shards": 0,
  "initializing_shards": 0,
  "unassigned_shards": 57,
  "delayed_unassigned_shards": 0,
  "number_of_pending_tasks": 0,
  "number_of_in_flight_fetch": 0,
  "task_max_waiting_in_queue_millis": 0,
  "active_shards_percent_as_number": 58.69565217391305
}


In [401]:
# Render the health response currently held in `resp` as a one-row table.
df = pd.DataFrame([resp])
df.head(10)

Unnamed: 0,cluster_name,status,timed_out,number_of_nodes,number_of_data_nodes,active_primary_shards,active_shards,relocating_shards,initializing_shards,unassigned_shards,delayed_unassigned_shards,number_of_pending_tasks,number_of_in_flight_fetch,task_max_waiting_in_queue_millis,active_shards_percent_as_number
0,docker-elasticsearch,yellow,False,1,1,81,81,0,0,57,0,0,0,0,58.695652


### Verify all ES indices, since every index was recreated when moving from ES v5 to ES v8 and its custom mappings were transformed
* Verify all ES indices with query DSL

In [402]:
# Extract the list of indices to check from the source cluster.
''' localhost '''
source_idx_lists = ['es_pipeline_upload_test_wm']

# Alternative index selections, kept for reference:
# source_idx_lists = es_obj_s_client.indices.get("*")
# source_idx_lists = ['wx_order_02072022_22_2_1']

In [403]:
# print(source_idx_lists)

In [404]:
# Shared accumulator filled by es_search(): index name -> list of hit totals,
# one entry appended per es_search() call.
result_dict = {}

In [405]:
def get_es_version_v5_info(es_client):
    ''' Tell whether the cluster behind es_client runs Elasticsearch 5.x.

    Only the major component of version.number is compared. A substring test
    such as "'5.' in number" would also match versions like "8.5.0" or
    "7.15.2" and misreport them as v5.

    Args:
        es_client: object whose info() returns a mapping holding the dotted
            version string under ['version']['number'].

    Returns:
        bool: True when the major version is 5, otherwise False.
    '''
    version_number = str(es_client.info()['version']['number'])
    major = version_number.split('.', 1)[0]
    return major == '5'
    

In [406]:
def es_search(es_client, indices=None, results=None):
    ''' Count the documents of each index on one cluster and record the totals.

    Runs a match_all search per index and appends the reported hit total to
    that index's list in the results mapping. Calling this once for the source
    cluster and once for the target cluster therefore leaves
    {index: [source_total, target_total]}.

    Args:
        es_client: Elasticsearch client of the cluster to query.
        indices: iterable of index names to check; defaults to the
            notebook-level source_idx_lists.
        results: dict updated in place; defaults to the notebook-level
            result_dict.

    Returns:
        None. The query, any per-index error and the accumulated results are
        printed.
    '''
    index_names = source_idx_lists if indices is None else indices
    counts_by_index = result_dict if results is None else results

    query = {
        "query": {
            "match_all": {}
        }
    }

    try:
        # Probe the version once per call rather than once per index: every
        # probe is a round trip to the cluster.
        is_v5 = get_es_version_v5_info(es_client)
    except Exception as e:
        # Cluster not reachable / unexpected info payload: report and give up.
        print(e)
        return

    if not is_v5:
        # Non-v5 clusters need this flag to report an exact total.
        query["track_total_hits"] = True

    print(f"{es_client}, query : {json.dumps(query, indent=2)}")

    for idx in index_names:
        try:
            resp = es_client.search(index=idx, body=query)
            total = resp['hits']['total']
            # Newer responses wrap the total as {"value": N, ...}; v5 returns
            # a plain integer.
            doc_count = total['value'] if isinstance(total, dict) else total
        except Exception as e:
            # Best effort: keep checking the remaining indices, and record
            # None so the per-cluster positions in each list stay aligned.
            print(f"{idx} : {e}")
            doc_count = None

        if idx in counts_by_index:
            # Show what earlier calls already recorded for this index.
            print(counts_by_index[idx])
        counts_by_index.setdefault(idx, []).append(doc_count)

    print(json.dumps(counts_by_index, indent=2))

In [407]:
# Record the totals of the source cluster first, then those of the target
# cluster, so each index ends up with [source_total, target_total].
es_search(es_obj_s_client)
es_search(es_obj_t_client)

<Elasticsearch([{'host': 'localhost', 'port': 9201}])>, query : {
  "query": {
    "match_all": {}
  }
}
{
  "es_pipeline_upload_test_wm": [
    0
  ]
}
<Elasticsearch([{'host': 'localhost', 'port': 9202}])>, query : {
  "query": {
    "match_all": {}
  },
  "track_total_hits": true
}
[0]
{
  "es_pipeline_upload_test_wm": [
    0,
    0
  ]
}


In [408]:
# Final summary: every index with the totals recorded by the es_search() calls,
# in call order. The values are only printed here; they are not compared.
print(f"Verify all ES indices : {json.dumps(result_dict, indent=2)}")

Verify all ES indices : {
  "es_pipeline_upload_test_wm": [
    0,
    0
  ]
}
