In [162]:
# Notebook title banner
print("ELK Upgrade Index Query Verification Logic")

ELK Upgrade Index Query Verification Logic


In [163]:
from elasticsearch import Elasticsearch
import os
import json
import pandas as pd
from dotenv import load_dotenv
import warnings
warnings.filterwarnings("ignore")

In [164]:
# Requires: pip install python-dotenv
# Searches for a .env file in the local folder and loads its variables
load_dotenv()

True

In [165]:
def get_headers():
    ''' Build the HTTP headers sent with every Elasticsearch request.

    Returns a dict with the JSON content type and, when the BASIC_AUTH
    environment variable is set to a non-empty value, an Authorization
    header carrying that value verbatim.
    '''
    headers = {'Content-type': 'application/json'}
    basic_auth = os.getenv('BASIC_AUTH')
    if basic_auth:
        # Formatting a missing variable would send the literal text "None"
        # as credentials, so the header is left out when nothing is configured.
        headers['Authorization'] = basic_auth
    # 'Connection': 'close'
    return headers

In [166]:
# Endpoint of the source cluster
source_es_host = "http://localhost:9201"
# Endpoint of the target cluster
target_es_host = "http://localhost:9202"

In [167]:
def get_es_instance(host):
    ''' Create an Elasticsearch client for the given host URL.

    The client sends the headers from get_headers(), is created with
    timeout=5 and has certificate verification turned off.
    '''
    return Elasticsearch(
        hosts=f"{host}",
        headers=get_headers(),
        timeout=5,
        verify_certs=False,
    )

# Source cluster
es_obj_s_client = get_es_instance(source_es_host)
# Target cluster
es_obj_t_client = get_es_instance(target_es_host)

In [168]:
# Fetch the source cluster's health summary and pretty-print it as JSON
resp = es_obj_s_client.cluster.health()
print(json.dumps(resp, indent=2))

{
  "cluster_name": "docker-cluster",
  "status": "yellow",
  "timed_out": false,
  "number_of_nodes": 1,
  "number_of_data_nodes": 1,
  "active_primary_shards": 129,
  "active_shards": 129,
  "relocating_shards": 0,
  "initializing_shards": 0,
  "unassigned_shards": 129,
  "delayed_unassigned_shards": 0,
  "number_of_pending_tasks": 0,
  "number_of_in_flight_fetch": 0,
  "task_max_waiting_in_queue_millis": 0,
  "active_shards_percent_as_number": 50.0
}


In [169]:
# One-row table of the health response fetched in the previous cell
df = pd.DataFrame([resp])
df.head(10)

Unnamed: 0,cluster_name,status,timed_out,number_of_nodes,number_of_data_nodes,active_primary_shards,active_shards,relocating_shards,initializing_shards,unassigned_shards,delayed_unassigned_shards,number_of_pending_tasks,number_of_in_flight_fetch,task_max_waiting_in_queue_millis,active_shards_percent_as_number
0,docker-cluster,yellow,False,1,1,129,129,0,0,129,0,0,0,0,50.0


In [170]:
# Fetch the target cluster's health summary and pretty-print it as JSON
# (reuses the name `resp`, replacing the source cluster's response)
resp = es_obj_t_client.cluster.health()
print(json.dumps(resp, indent=2))

{
  "cluster_name": "docker-elasticsearch",
  "status": "yellow",
  "timed_out": false,
  "number_of_nodes": 1,
  "number_of_data_nodes": 1,
  "active_primary_shards": 84,
  "active_shards": 84,
  "relocating_shards": 0,
  "initializing_shards": 0,
  "unassigned_shards": 57,
  "delayed_unassigned_shards": 0,
  "number_of_pending_tasks": 0,
  "number_of_in_flight_fetch": 0,
  "task_max_waiting_in_queue_millis": 0,
  "active_shards_percent_as_number": 59.57446808510638
}


In [171]:
# One-row table of the health response fetched in the previous cell
df = pd.DataFrame([resp])
df.head(10)

Unnamed: 0,cluster_name,status,timed_out,number_of_nodes,number_of_data_nodes,active_primary_shards,active_shards,relocating_shards,initializing_shards,unassigned_shards,delayed_unassigned_shards,number_of_pending_tasks,number_of_in_flight_fetch,task_max_waiting_in_queue_millis,active_shards_percent_as_number
0,docker-elasticsearch,yellow,False,1,1,84,84,0,0,57,0,0,0,0,59.574468


### Verify all ES indices, since every index was recreated from ES v5 to ES v8 with transformed custom mappings
* Verify all ES indices with the same query DSL on both clusters

In [172]:
# Extract a list of indices from the source cluster
''' localhost '''
source_idx_lists = ['es_pipeline_upload_test_wm']

# source_idx_lists = es_obj_s_client.indices.get("*")
# source_idx_lists = ['wx_order_02072022_22_2_1']

In [173]:
# print(source_idx_lists)

In [174]:
# index name -> list of hit totals; es_search() appends one entry per call
result_dict = {}
# NOTE(review): not referenced anywhere else in the visible cells — confirm it is still needed
df_result_dict = {}

In [175]:
def get_es_version_v5_info(es_client):
    ''' Return True when the cluster behind es_client reports a 5.x version.

    Only the major component of ``version.number`` from the cluster info
    response is compared. A substring test for '5.' would also match
    versions such as '8.5.0' or '7.15.2' and misclassify them as v5.
    '''
    version_number = str(es_client.info()['version']['number'])
    major = version_number.split('.', 1)[0]
    return major == '5'
    

In [176]:
def es_search(es_client):
    ''' Run a match_all search per index and record the total hit counts.

    For every index name in the module-level ``source_idx_lists`` the hit
    total is appended to that index's list in the module-level
    ``result_dict``. Calling this once for the source cluster and then once
    for the target cluster leaves ``[source_total, target_total]`` per index.

    Any exception is printed rather than raised. When that happens the
    remaining indices are skipped, so ``result_dict`` may be only partially
    updated for this client.
    '''
    try:
        # The same query is executed against the source and the target
        # cluster to verify the custom mappings.
        query = {
            "query": {
                "match_all": {}
            }
        }

        # Looked up once per call: the cluster version cannot change between
        # indices, and every lookup is a request to the cluster.
        is_v5 = get_es_version_v5_info(es_client)

        if not is_v5:
            # Non-v5 cluster: add this option to get the exact number of
            # search results.
            query.update({"track_total_hits": True})

        print(f"{es_client}, query : {json.dumps(query, indent=2)}")

        for idx in source_idx_lists:
            resp = es_client.search(index=idx, body=query)
            total = resp['hits']['total']
            if not is_v5:
                # Non-v5 responses wrap the count in an object with a 'value' key.
                total = total['value']
            if idx in result_dict:
                # Show what was already recorded for this index before appending.
                print(result_dict.get(idx))
            result_dict.setdefault(idx, []).append(total)
        print(json.dumps(result_dict, indent=2))
    except Exception as e:
        print(e)

In [177]:
# Source cluster first, then target: each call appends one count per index
# to result_dict, so the call order decides which list position is which.
es_search(es_obj_s_client)
es_search(es_obj_t_client)

<Elasticsearch([{'host': 'localhost', 'port': 9201}])>, query : {
  "query": {
    "match_all": {}
  }
}
{
  "es_pipeline_upload_test_wm": [
    0
  ]
}
<Elasticsearch([{'host': 'localhost', 'port': 9202}])>, query : {
  "query": {
    "match_all": {}
  },
  "track_total_hits": true
}
[0]
{
  "es_pipeline_upload_test_wm": [
    0,
    0
  ]
}


In [178]:
# Show the per-index totals collected by the es_search() calls
print(f"Verify all ES indices : {json.dumps(result_dict, indent=2)}")

Verify all ES indices : {
  "es_pipeline_upload_test_wm": [
    0,
    0
  ]
}


In [179]:
# Reshape result_dict (index name -> [first count, second count]) into
# parallel column lists suitable for building a DataFrame.
indices_name, source_cnt, target_cnt, is_same = [], [], [], []
for index_name, counts in result_dict.items():
    indices_name.append(index_name)
    source_total, target_total = counts[0], counts[1]
    is_same.append(int(source_total) == int(target_total))
    source_cnt.append(source_total)
    target_cnt.append(target_total)

# NOTE: this rebinds result_dict to the column layout, so the cell cannot be
# re-run without first re-running the cells that fill result_dict.
result_dict = dict(zip(
    ('Indices', 'Sourc_cnt', 'Target_cnt', 'Same?'),
    (indices_name, source_cnt, target_cnt, is_same),
))

In [180]:
# Sample of the dict-of-lists layout, from
# https://docs.kanaries.net/ko/topics/Pandas/pandas-add-column
# data = {
#     'Name': ['Alice', 'Bob', 'Charlie', 'David'],
#     'Age': [25, 30, 35, 40]
# }

df = pd.DataFrame(data=result_dict)
display(df)

Unnamed: 0,Indices,Sourc_cnt,Target_cnt,Same?
0,es_pipeline_upload_test_wm,0,0,True
