In [28]:
print(f"ES Mapping Compare via REST API")

ES Mapping Compare via REST API


In [29]:
#!pip install gradio

In [30]:
import time
import gradio as gr

In [None]:
from elasticsearch import Elasticsearch
import os
import json
import pandas as pd
import jsondiff
import logging
from dotenv import load_dotenv
import socket
import requests
import warnings
warnings.filterwarnings("ignore")

In [32]:
''' pip install python-dotenv'''
load_dotenv() # will search for .env file in local folder and load variables 

True

In [33]:
js_func = """
function refresh() {
    const url = new URL(window.location);

    if (url.searchParams.get('__theme') !== 'dark') {
        url.searchParams.set('__theme', 'dark');
        window.location.href = url.href;
    }
}
"""

In [34]:
def get_headers():
    ''' Elasticsearch Header '''
    return {
            'Content-type': 'application/json', 
            'Authorization' : '{}'.format(os.getenv('BASIC_AUTH')),
            # 'Connection': 'close'
    }

In [35]:
smart_suit_envs = {
    "QA-01" : "http://localhost:9201",
    "QA-22" : "http://localhost:9202"
}

In [36]:
def port_verify(port):
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    result = sock.connect_ex(('127.0.0.1',port))
    if result == 0:
       print("Port is open")
    else:
       print("Port is not open")
    sock.close()

In [37]:
''' https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/migration.html '''
''' In 8.0.0 responses are no longer the raw deserialized response body and instead an object with two properties, meta and body '''
def es_get_health(source_es, target_es):
    if source_es in smart_suit_envs.keys() and source_es in smart_suit_envs.keys():
        es_client = Elasticsearch(hosts="{}".format(smart_suit_envs.get(source_es)), headers=get_headers(), timeout=5,  verify_certs=False)
        # return json.dumps(es_client.cluster.health().body, indent=2), es_client.cluster.health()['status']
        return json.dumps(es_client.cluster.health(), indent=2), es_client.cluster.health()['status']
    else:
        return {},{}

In [38]:
def get_es_instance(host):
    es_client = Elasticsearch(hosts="{}".format(host), headers=get_headers(), timeout=5,  verify_certs=False)
    return es_client


In [39]:
response = {}
all_same_mapping = []
source_idx_cnt, target_idx_cnt = 0, 0

In [40]:
def compare_mapping(index_name, diff):
    ''' compare diff using jsondiff library '''
    if not diff:
        all_same_mapping.append(True)
        response.update({index_name : {'diff' : 'Same mapping'}})
    else:
        all_same_mapping.append(False)
        response.update({index_name : {'diff' : 'Different mapping', 'result' : diff}})
    return response, all_same_mapping

In [41]:
def es_version_verify(es_client):
    # print(es_client.info()['version']['number'], type(es_client.info()['version']['number']))
    ''' if es_client v.5.X '''
    if "5." in es_client.info()['version']['number']:
        return True
    else:
        return False

In [42]:
def lookup_type_in_indices(key):
    ''' lookup type we want to compare from the source es cluster '''
    if "OM_" in key or "WX_" in key or "ES_" in key or "ARCHIVE_" in key:
        return True
    return False

In [43]:
def get_mapping_from_properties(mapping, es_v5=False):
    if es_v5:
        return {"properties" : v2.get("properties") for k, v in mapping.items() for k1, v1 in v.items() for k2, v2 in v1.items() if lookup_type_in_indices(k2)} 
    else:
        return {'properties': v2 for k, v in mapping.items() for k1, v1 in v.items() for k2, v2 in v1.items() }
            

In [44]:
def lookup(es_obj_s_client, es_obj_t_client, source_idx_lists):
    global source_idx_cnt, target_idx_cnt
    try:
        for index_name in source_idx_lists:
            ''' real index '''
            source_idx_cnt +=1
            if index_name.startswith("wx_") or index_name.startswith("om_") or index_name.startswith("es_") or index_name.startswith("archive_es_"):
                # print('OKD : {}'.format(index_name))
                source_mapping = es_obj_s_client.indices.get_mapping(index=index_name)
                target_mapping = es_obj_t_client.indices.get_mapping(index=index_name)

                ''' Determin what version of es client is running '''
                # print(es_version_verify(es_obj_s_client), es_version_verify(es_obj_t_client))
                ''' get es_version as argument '''
                # print(f"source es version : {es_version_verify(es_obj_s_client)}")
                source_mappings = get_mapping_from_properties(source_mapping, es_v5=es_version_verify(es_obj_s_client))
                ''' get es_version as argument '''
                # print(f"target es version : {es_version_verify(es_obj_t_client)}")
                target_mappings = get_mapping_from_properties(target_mapping, es_v5=es_version_verify(es_obj_t_client))
        
                # Compare JSON objects using jsondiff
                diff = jsondiff.diff(source_mappings, target_mappings, marshal=True, syntax="symmetric")
                # diff = jsondiff.diff(source_mappings, target_mappings, marshal=True)
                
                ''' Compare mapping the specific index_name between source/target cluster '''
                compare_mapping(index_name, diff)
                target_idx_cnt += 1
    except Exception as e:
        # print(e)
        pass
    

In [45]:
def es_mapping_result(source,target):
    # print(source, target)
    global es_obj_s_client, es_obj_t_client
    ''' --------------------'''
    ''' Compare the custom mappings via the internal functions '''
    print(smart_suit_envs.get(source), smart_suit_envs.get(target))
    ''' Source cluster '''
    es_obj_s_client = get_es_instance(f"{smart_suit_envs.get(source)}")
    ''' Target cluster '''
    es_obj_t_client = get_es_instance(f"{smart_suit_envs.get(target)}")

    resp = es_obj_s_client.cluster.health()
    print(json.dumps(resp, indent=2))

    # source_idx_lists = ['om_whorder_02072022_22_2_1']
    source_idx_lists = list(es_obj_s_client.indices.get("*"))
    # print(source_idx_lists)

    ''' lookup all ES indices for compring between two clusters '''
    lookup(es_obj_s_client, es_obj_t_client, source_idx_lists)
    ''' --------------------'''
 
    return json.dumps(response,indent=2), all(all_same_mapping)
    # return response, all(all_same_mapping)

In [46]:
def call_rest_api(source, target):
    global source_idx_cnt, target_idx_cnt, response
    ''' --------------------'''
    ''' Compare the custom mappings via API'''
    http_urls = "http://{}:8001/index/all_indices_mapping_compare?source_cluster={}&target_cluster={}".format("localhost", smart_suit_envs.get(source), smart_suit_envs.get(target))
    resp = requests.get(url=http_urls, timeout=600)
    logging.info(f"resp status code : {resp.status_code}")
    
    if not resp.status_code == 200:
        response = {}
        source_idx_cnt = -1
        target_idx_cnt = -1
        return
                
    source_idx_cnt = resp.json()['The number of ES indices in the source es cluster']
    target_idx_cnt = resp.json()['The number of ES indices in the target es cluster that have the same index name as the source cluster']
    
    # return resp.json(), resp.json()['mappings_same']
    response = resp.json()
    

In [47]:
css = """
#warning {background-color: #FFCCCB}
.feedback textarea {font-size: 14px !important}
"""

In [48]:
PORT = 8092

In [49]:
port_verify(PORT)

Port is not open


In [50]:
# Gradio is an open-source Python package that allows you to quickly build a demo or web application for your machine learning model, API, or any arbitrary Python function. 
# You can then share a link to your demo or web application in just a few seconds using Gradio's built-in sharing features.
''' https://www.gradio.app/guides/quickstart '''
''' https://www.gradio.app/docs/gradio/label '''
''' Prerequisite: Gradio requires Python 3.10 or higher. --> We recommend installing Gradio using pip, pip install --upgrade gradio'''
# http://127.0.0.1:7880/?__theme=dark
# app = gr.Interface(fn=es_get_health, inputs=["text"], outputs="text")
with gr.Blocks(js=js_func, css=css) as app:
    gr.Markdown("# Compare the ES Custom Mappings")
    # host = gr.Textbox(value = "localhost:9201", show_label=True, label="host", elem_classes="feedback")
    source_es =  gr.Dropdown(label="Source ES Cluster", choices=["QA-01", "Pakistan"], value="QA-01")
    target_es =  gr.Dropdown(label="Target ES Cluster", choices=["QA-01", "QA-22"], value="QA-22")
    result_es_mapping_json = gr.Textbox(value = "", show_label=True, label="ES Mapping Json", elem_classes="feedback")
    result_es_mapping = gr.Label(value = "", show_label=True, label="ES Mapping Result")
    # gr.Interface(fn=es_get_health, inputs=["text"], outputs=["text", result_es_mapping])
    # gr.Interface(fn=es_get_health, inputs=[source_es, target_es], outputs=[result_es_mapping_json, result_es_mapping])
    gr.Interface(fn=es_mapping_result, inputs=[source_es,target_es], outputs=[result_es_mapping_json, result_es_mapping])
        
# app.launch(server_port=PORT)


In [51]:
''' Run the internal functions '''
es_mapping_result("QA-01", "QA-22")

''' Call the REST API '''
# call_rest_api("QA-01", "QA-22")

''' is Same Mapping? '''
is_same_mapping =  all(all_same_mapping) if all_same_mapping else api_is_same_mapping

http://localhost:9201 http://localhost:9202
{
  "cluster_name": "docker-cluster",
  "status": "yellow",
  "timed_out": false,
  "number_of_nodes": 1,
  "number_of_data_nodes": 1,
  "active_primary_shards": 118,
  "active_shards": 118,
  "relocating_shards": 0,
  "initializing_shards": 0,
  "unassigned_shards": 118,
  "delayed_unassigned_shards": 0,
  "number_of_pending_tasks": 0,
  "number_of_in_flight_fetch": 0,
  "task_max_waiting_in_queue_millis": 0,
  "active_shards_percent_as_number": 50.0
}


In [52]:
''' compare the mappings for a given ES index between the two env’s '''
print(json.dumps(response, indent=2))

''' I’m not sure if you have an easier way to compare the mappings between two env’s for a given ES index, but this online tool works for me '''
''' https://www.textcompare.org/ '''

{
  "archive_es_del_queue_active_test": {
    "diff": "Same mapping"
  },
  "es_pipeline_upload_test_om": {
    "diff": "Same mapping"
  },
  "es_pipeline_upload_test_wm": {
    "diff": "Same mapping"
  }
}


' https://www.textcompare.org/ '

In [53]:
# print(f"IDx Count is the Same? {source_idx_cnt}, {target_idx_cnt}")
df = pd.DataFrame({"source_es_cluster" : [es_obj_s_client], "source_idx_cnt" : [target_idx_cnt], "es_obj_s_client" : [es_obj_t_client], "target_idx_cnt" : [target_idx_cnt]})
df.head(10)

Unnamed: 0,source_es_cluster,source_idx_cnt,es_obj_s_client,target_idx_cnt
0,"<Elasticsearch([{'host': 'localhost', 'port': ...",3,"<Elasticsearch([{'host': 'localhost', 'port': ...",3


In [54]:
print('**')
print(f"Same mappings for all indices between {es_obj_s_client} and {es_obj_t_client}?")
if response:
    print(f"Same Mapping? {all(all_same_mapping)}")
print('**')

**
Same mappings for all indices between <Elasticsearch([{'host': 'localhost', 'port': 9201}])> and <Elasticsearch([{'host': 'localhost', 'port': 9202}])>?
Same Mapping? True
**
