In [1955]:
# Notebook banner: states what this notebook is for.
print("ES Mapping Compare via REST API")

ES Mapping Compare via REST API


In [1956]:
#!pip install gradio

In [1957]:
import time
import gradio as gr

In [1958]:
from elasticsearch import Elasticsearch
import os
import json
import pandas as pd
import jsondiff
import logging
from dotenv import load_dotenv
import socket
import warnings
warnings.filterwarnings("ignore")

In [1959]:
# JavaScript snippet handed to gr.Blocks(js=...) when the UI is built.
# Unless the page URL already carries `__theme=dark`, it sets that query
# parameter and navigates to the updated URL.
js_func = """
function refresh() {
    const url = new URL(window.location);

    if (url.searchParams.get('__theme') !== 'dark') {
        url.searchParams.set('__theme', 'dark');
        window.location.href = url.href;
    }
}
"""

In [1960]:
def get_headers():
    ''' Build the HTTP headers attached to the Elasticsearch clients.

    Returns:
        dict: always contains 'Content-type'. 'Authorization' is added only
        when the BASIC_AUTH environment variable holds a non-empty value.
    '''
    headers = {'Content-type': 'application/json'}
    basic_auth = os.getenv('BASIC_AUTH')
    # Formatting a missing variable would send the literal text "None" as the
    # credential, so the header is left out when nothing is configured.
    if basic_auth:
        headers['Authorization'] = basic_auth
    return headers

In [1961]:
# Environment name -> base URL of that environment's Elasticsearch endpoint.
# Looked up by name wherever a cluster is selected.
smart_suit_envs = {
    "QA-01": "http://localhost:9201",
    "QA-22": "http://localhost:9202",
}

In [1962]:
def port_verify(port):
    ''' Print whether a TCP port on 127.0.0.1 accepts connections.

    Prints "Port is open" when connect_ex() returns 0 and
    "Port is not open" for any other return code.

    Args:
        port: TCP port number to probe on 127.0.0.1.
    '''
    # The context manager closes the socket even when connect_ex() raises
    # (for instance on a port number outside the valid range).
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        result = sock.connect_ex(('127.0.0.1', port))

    if result == 0:
        print("Port is open")
    else:
        print("Port is not open")

In [1963]:
# https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/migration.html
# In 8.0.0 responses are no longer the raw deserialized response body and instead an object with two properties, meta and body
def es_get_health(source_es, target_es):
    ''' Fetch the cluster health of the source environment.

    Args:
        source_es: key of smart_suit_envs naming the cluster that is queried.
        target_es: key of smart_suit_envs; it is validated but not queried.

    Returns:
        tuple: (health rendered as an indented JSON string, the health
        'status' value), or ({}, {}) when either name is not a key of
        smart_suit_envs.
    '''
    # Both names must be known environments before anything is contacted.
    if source_es not in smart_suit_envs or target_es not in smart_suit_envs:
        return {}, {}

    es_client = Elasticsearch(hosts="{}".format(smart_suit_envs.get(source_es)), headers=get_headers(), timeout=5, verify_certs=False)
    # A single request serves both returned values.
    health = es_client.cluster.health()
    # Per the migration note above, 8.x clients wrap the payload in an object
    # exposing `.body`; a plain dict has no such attribute and is used as is.
    health_body = getattr(health, "body", health)
    return json.dumps(health_body, indent=2), health_body['status']

In [1964]:
def get_es_instance(host):
    ''' Create an Elasticsearch client for `host`.

    The client is configured with the headers from get_headers(), timeout=5
    and certificate verification switched off.
    '''
    return Elasticsearch(
        hosts=f"{host}",
        headers=get_headers(),
        timeout=5,
        verify_certs=False,
    )


In [1965]:
# Module-level accumulators filled in by compare_mapping():
#   response         -> comparison outcome per index, keyed by index name
#   all_same_mapping -> one boolean per compared index (True when no diff)
response = dict()
all_same_mapping = list()

In [1966]:
def compare_mapping(index_name, diff):
    ''' Record the comparison outcome of one index in the module-level accumulators.

    A falsy `diff` is stored as 'Same mapping'; anything else is stored as
    'Different mapping' together with the diff itself under 'result'.

    Returns:
        tuple: the (response, all_same_mapping) accumulators after the update.
    '''
    is_same = not diff
    all_same_mapping.append(is_same)

    if is_same:
        outcome = {'diff' : 'Same mapping'}
    else:
        outcome = {'diff' : 'Different mapping', 'result' : diff}
    response[index_name] = outcome

    return response, all_same_mapping

In [1967]:
def es_version_verify(es_client):
    ''' Tell whether the cluster behind `es_client` reports a 5.x version.

    Args:
        es_client: object whose info() result exposes ['version']['number']
            as a version string.

    Returns:
        bool: True when the version string starts with "5.", else False.
    '''
    version_number = es_client.info()['version']['number']
    # Anchor the check at the start of the string: a substring test for "5."
    # would also accept versions such as "7.15.2" or "6.5.4".
    return version_number.startswith("5.")

In [1968]:
def get_mapping_from_properties(mapping, es_v5=False):
    ''' Reduce a get_mapping() result to a single {'properties': ...} dict.

    `mapping` is walked three dict levels deep (presumably index name ->
    "mappings" -> entries; verify against the clusters in use).

    Args:
        mapping: nested dict as returned by indices.get_mapping().
        es_v5: when True, each third-level value is treated as a dict and its
            "properties" entry is taken; otherwise the third-level value
            itself is taken.

    Returns:
        dict: {'properties': <value>} for the last third-level entry visited,
        or {} when `mapping` holds no third-level entries.
    '''
    extracted = {}
    # Both modes walk the `mapping` argument; only the value picked differs.
    for index_body in mapping.values():
        for section in index_body.values():
            for entry in section.values():
                # NOTE(review): with several third-level entries the last one
                # wins, exactly as a dict comprehension with a fixed key would.
                extracted['properties'] = entry.get("properties") if es_v5 else entry
    return extracted
            

In [1969]:
def lookup(es_obj_s_client, es_obj_t_client, source_idx_lists):
    ''' Compare index mappings between a source and a target cluster.

    Only index names starting with "wx_", "om_", "es_" or "archive_es_" are
    compared. Each outcome is recorded through compare_mapping().

    The comparison is best effort: a failure on one index is logged and the
    remaining indices are still processed; nothing is raised to the caller.

    Args:
        es_obj_s_client: Elasticsearch client of the source cluster.
        es_obj_t_client: Elasticsearch client of the target cluster.
        source_idx_lists: iterable of index names found on the source cluster.
    '''
    index_prefixes = ("wx_", "om_", "es_", "archive_es_")

    # The cluster versions do not change between indices, so detect them once
    # instead of issuing two info() requests per index.
    try:
        source_is_v5 = es_version_verify(es_obj_s_client)
        target_is_v5 = es_version_verify(es_obj_t_client)
    except Exception:
        logging.exception("Unable to determine the Elasticsearch versions; no index was compared")
        return

    for index_name in source_idx_lists:
        if not index_name.startswith(index_prefixes):
            continue

        print('OKD : {}'.format(index_name))
        try:
            source_mapping = es_obj_s_client.indices.get_mapping(index=index_name)
            target_mapping = es_obj_t_client.indices.get_mapping(index=index_name)

            source_mappings = get_mapping_from_properties(source_mapping, es_v5=source_is_v5)
            target_mappings = get_mapping_from_properties(target_mapping, es_v5=target_is_v5)

            # Compare JSON objects using jsondiff
            diff = jsondiff.diff(source_mappings, target_mappings, marshal=True)

            # Record the outcome for this index.
            compare_mapping(index_name, diff)
        except Exception:
            # Keep going with the next index, but leave a trace of what failed
            # (an index that cannot be compared is not recorded in the result).
            logging.exception("Mapping comparison failed for index %s", index_name)
    

In [1970]:
def es_mapping_result(source, target):
    ''' Compare the index mappings of two named environments.

    Args:
        source: key of smart_suit_envs naming the source cluster.
        target: key of smart_suit_envs naming the target cluster.

    Returns:
        tuple: (the accumulated `response` rendered as an indented JSON
        string, True when every recorded comparison reported the same
        mapping). With no recorded comparison the boolean is True, because
        all() of an empty list is True.

    Raises:
        ValueError: when `source` or `target` is not a key of smart_suit_envs.
    '''
    unknown = [name for name in (source, target) if name not in smart_suit_envs]
    if unknown:
        # Without this check the lookup yields None and the client would be
        # pointed at the host "None".
        raise ValueError("Unknown Elasticsearch environment(s): {}".format(", ".join(map(str, unknown))))

    # The accumulators live at module level; empty them in place so a new run
    # does not report the results of earlier runs.
    response.clear()
    all_same_mapping.clear()

    source_host = smart_suit_envs.get(source)
    target_host = smart_suit_envs.get(target)
    print(source_host, target_host)

    # Source cluster
    es_obj_s_client = get_es_instance(f"{source_host}")
    # Target cluster
    es_obj_t_client = get_es_instance(f"{target_host}")

    resp = es_obj_s_client.cluster.health()
    print(json.dumps(resp, indent=2))

    # Names of all indices present on the source cluster.
    source_idx_lists = list(es_obj_s_client.indices.get("*"))

    # Look up all ES indices and compare them between the two clusters.
    lookup(es_obj_s_client, es_obj_t_client, source_idx_lists)

    return json.dumps(response, indent=2), all(all_same_mapping)

In [1971]:
# Custom stylesheet handed to gr.Blocks(css=...): a background colour for the
# `#warning` element and a fixed font size for textareas inside `.feedback`.
css = """
#warning {background-color: #FFCCCB}
.feedback textarea {font-size: 14px !important}
"""

In [1972]:
# TCP port probed with port_verify(); also the server_port used by the
# (currently commented-out) app.launch() call at the end of the notebook.
PORT = 8092

In [1973]:
# Probe PORT on 127.0.0.1 and print whether it accepts connections.
port_verify(PORT)

Port is open


In [1974]:
# Dump the comparison results accumulated so far in `response`.
# The bare string literals are notes; the final one is the last expression of
# the cell and is therefore echoed as the cell's output.
''' compare the mappings for a given ES index between the two env’s '''
print(json.dumps(response, indent=2))

''' I’m not sure if you have an easier way to compare the mappings between two env’s for a given ES index, but this online tool works for me '''
''' https://www.textcompare.org/ '''

{}


' https://www.textcompare.org/ '

In [1975]:
# Summary of the comparisons recorded so far.
# The client handles are locals of es_mapping_result(), so no module-level
# names are guaranteed to exist here; fall back to None instead of failing
# with a NameError when the notebook is run top to bottom on a fresh kernel.
source_client = globals().get("es_obj_s_client")
target_client = globals().get("es_obj_t_client")

print('**')
print(f"Same mappings for all indices between {source_client} and {target_client}?")
print(f"Same Mapping? {all(all_same_mapping)}")
print('**')

**
Same mappings for all indices between None and None?
Same Mapping? True
**


In [1976]:
# Gradio is an open-source Python package that allows you to quickly build a demo or web application for your machine learning model, API, or any arbitrary Python function. 
# You can then share a link to your demo or web application in just a few seconds using Gradio's built-in sharing features.
# https://www.gradio.app/guides/quickstart
# https://www.gradio.app/docs/gradio/label
# Prerequisite: Gradio requires Python 3.10 or higher. --> We recommend installing Gradio using pip, pip install --upgrade gradio
# http://127.0.0.1:7880/?__theme=dark
# app = gr.Interface(fn=es_get_health, inputs=["text"], outputs="text")
with gr.Blocks(js=js_func, css=css) as app:
    gr.Markdown("# Compare the ES Custom Mappings")
    # host = gr.Textbox(value = "localhost:9201", show_label=True, label="host", elem_classes="feedback")
    # Both dropdowns offer exactly the environments defined in smart_suit_envs,
    # so every selectable name resolves to a host URL.
    source_es = gr.Dropdown(label="Source ES Cluster", choices=list(smart_suit_envs), value="QA-01")
    target_es = gr.Dropdown(label="Target ES Cluster", choices=list(smart_suit_envs), value="QA-22")
    result_es_mapping_json = gr.Textbox(value = "", show_label=True, label="ES Mapping Json", elem_classes="feedback")
    result_es_mapping = gr.Label(value = "", show_label=True, label="ES Mapping Result")
    # gr.Interface(fn=es_get_health, inputs=["text"], outputs=["text", result_es_mapping])
    # gr.Interface(fn=es_get_health, inputs=[source_es, target_es], outputs=[result_es_mapping_json, result_es_mapping])
    # Wire the two dropdowns to es_mapping_result(); its two return values
    # fill the JSON textbox and the result label.
    gr.Interface(fn=es_mapping_result, inputs=[source_es,target_es], outputs=[result_es_mapping_json, result_es_mapping])
        
# app.launch(server_port=PORT)