In [681]:
# Notebook banner; the message has no placeholders, so a plain string literal is used.
print("ELK Upgrade Index Migration Logic -> Create ES indices with the custom mappings based on ES v.8")

ELK Upgrade Index Migration Logic -> Create ES indices with the custom mappings based on ES v.8


In [682]:
import json
import logging
import os
import warnings

import pandas as pd
from dotenv import load_dotenv
from elasticsearch import Elasticsearch
warnings.filterwarnings("ignore")

In [683]:
# Requires the python-dotenv package: pip install python-dotenv
# load_dotenv() searches for a .env file in the local folder and loads its variables.
load_dotenv()

True

In [684]:
def get_headers():
    """Build the HTTP headers sent with every Elasticsearch request.

    Returns:
        dict: always contains ``Content-type: application/json``. The
        ``Authorization`` entry is the value of the ``BASIC_AUTH`` environment
        variable; it is left out when that variable is unset or empty, rather
        than sending the literal string ``"None"``.
    """
    headers = {'Content-type': 'application/json'}

    basic_auth = os.getenv('BASIC_AUTH')
    if basic_auth:
        headers['Authorization'] = basic_auth

    # A 'Connection': 'close' header was left disabled by the original author.
    return headers

In [685]:
# Cluster endpoints. Each can be overridden through the environment (for
# example from the .env file); the defaults are the original local endpoints.
source_es_host = os.getenv("SOURCE_ES_HOST", "http://localhost:9201")
target_es_host = os.getenv("TARGET_ES_HOST", "http://localhost:9202")

In [686]:
def get_es_instance(host):
    """Create an Elasticsearch client for ``host``.

    The client is built with the headers from ``get_headers()``, ``timeout=5``
    and certificate verification turned off.
    """
    return Elasticsearch(
        hosts=f"{host}",
        headers=get_headers(),
        timeout=5,
        verify_certs=False,
    )

# Source cluster
es_obj_s_client = get_es_instance(source_es_host)
# Target cluster
es_obj_t_client = get_es_instance(target_es_host)

In [687]:
# Ask the source cluster for its health report and pretty-print the response as JSON.
resp = es_obj_s_client.cluster.health()
print(json.dumps(resp, indent=2))

{
  "cluster_name": "docker-cluster",
  "status": "yellow",
  "timed_out": false,
  "number_of_nodes": 1,
  "number_of_data_nodes": 1,
  "active_primary_shards": 126,
  "active_shards": 126,
  "relocating_shards": 0,
  "initializing_shards": 0,
  "unassigned_shards": 126,
  "delayed_unassigned_shards": 0,
  "number_of_pending_tasks": 0,
  "number_of_in_flight_fetch": 0,
  "task_max_waiting_in_queue_millis": 0,
  "active_shards_percent_as_number": 50.0
}


In [688]:
# Show the health report held in `resp` as a one-row table.
df = pd.DataFrame([resp])
df.head(10)

Unnamed: 0,cluster_name,status,timed_out,number_of_nodes,number_of_data_nodes,active_primary_shards,active_shards,relocating_shards,initializing_shards,unassigned_shards,delayed_unassigned_shards,number_of_pending_tasks,number_of_in_flight_fetch,task_max_waiting_in_queue_millis,active_shards_percent_as_number
0,docker-cluster,yellow,False,1,1,126,126,0,0,126,0,0,0,0,50.0


In [689]:
# Same health check against the target cluster; `resp` is rebound to this
# response, replacing the source cluster's report.
resp = es_obj_t_client.cluster.health()
print(json.dumps(resp, indent=2))

{
  "cluster_name": "docker-elasticsearch",
  "status": "yellow",
  "timed_out": false,
  "number_of_nodes": 1,
  "number_of_data_nodes": 1,
  "active_primary_shards": 81,
  "active_shards": 81,
  "relocating_shards": 0,
  "initializing_shards": 0,
  "unassigned_shards": 57,
  "delayed_unassigned_shards": 0,
  "number_of_pending_tasks": 0,
  "number_of_in_flight_fetch": 0,
  "task_max_waiting_in_queue_millis": 0,
  "active_shards_percent_as_number": 58.69565217391305
}


In [690]:
# Show the health report currently held in `resp` as a one-row table.
df = pd.DataFrame([resp])
df.head(10)

Unnamed: 0,cluster_name,status,timed_out,number_of_nodes,number_of_data_nodes,active_primary_shards,active_shards,relocating_shards,initializing_shards,unassigned_shards,delayed_unassigned_shards,number_of_pending_tasks,number_of_in_flight_fetch,task_max_waiting_in_queue_millis,active_shards_percent_as_number
0,docker-elasticsearch,yellow,False,1,1,81,81,0,0,57,0,0,0,0,58.695652


### Create ES v.8 indices from ES v.5 by transforming the custom mappings
* Create ES Indices between two clusters
* Custom mappings with ES v.7

In [691]:
def get_es_api_alias(es_client):
    """Collect the alias names of every index that has at least one alias.

    Related endpoint noted by the original author:
    https://localhost:9201/_cat/aliases?format=json

    Args:
        es_client: client exposing ``indices.get_alias()``.

    Returns:
        dict mapping index name -> list of alias names. Indices whose
        ``aliases`` entry is missing or empty are left out.
    """
    alias_response = es_client.indices.get_alias()

    return {
        index_name: list(alias_response.get(index_name).get('aliases').keys())
        for index_name in alias_response.keys()
        if alias_response.get(index_name).get('aliases')
    }

In [692]:
def try_delete_create_index(es_t_client, index, mapping):
    """Create ``index`` on the target cluster unless it already exists.

    Despite the name, nothing is deleted: an existing index is reported
    through ``logging.error`` and left untouched.

    Args:
        es_t_client: client exposing ``indices.exists``, ``indices.create``
            and ``indices.refresh``.
        index: name of the index to create.
        mapping: request body handed to ``indices.create``.

    Returns:
        None. Failures are logged instead of raised (best effort).
    """
    try:
        if es_t_client.indices.exists(index=index):
            # Same message the original produced by raising and catching an Exception.
            logging.error("Index has already exist")
            return

        # Create the new index, then refresh it.
        es_t_client.indices.create(index=index, body=mapping)
        es_t_client.indices.refresh(index=index)
        logging.info("Successfully created: {}".format(index))
    except Exception as e:
        logging.error(e)

In [693]:
# Extract the indices of the source cluster (pattern "*").
source_idx_lists = es_obj_s_client.indices.get("*")

In [694]:
# print(source_idx_lists)

In [695]:
# Aliases of the source cluster, keyed by index name.
get_alias_dict = get_es_api_alias(es_obj_s_client)

In [696]:
# Show the index -> aliases mapping collected from the source cluster.
print(get_alias_dict)

{'es_pipeline_upload_test03152018_01': ['es_pipeline_upload_test']}


In [697]:
def try_create_index(es_client, index, mapping):
    """Create ``index`` with ``mapping`` unless the cluster already has it.

    Every failure, including the "already exists" case, is printed with a
    ``try_create_index : `` prefix and swallowed. Nothing is returned.
    """
    try:
        indices_api = es_client.indices
        already_present = indices_api.exists(index)
        if already_present:
            raise Exception("raise Exception : Index has already exist")

        # Create the index, then refresh it.
        indices_api.create(index=index, body=mapping)
        indices_api.refresh(index=index)
        print("Successfully created: {} to {}".format(index, es_client))
    except Exception as err:
        print("try_create_index : ", err)

In [698]:
def recursive_lookup(k, d):
    """Depth-first search for key ``k`` in the nested dict ``d``.

    Args:
        k: key to look for.
        d: dict whose values may themselves be dicts.

    Returns:
        ``d[k]`` when ``k`` is a direct key of ``d``; otherwise the first
        non-None match found in a nested dict, visiting values in iteration
        order; ``None`` when no branch contains ``k``. Only dict values are
        descended into (lists are not searched).
    """
    if k in d:
        return d[k]
    for v in d.values():
        if isinstance(v, dict):
            found = recursive_lookup(k, v)
            # Keep scanning sibling branches: returning unconditionally here
            # would give up after the first nested dict even when it has no match.
            if found is not None:
                return found
    return None

In [699]:
def get_mapping_from_es_v5_to_es_8(es_client, es_t_client, each_index):
    """Copy one index's mapping from the source cluster to the target cluster.

    Steps:
      1. read the mapping of ``each_index`` from ``es_client``;
      2. retype ``CONSOLIDATIONID`` from ``integer`` to ``double``;
      3. keep only the first ``properties`` object found by
         ``recursive_lookup``, dropping the wrappers around it;
      4. widen one legacy date format and remove every ``query`` key;
      5. store the result in the module-level ``idx_json`` and create
         ``Index_Prefix + each_index`` on ``es_t_client`` through
         ``try_create_index``.

    Args:
        es_client: source cluster client (``indices.get_mapping``).
        es_t_client: target cluster client, passed to ``try_create_index``.
        each_index: name of the source index.

    Returns:
        None. Any exception is printed with a
        ``get_mapping_from_es_v5_to_es_8 : `` prefix and swallowed.
    """
    try:
        def replace_mapping_type_for_field(each_mapping):
            """Return a copy of the mapping with CONSOLIDATIONID retyped to double."""
            # Textual replace on the serialized mapping; the pattern relies on
            # json.dumps' default ", " / ": " separators.
            # (The same treatment for PRINTBATCHID was left disabled by the author.)
            serialized = json.dumps(each_mapping)
            serialized = serialized.replace(
                '"CONSOLIDATIONID": {"type": "integer"}',
                '"CONSOLIDATIONID": {"type": "double"}',
            )
            return json.loads(serialized)

        def get_recursive_nested_all(d):
            """Walk the mapping in place: fix date formats and drop ``query`` keys."""
            if isinstance(d, list):
                for item in d:
                    get_recursive_nested_all(item)
            elif isinstance(d, dict):
                for k, v in d.items():
                    if isinstance(v, (list, dict)):
                        get_recursive_nested_all(v)
                    elif k == "format" and v == "MM/dd/yyyy hh:mm:ss.SSSSSS a Z":
                        # Re-assigning an existing key is safe while iterating.
                        d[k] = "MM/dd/yyyy hh:mm:ss.SSSSSS a z||MM/dd/yyyy hh:mm:ss.SSSSSS a Z"

                # Unnecessary "query" entries are removed. Only dicts are
                # touched: running the membership test / delete on a list or a
                # string item raised TypeError and aborted the whole index.
                d.pop('query', None)
            return d

        each_mapping = es_client.indices.get_mapping(index=each_index)
        # Replace the mapping type of specific fields.
        each_mapping = replace_mapping_type_for_field(each_mapping)

        get_only_properties = recursive_lookup("properties", each_mapping)
        if get_only_properties is None:
            # Fail with a readable reason instead of an opaque NoneType error.
            raise ValueError("no 'properties' found in the mapping of index '{}'".format(each_index))

        idx_json.update({
            "mappings": {
                "properties": get_recursive_nested_all(get_only_properties)
            }
        })

        # Create the index on the target cluster with the transformed mapping.
        try_create_index(es_t_client, "{}{}".format(Index_Prefix, each_index), idx_json)
    except Exception as e:
        print("get_mapping_from_es_v5_to_es_8 : ", e)

In [700]:
# Request body shared by all index creations; get_mapping_from_es_v5_to_es_8
# overwrites its "mappings" entry for each index.
idx_json = {}
# Incremented by work() once per index it hands to get_mapping_from_es_v5_to_es_8.
migrated_total_indices_cnt = 0
# When True, work() also applies the source aliases to the target indices.
is_update_aliase = True
# Prepended to each source index name to build the target index name
# ("" keeps the name unchanged).
Index_Prefix = ""

In [701]:
def work(source_idx_lists, es_s_client, es_t_client):
    """Create the selected source indices on the target cluster, then copy aliases.

    Args:
        source_idx_lists: iterable of source index names.
        es_s_client: source cluster client.
        es_t_client: target cluster client.

    Side effects:
        Increments the module-level ``migrated_total_indices_cnt`` once per
        index handed to ``get_mapping_from_es_v5_to_es_8`` (that function
        swallows its own errors, so the counter counts attempts).
        When ``is_update_aliase`` is true, registers every alias listed in
        ``get_alias_dict`` on the matching target index.
    """
    global migrated_total_indices_cnt
    wanted_prefixes = ("om_", "wx_", "es_", "archive_")

    # Create each index with its mapping from ES v.5 after transforming it.
    for each_index in source_idx_lists:
        # Names containing a dot are skipped; this excludes system indices
        # such as .monitoring-es-7-2024.07.12
        if '.' in each_index:
            continue
        if str(each_index).startswith(wanted_prefixes):
            get_mapping_from_es_v5_to_es_8(es_s_client, es_t_client, each_index)
            migrated_total_indices_cnt += 1

    # Update aliases on ES v.8.
    if is_update_aliase:
        for each_index in source_idx_lists:
            target_index = "{}{}".format(Index_Prefix, each_index)
            if not es_t_client.indices.exists(index=target_index):
                continue
            # An index may carry several aliases: register each one on its own
            # instead of concatenating the names into a single bogus alias.
            for alias in get_alias_dict.get(each_index, []):
                response = es_t_client.indices.put_alias(index=target_index, name=alias)
                print(f"response : {response}")
                print(f"Success with indics : {each_index}, alias : {alias}")

In [702]:
# Run the migration: create the indices on the target cluster, then apply aliases.
work(source_idx_lists, es_obj_s_client, es_obj_t_client)

try_create_index :  raise Exception : Index has already exist
try_create_index :  raise Exception : Index has already exist
try_create_index :  raise Exception : Index has already exist
try_create_index :  raise Exception : Index has already exist
response : {'acknowledged': True}
Success with indics : es_pipeline_upload_test03152018_01, alias : es_pipeline_upload_test


In [703]:
# NOTE: work() increments this counter for every index it attempts, including
# indices whose creation was skipped or failed, so it is an attempt count.
print(f"Migrated Indices : {migrated_total_indices_cnt}")

Migrated Indices : 4
