# Elasticsearch Sandbox

In [2]:
!pip install 'elasticsearch==7.*'

Collecting elasticsearch==7.*
[?25l  Downloading https://files.pythonhosted.org/packages/86/3c/047e4985f81af98b71f19e318a6207187987bcd8af73b1edd4470cdee76b/elasticsearch-7.1.0-py2.py3-none-any.whl (83kB)
[K     |████████████████████████████████| 92kB 1.5MB/s eta 0:00:01
Installing collected packages: elasticsearch
Successfully installed elasticsearch-7.1.0


In [3]:
from datetime import datetime
from elasticsearch import Elasticsearch
from elasticsearch import helpers
es = Elasticsearch([{'host': 'elasticsearch', 'port': 9200}])

In [5]:
def test_connect_elasticsearch(es):
    if es.ping():
        print('Yay Connect')
    else:
        print('Awww it could not connect!')
    return es

In [6]:
test_connect_elasticsearch(es)

Yay Connect


<Elasticsearch([{'host': 'elasticsearch', 'port': 9200}])>

In [98]:
def indices_list_elasticsearch(es):
    es_raw_indices = es.indices.get_alias("*")
    es_indices_list = list(es_raw_indices.keys())
    return es_indices_list

['.monitoring-beats-7-2019.12.03',
 'apm-7.4.0-transaction-000001',
 'apm-7.4.0-error-000001',
 '.apm-agent-configuration',
 'heartbeat-7.4.0-2019.12.03-000001',
 'apm-7.4.0-span-000001',
 'parse-2019.12-00',
 'syslog-2019.12-00',
 'packetbeat-7.4.0-2019.12.03-000001',
 '.monitoring-kibana-7-2019.12.03',
 '.kibana_1',
 'metricbeat-7.4.0-2019.12.03-000001',
 'docker-2019.12-00',
 '.monitoring-es-7-2019.12.03',
 'apm-7.4.0-onboarding-2019.12.03',
 'apm-7.4.0-metric-000001',
 '.kibana_task_manager_1',
 'send-2019.12-00',
 '.monitoring-logstash-7-2019.12.03']

In [100]:
result = es.get(index="docker-2019.12-00", id='6cFCym4BHG4B21aa8Ebq')['_source']

In [122]:
def elastic_index_id_list(index_str):
    #GET ELASTICSEARCH INDEX ID LIST FROM LAST 15 MINUTES
    e_idx=helpers.scan(es,query={"query":{"range":{"@timestamp": {'gte':'now-15m', 'lt':'now'}}}},scroll='1m',index=index_str)#like others so far
    #LIST OF INDEX IDs
    elastic_ids=[e_id['_id'] for e_id in e_idx]
    return elastic_ids




In [124]:

es_timestamp = result['@timestamp']
es_stream = result['stream']
es_error = result['error']
es_host = result['host']
es_agent = result['agent']
es_log = result['log']
es_docker = result['docker']
es_input = result['input']
es_container = result['container']
es_ecs = result['ecs']


def docker_logs(es_timestamp, es_stream, es_error, es_host, es_agent, es_log, es_docker, es_input, es_container, es_ecs):
    es_timestamp = es_timestamp
    es_stream = es_stream
    es_error_msg = es_error['message']
    es_host_containerized = es_host['containerized']
    es_host_hostname = es_host['hostname']
    es_host_name = es_host['name']
    es_host_architecture = es_host['architecture']

    es_host_os = es_host['os']
    es_host_os_family = es_host_os['family']
    es_host_os_name = es_host_os['name']
    es_host_os_kernel = es_host_os['kernel']
    es_host_os_codename = es_host_os['codename']
    es_host_os_platform = es_host_os['platform']
    es_host_os_version = es_host_os['version']
    es_agent_type = es_agent['type']
    es_agent_hostname = es_agent['hostname']
    es_agent_version = es_agent['version']
    es_log_file = es_log['file']
    es_log_file_path = es_log_file['path']
    es_log_offset = es_log['offset']    
    es_docker_container_labels = es_docker['container']['labels']
    es_docker_cl_org_label_schema_version = es_docker_container_labels['org_label-schema_version']
    es_docker_cl_com_docker_compose_config_hash = es_docker_container_labels['com_docker_compose_config-hash']
    es_docker_cl_com_docker_compose_oneoff = es_docker_container_labels['com_docker_compose_oneoff']
    es_docker_cl_org_label_schema_vcs_ref = es_docker_container_labels['org_label-schema_vcs-ref']
    es_docker_cl_description = es_docker_container_labels['description']
    es_docker_cl_com_docker_compose_service = es_docker_container_labels['com_docker_compose_service']
    es_docker_cl_license = es_docker_container_labels['license']
    es_docker_cl_com_docker_compose_version = es_docker_container_labels['com_docker_compose_version']
    es_docker_cl_org_label_schema_build_date = es_docker_container_labels['org_label-schema_build-date']
    es_docker_cl_org_label_schema_name = es_docker_container_labels['org_label-schema_name']
    es_docker_cl_org_label_schema_url = es_docker_container_labels['org_label-schema_url']
    es_docker_cl_org_label_schema_schema_version = es_docker_container_labels['org_label-schema_schema-version']
    es_docker_cl_com_docker_compose_container_number = es_docker_container_labels['com_docker_compose_container-number']
    es_docker_cl_com_docker_compose_project = es_docker_container_labels['com_docker_compose_project']
    es_docker_cl_org_label_schema_license = es_docker_container_labels['org_label-schema_license']
    es_docker_cl_org_label_schema_vendor = es_docker_container_labels['org_label-schema_vendor']
    es_docker_cl_org_label_schema_vcs_url = es_docker_container_labels['org_label-schema_vcs-url']
    es_input_type = es_input['type']
    es_container_name = es_container['name']
    es_container_image = es_container['image']['name']
    es_container_id = es_container['id']
    es_ecs_version = es_ecs['version']


    docker_logs_dict = {
        'es_timestamp':es_timestamp,
        'es_stream':es_stream,
        'es_error_msg':es_error_msg,
        'es_host_containerized':es_host_containerized,
        'es_host_hostname':es_host_hostname,
        'es_host_name':es_host_name,
        'es_host_architecture':es_host_architecture,
        'es_host_os_family':es_host_os_family,
        'es_host_os_name':es_host_os_name,
        'es_host_os_kernel':es_host_os_kernel,
        'es_host_os_codename':es_host_os_codename,
        'es_host_os_platform':es_host_os_platform,
        'es_host_os_version':es_host_os_version,
        'es_agent_type':es_agent_type,
        'es_agent_hostname':es_agent_hostname,
        'es_agent_version':es_agent_version,
        'es_log_file':es_log_file,
        'es_log_file_path':es_log_file_path,
        'es_log_offset':es_log_offset,
        'es_docker_container_labels':es_docker_container_labels,
        'es_docker_cl_org_label_schema_version':es_docker_cl_org_label_schema_version,
        'es_docker_cl_com_docker_compose_config_hash':es_docker_cl_com_docker_compose_config_hash,
        'es_docker_cl_com_docker_compose_oneoff':es_docker_cl_com_docker_compose_oneoff,
        'es_docker_cl_org_label_schema_vcs_ref':es_docker_cl_org_label_schema_vcs_ref,
        'es_docker_cl_description':es_docker_cl_description,
        'es_docker_cl_com_docker_compose_service':es_docker_cl_com_docker_compose_service,
        'es_docker_cl_license':es_docker_cl_license,
        'es_docker_cl_com_docker_compose_version':es_docker_cl_com_docker_compose_version,
        'es_docker_cl_org_label_schema_build_date':es_docker_cl_org_label_schema_build_date,
        'es_docker_cl_org_label_schema_name':es_docker_cl_org_label_schema_name,
        'es_docker_cl_org_label_schema_url':es_docker_cl_org_label_schema_url,
        'es_docker_cl_org_label_schema_schema_version':es_docker_cl_org_label_schema_schema_version,
        'es_docker_cl_com_docker_compose_container_number':es_docker_cl_com_docker_compose_container_number,
        'es_docker_cl_com_docker_compose_project':es_docker_cl_com_docker_compose_project,
        'es_docker_cl_org_label_schema_license':es_docker_cl_org_label_schema_license,
        'es_docker_cl_org_label_schema_vendor':es_docker_cl_org_label_schema_vendor,
        'es_docker_cl_org_label_schema_vcs_url':es_docker_cl_org_label_schema_vcs_url,
        'es_input_type':es_input_type,
        'es_container_name':es_container_name,
        'es_container_image':es_container_image,
        'es_container_id':es_container_id
    }
    docker_df = pd.DataFrame.from_dict(docker_logs_dict,orient='index').transpose()
    return docker_df
    

In [84]:
print("TIMESTAMP")
print(es_timestamp)

TIMESTAMP
2019-12-03T05:36:52.948Z


In [49]:
print("STREAM")
print(type(es_stream))
print(es_stream)

STREAM
<class 'str'>
stderr


In [52]:
print("ERROR")
print(type(es_error))
print("--Error Message")
es_error_msg = es_error['message']
print(es_error_msg)

ERROR
<class 'dict'>
--Error Message
Failed to rename fields in processor: could not fetch value for key: elasticsearch.message, Error: key not found


In [55]:
print("HOST")
print(type(es_host))
es_host_containerized = es_host['containerized']
es_host_hostname = es_host['hostname']
es_host_name = es_host['name']
es_host_architecture = es_host['architecture']

es_host_os = es_host['os']
es_host_os_family = es_host_os['family']
es_host_os_name = es_host_os['name']
es_host_os_kernel = es_host_os['kernel']
es_host_os_codename = es_host_os['codename']
es_host_os_platform = es_host_os['platform']
es_host_os_version = es_host_os['version']

print(es_host_os_version)

HOST
<class 'dict'>
7 (Core)


In [58]:
print("AGENT")
print(type(es_agent))
es_agent_type = es_agent['type']
es_agent_hostname = es_agent['hostname']
es_agent_version = es_agent['version']
print(es_agent_type)

AGENT
<class 'dict'>
filebeat


In [60]:
print("LOG")
print(type(es_log))
es_log_file = es_log['file']
es_log_file_path = es_log_file['path']
es_log_offset = es_log['offset']
print(es_log_file_path)

LOG
<class 'dict'>
/var/lib/docker/containers/c8a161aed505f3eec98df230c46a46d734a5ca234044b4a1239565ad8b5c16f9/c8a161aed505f3eec98df230c46a46d734a5ca234044b4a1239565ad8b5c16f9-json.log


In [102]:
print("DOCKER")
print(type(es_docker))
es_docker_container_labels = es_docker['container']['labels']
es_docker_cl_org_label_schema_version = es_docker_container_labels['org_label-schema_version']
es_docker_cl_com_docker_compose_config_hash = es_docker_container_labels['com_docker_compose_config-hash']
es_docker_cl_com_docker_compose_oneoff = es_docker_container_labels['com_docker_compose_oneoff']
es_docker_cl_org_label_schema_vcs_ref = es_docker_container_labels['org_label-schema_vcs-ref']
es_docker_cl_description = es_docker_container_labels['description']
es_docker_cl_com_docker_compose_service = es_docker_container_labels['com_docker_compose_service']
es_docker_cl_license = es_docker_container_labels['license']
es_docker_cl_com_docker_compose_version = es_docker_container_labels['com_docker_compose_version']
es_docker_cl_org_label_schema_build_date = es_docker_container_labels['org_label-schema_build-date']
es_docker_cl_org_label_schema_name = es_docker_container_labels['org_label-schema_name']
es_docker_cl_org_label_schema_url = es_docker_container_labels['org_label-schema_url']
es_docker_cl_org_label_schema_schema_version = es_docker_container_labels['org_label-schema_schema-version']
es_docker_cl_com_docker_compose_container_number = es_docker_container_labels['com_docker_compose_container-number']
es_docker_cl_com_docker_compose_project = es_docker_container_labels['com_docker_compose_project']
es_docker_cl_org_label_schema_license = es_docker_container_labels['org_label-schema_license']
es_docker_cl_org_label_schema_vendor = es_docker_container_labels['org_label-schema_vendor']
es_docker_cl_org_label_schema_vcs_url = es_docker_container_labels['org_label-schema_vcs-url']
print(es_docker_cl_org_label_schema_version )

DOCKER
<class 'dict'>
7.4.0


In [103]:
print("INPUT")
print(type(es_input))
es_input_type = es_input['type']
print(es_input_type)

INPUT
<class 'dict'>
container


In [104]:
print("CONTAINER")
print(type(es_container))
es_container_name = es_container['name']
es_container_image = es_container['image']['name']
es_container_id = es_container['id']
print(es_container_id)

CONTAINER
<class 'dict'>
c8a161aed505f3eec98df230c46a46d734a5ca234044b4a1239565ad8b5c16f9


In [105]:
print("ECS")
print(type(es_ecs))
es_ecs_version = es_ecs['version']

ECS
<class 'dict'>


## Combine Features into DataFrame

In [106]:
import pandas as pd

In [107]:
es_docker_dict = {
    'es_timestamp':es_timestamp,
    'es_stream':es_stream,
    'es_error_msg':es_error_msg,
    'es_host_containerized':es_host_containerized,
    'es_host_hostname':es_host_hostname,
    'es_host_name':es_host_name,
    'es_host_architecture':es_host_architecture,
    'es_host_os_family':es_host_os_family,
    'es_host_os_name':es_host_os_name,
    'es_host_os_kernel':es_host_os_kernel,
    'es_host_os_codename':es_host_os_codename,
    'es_host_os_platform':es_host_os_platform,
    'es_host_os_version':es_host_os_version,
    'es_agent_type':es_agent_type,
    'es_agent_hostname':es_agent_hostname,
    'es_agent_version':es_agent_version,
    'es_log_file':es_log_file,
    'es_log_file_path':es_log_file_path,
    'es_log_offset':es_log_offset,
    'es_docker_container_labels':es_docker_container_labels,
    'es_docker_cl_org_label_schema_version':es_docker_cl_org_label_schema_version,
    'es_docker_cl_com_docker_compose_config_hash':es_docker_cl_com_docker_compose_config_hash,
    'es_docker_cl_com_docker_compose_oneoff':es_docker_cl_com_docker_compose_oneoff,
    'es_docker_cl_org_label_schema_vcs_ref':es_docker_cl_org_label_schema_vcs_ref,
    'es_docker_cl_description':es_docker_cl_description,
    'es_docker_cl_com_docker_compose_service':es_docker_cl_com_docker_compose_service,
    'es_docker_cl_license':es_docker_cl_license,
    'es_docker_cl_com_docker_compose_version':es_docker_cl_com_docker_compose_version,
    'es_docker_cl_org_label_schema_build_date':es_docker_cl_org_label_schema_build_date,
    'es_docker_cl_org_label_schema_name':es_docker_cl_org_label_schema_name,
    'es_docker_cl_org_label_schema_url':es_docker_cl_org_label_schema_url,
    'es_docker_cl_org_label_schema_schema_version':es_docker_cl_org_label_schema_schema_version,
    'es_docker_cl_com_docker_compose_container_number':es_docker_cl_com_docker_compose_container_number,
    'es_docker_cl_com_docker_compose_project':es_docker_cl_com_docker_compose_project,
    'es_docker_cl_org_label_schema_license':es_docker_cl_org_label_schema_license,
    'es_docker_cl_org_label_schema_vendor':es_docker_cl_org_label_schema_vendor,
    'es_docker_cl_org_label_schema_vcs_url':es_docker_cl_org_label_schema_vcs_url,
    'es_input_type':es_input_type,
    'es_container_name':es_container_name,
    'es_container_image':es_container_image,
    'es_container_id':es_container_id
}

In [114]:
es_docker_df = pd.DataFrame.from_dict(es_docker_dict,orient='index').transpose()

In [142]:
complete_df = pd.DataFrame()
for elastic_index_name in indices_list_elasticsearch(es):
    if elastic_index_name.startswith('.'):
        print("SKIPPING" + elastic_index_name)
    else:
        if elastic_index_name.startswith('docker'):
            print("TEST")
            for record_id in elastic_index_id_list(elastic_index_name):
                print(record_id)
                result = es.get(index=elastic_index_name, id=record_id)['_source']
                
                es_timestamp = result['@timestamp']
                es_stream = result['stream']
                print(result.keys())
                try:
                    es_error = result['error']
                except:
                    es_error['message'] = 'NA'
                es_host = result['host']
                es_agent = result['agent']
                es_log = result['log']
                es_docker = result['docker']
                es_input = result['input']
                es_container = result['container']
                es_ecs = result['ecs']
                
                x_df = docker_logs(es_timestamp, es_stream, es_error, es_host, es_agent, es_log, es_docker, es_input, es_container, es_ecs)
                #print(x_df)
                complete_df.append(x_df)
            
        #print(complete_df)

SKIPPING.monitoring-kibana-7-2019.12.03
SKIPPING.monitoring-logstash-7-2019.12.03
SKIPPING.kibana_1
SKIPPING.monitoring-es-7-2019.12.03
SKIPPING.monitoring-beats-7-2019.12.03
SKIPPING.kibana_task_manager_1
SKIPPING.apm-agent-configuration
TEST
mse5ym4BHG4B21aa2igE
dict_keys(['@timestamp', 'log', 'container', 'ecs', 'docker', 'error', 'host', 'stream', 'input', 'agent'])
m8e5ym4BHG4B21aa2igE
dict_keys(['@timestamp', 'stream', 'message', 'container', 'docker', 'ecs', 'host', 'log', 'input', 'agent'])


KeyError: 'org_label-schema_version'

In [116]:
es_docker_df

Unnamed: 0,es_timestamp,es_stream,es_error_msg,es_host_containerized,es_host_hostname,es_host_name,es_host_architecture,es_host_os_family,es_host_os_name,es_host_os_kernel,...,es_docker_cl_org_label_schema_schema_version,es_docker_cl_com_docker_compose_container_number,es_docker_cl_com_docker_compose_project,es_docker_cl_org_label_schema_license,es_docker_cl_org_label_schema_vendor,es_docker_cl_org_label_schema_vcs_url,es_input_type,es_container_name,es_container_image,es_container_id
0,2019-12-03T05:36:52.948Z,stderr,Failed to rename fields in processor: could no...,True,filebeat_docker_for_elasticsearch,filebeat_docker_for_elasticsearch,x86_64,redhat,CentOS Linux,4.9.184-linuxkit,...,1.0,1,aiops,GPLv2,Elastic,github.com/elastic/beats,container,filebeat_for_elasticsearch,docker.elastic.co/beats/filebeat:7.4.0,c8a161aed505f3eec98df230c46a46d734a5ca234044b4...


### TO DO
- [ ] Loops - Index + ID
- [ ] Feature Engineering (Automated)
- [ ] Core ML Data - Anomaly Detection ("Normal Logs")
- [ ] Ingest Anomalies and stage for cluster analytics - two tier - known issues filter, and unknown issues classifier

In [None]:
es_docker_df.index = es_docker_df['es_timestamp']