# Examon database connection

In [1]:
%matplotlib inline
import os
import re
import numpy as np
import pandas as pd
from datetime import datetime, timedelta 
from examon.examon import Examon, ExamonQL
from itables import show
import itables.options as opt
opt.maxBytes = 0

<IPython.core.display.Javascript object>

#### Examon setup

In [2]:
# Connection settings are read from the environment so that no credentials
# are stored in the notebook, its saved outputs or its version history.
# Export EXAMON_USER and EXAMON_PWD before starting the kernel;
# EXAMON_SERVER / EXAMON_PORT are optional overrides of the defaults below.
# NOTE(review): any password that was previously committed in this cell
# should be treated as leaked and rotated.
KAIROSDB_SERVER = os.environ.get('EXAMON_SERVER', '130.186.13.80')
KAIROSDB_PORT = os.environ.get('EXAMON_PORT', '3000')
USER = os.environ['EXAMON_USER']  # a KeyError here means the variable is not set
PWD = os.environ['EXAMON_PWD']

ex = Examon(KAIROSDB_SERVER, port=KAIROSDB_PORT, user=USER, password=PWD, verbose=False, proxy=True)
sq = ExamonQL(ex)

<urllib.request.Request object at 0x3ffd1792b6a0>


### Node evaluation

Total metric count:

In [3]:
def get_metric_count_from_plugin(plugin):
    """Return how many metrics the given plugin exposes.

    Runs a DESCRIBE query filtered on the ``plugin`` tag through the shared
    ``sq`` handle and counts the entries of the ``name`` column of the result.
    """
    described = sq.DESCRIBE(tag_key='plugin', tag_value=plugin).execute()
    metric_names = described['name']
    return len(metric_names)

def get_metric_count_from_plugin_list(plugin_list):
    """Return the combined metric count over every plugin in ``plugin_list``.

    Each plugin is counted with ``get_metric_count_from_plugin``; an empty
    list yields 0.
    """
    return sum(get_metric_count_from_plugin(name) for name in plugin_list)

# Total number of metrics across the three plugins used in this analysis.
metric_count = get_metric_count_from_plugin_list(
    ['ganglia_pub', 'confluent_pub', 'ipmi_pub']
)

print(f"total metric count: {metric_count}")


total metric count: 1341


In [4]:
def extract_features_from_plugin(plugin):
    """Return the metric (feature) names published by ``plugin``.

    The names are the ``.values`` of the ``name`` column of a DESCRIBE query
    filtered on the ``plugin`` tag.
    """
    described = sq.DESCRIBE(tag_key='plugin', tag_value=plugin).execute()
    name_column = described['name']
    return name_column.values

def extract_nodes_from_feature(feature):
    """Return the node entry found in the DESCRIBE result of ``feature``.

    The value is read positionally: fourth row, third column of the result.
    NOTE(review): presumably that cell holds the values of the 'node' tag;
    confirm against the DESCRIBE output layout.
    """
    described = sq.DESCRIBE(metric=feature).execute()
    cells = described.values
    return cells[3][2]

def get_feature_count_per_node_from_plugin(plugin):
    """Count, for every node, how many features of ``plugin`` report it.

    Parameters
    ----------
    plugin : str
        Plugin tag value (e.g. ``'ganglia_pub'``). Ganglia features are read
        with ``extract_nodes_from_ganglia_feature``; every other plugin uses
        ``extract_nodes_from_feature``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``node`` (in first-seen order) with a single integer
        column ``features_count``. Empty when the plugin has no features.
    """
    # The extractor depends only on the plugin, so choose it once instead of
    # re-testing the plugin name for every feature.
    if plugin == 'ganglia_pub':
        extract_nodes = extract_nodes_from_ganglia_feature
    else:
        extract_nodes = extract_nodes_from_feature

    # Tally in a plain dict and build the frame once at the end. Incrementing
    # through df['features_count'][node] is chained indexing, which pandas
    # does not guarantee to write back (and never does under Copy-on-Write),
    # and growing a DataFrame row by row is quadratic.
    counts = {}
    for feature in extract_features_from_plugin(plugin):
        for node in extract_nodes(feature):
            counts[node] = counts.get(node, 0) + 1

    return (
        pd.Series(counts, dtype='int64', name='features_count')
        .rename_axis('node')
        .to_frame()
    )

def extract_nodes_from_ganglia_feature(feature):
    """Return the cleaned node names found in the DESCRIBE result of a ganglia ``feature``.

    The raw entry is read positionally (fifth row, third column) and then
    passed through ``parse_ganglia_nodes``.
    NOTE(review): presumably that cell holds the values of the 'node' tag;
    confirm against the DESCRIBE output layout.
    """
    described = sq.DESCRIBE(metric=feature).execute()
    return parse_ganglia_nodes(described.iloc[4, 2])

# Ganglia reports node names in a different format from all other plugins,
# so they have to go through a parsing step.
def parse_ganglia_nodes(raw_nodes):
    """Return the well-formed short node names contained in ``raw_nodes``.

    Anything after the first '.' of a raw name is dropped; the remainder is
    kept only if it matches one of the accepted shapes (``masterNN``,
    ``rNNNcNNsNN``, ``rNNNuNN[l|s]NN``). Input order and duplicates are
    preserved.
    """
    node_pattern = re.compile("^((master[0-9]{2})|(r[0-9]{3}c[0-9]{2}s[0-9]{2})|(r[0-9]{3}u[0-9]{2}(l|s)[0-9]{2}))$")
    # partition() leaves names without a dot untouched.
    short_names = (raw.partition('.')[0] for raw in raw_nodes)
    return [name for name in short_names if node_pattern.search(name)]

In [5]:
# Sanity check of the parser on three sample names: the two '-hfi' variants
# do not match the accepted pattern, so only the plain name is returned.
parse_ganglia_nodes(['r183c16s04-hfi','r000u26s04','r000u26s04-hfi'])

['r000u26s04']

Printing the features count for each plugin:

In [6]:
ganglia_count = get_feature_count_per_node_from_plugin('ganglia_pub')

In [7]:
# Move 'node' out of the index into a regular column, then list the node names.
# NOTE(review): this rebinds ganglia_count, so the cell is not idempotent;
# running it a second time adds an extra 'index' column.
ganglia_count = ganglia_count.reset_index()
ganglia_nodes = ganglia_count['node']
show(ganglia_nodes, scrollX=True)

node


In [8]:
show(ganglia_count, scrollX=True)

node,features_count


In [10]:
ipmi_count = get_feature_count_per_node_from_plugin('ipmi_pub')

In [11]:
show(ipmi_count, scrollX=True)

Unnamed: 0_level_0,features_count
node,Unnamed: 1_level_1


In [7]:
confluent_count = get_feature_count_per_node_from_plugin('confluent_pub')

In [8]:
show(confluent_count, scrollX=True)

Unnamed: 0_level_0,features_count
node,Unnamed: 1_level_1


In [14]:
# Rebuild the ganglia counts from scratch: an earlier cell replaced
# ganglia_count with its reset_index() form.
ganglia_count = get_feature_count_per_node_from_plugin('ganglia_pub')

In [15]:
show(ganglia_count, scrollX=True)

Unnamed: 0_level_0,features_count
node,Unnamed: 1_level_1


The nodes listed here with more than 999 features (the total number of ganglia features) are not real nodes: they are an artifact of a necessary preprocessing step, because ganglia does not store node names in the form 'r033c02s05' but as, for example, 'r033c02s05.galileo.cineca.it'.
When nodes are extracted from ganglia, they are preprocessed to strip the unneeded suffix (which would otherwise prevent matching against the data from the other plugins).
Nodes with a higher count come from several 'real' nodes that the preprocessing evidently collapses into a single name. In any case, none of them appears in the other plugins, so they can be discarded.

In [16]:
# 999 is the total number of ganglia features, so no real node can exceed it;
# larger counts come from several raw names collapsing into one parsed name.
GANGLIA_FEATURE_TOTAL = 999
ganglia_count = ganglia_count[ganglia_count['features_count'] <= GANGLIA_FEATURE_TOTAL]

In [17]:
show(ganglia_count, scrollX=True)

Unnamed: 0_level_0,features_count
node,Unnamed: 1_level_1


(The node 'r064u15x01' must also be discarded, because it is not present in nagios. In any case, it will be filtered out during the merge.)

Merging all features information:

In [18]:
# Turn 'node' back into a regular column on all three frames so that they can
# be merged on it.
# NOTE(review): each name is rebound, so re-running this cell adds an extra
# 'index' column to every frame.
ganglia_count = ganglia_count.reset_index()
confluent_count = confluent_count.reset_index()
ipmi_count = ipmi_count.reset_index()

In [19]:
show(ganglia_count, scrollX=True)

node,features_count


In [20]:
# Inner-join the ganglia and confluent counts on the node name and add the two
# count columns together, then repeat the same step to fold in the ipmi
# counts. The result is a Series of summed counts indexed by node.
merged_data = (
    ganglia_count
    .merge(confluent_count, on=['node'])
    .set_index(['node'])
    .sum(axis=1)
    .reset_index()
    .merge(ipmi_count, on=['node'])
    .set_index(['node'])
    .sum(axis=1)
)

In [21]:
# Turn the summed Series into a one-column frame named 'features_count'.
merged_data = merged_data.to_frame(name="features_count")

In [22]:
show(merged_data, scrollX=True)

Unnamed: 0_level_0,features_count
node,Unnamed: 1_level_1


In [23]:
# Index each frame by node again and name its count column after the plugin,
# so the three can be placed side by side.
# NOTE(review): the names are rebound, so re-running this cell fails once
# 'node' is no longer a column.
ganglia_count = ganglia_count.set_index('node').rename(columns={'features_count':'ganglia'})
ipmi_count = ipmi_count.set_index('node').rename(columns={'features_count':'ipmi'})
confluent_count = confluent_count.set_index('node').rename(columns={'features_count':'confluent'})

In [25]:
# Inner join on the node index: only nodes present in all three plugins are kept.
all_data = ganglia_count.join([ipmi_count, confluent_count], how='inner')

In [28]:
# NOTE(review): the saved output of this cell already lists a 'total' column,
# which is only added by the following cell, so the cells were executed out
# of order. Re-run top to bottom before trusting the display.
show(all_data, scrollX=True)

Unnamed: 0_level_0,ganglia,ipmi,confluent,total
node,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


In [27]:
# Append a 'total' column (initialised to 0) as the last column. The guard
# makes the cell safe to re-run: DataFrame.insert raises ValueError when the
# column already exists.
if 'total' not in all_data.columns:
    all_data.insert(len(all_data.columns), 'total', 0)

In [29]:
# Per-node total over the three plugins, computed column-wise in one step
# instead of a Python loop with scalar .loc access per row. This also behaves
# correctly if an index label occurs more than once.
all_data['total'] = all_data['ganglia'] + all_data['ipmi'] + all_data['confluent']

In [31]:
show(all_data)

Unnamed: 0_level_0,ganglia,ipmi,confluent,total
node,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


### Plugin nagios_pub
Extracting data from nagios_pub when a critical state is matched (state=2):
<ul>
    <li>DOWN<sup>(*)</sup>,</li>
    <li>DOWN<sup>(*)</sup>+DRAIN,</li>
    <li>IDLE<sup>(*)</sup>+DRAIN,</li>
    <li>ALLOCATED+DRAIN,</li>
    <li>MIXED+COMPLETING+DRAIN,</li>
    <li>or any other combination of them</li>
</ul>

Considered time: 1h 30m

In [22]:
opt.classes = ["display", "wrap"]

# nagios_pub output of node r183c12s04 restricted to state '2', between the
# two timestamps below.
# NOTE(review): the timestamps look like day-month-year; confirm the format
# expected by TSTART/TSTOP.
data = (
    sq.SELECT('node', 'state')
      .FROM('plugin_output')
      .WHERE(plugin='nagios_pub', state='2', node='r183c12s04')
      .TSTART('1-09-2019 00:00:00')
      .TSTOP('15-10-2019 00:00:00')
      .execute()
)

In [23]:
print(data.df_table.shape)
show(data.df_table, scrollX=True)

(928, 5)


timestamp,value,name,node,state


In [24]:
# Keep only the tabular part of the query result and a one-column view of the
# 'value' field.
# NOTE(review): `data` is rebound from the query result to its df_table, so
# re-running this cell (or the earlier cell that reads data.df_table) will
# presumably fail afterwards; consider a separate name for the table.
data = data.df_table
values = data[['value']]

In [30]:
# Rows whose state is '2', renumbered from 0.
critical_2 = data.loc[data['state'] == '2'].reset_index(drop=True)
show(critical_2, scrollX=True)

timestamp,value,name,node,state


In [26]:
show(values.value.unique(), scrollX=True)

0


In [31]:
# Rows in state '2' whose value text mentions DRAIN or DOWN, renumbered from 0.
criticalities = data.loc[
    (data['state'] == '2')
    & (data['value'].str.contains("DRAIN") | data['value'].str.contains("DOWN"))
].reset_index(drop=True)
show(criticalities, scrollX=True)

timestamp,value,name,node,state
