# Full data extraction

In [1]:
from examon.examon import ExamonQL
from itables import show
import itables.options as opt
opt.maxBytes = 0

# local library
from library import examon_utils as eu
from library import nagios_sampling as ns
from library import features_extraction as fe

<IPython.core.display.Javascript object>

In [2]:
sq = eu.create_examon_connection()

<urllib.request.Request object at 0x7ffbab5edd10>


In [3]:
node='r183c11s04'
t_start='03-10-2019 10:00:00'
t_stop='03-10-2019 13:00:00'

## 1) Collecting data from plugins

In [4]:
from library import date_utils as du
first_timestamp = du.parse_timestamp(t_start)
last_timestamp = du.parse_timestamp(t_stop)
all_timestamps = du.generate_timestamps(first_timestamp, last_timestamp, minute_step=60*24)
print(first_timestamp)
print(last_timestamp)
print(all_timestamps)

2019-10-03 10:00:00+02:00
2019-10-03 13:00:00+02:00
0   2019-10-03 10:00:00+02:00
1   2019-10-04 10:00:00+02:00
dtype: datetime64[ns, Europe/Rome]


#### Nagios:

In [5]:
nagios_data = ns.extract_data_from_nagios(sq=sq, node=node, t_start=t_start, t_stop=t_stop)

In [6]:
show(nagios_data, scrollX=True)

timestamp,label


In [7]:
nagios_data.iloc[0]['timestamp']

Timestamp('2019-10-03 10:00:00+0200', tz='Europe/Rome')

#### Ganglia:

In [8]:
ganglia_data = fe.extract_data_from_plugin(sq=sq, plugin_name='ganglia_pub', node=node, t_start=t_start, t_stop=t_stop)

In [9]:
show(ganglia_data, scrollX=True)

timestamp,avg:boottime,var:boottime,avg:bytes_in,var:bytes_in,avg:bytes_out,var:bytes_out,avg:core_freq_avg,var:core_freq_avg,avg:core_freq_max,var:core_freq_max,avg:core_freq_median,var:core_freq_median,avg:core_freq_min,var:core_freq_min,avg:cpu_aidle,var:cpu_aidle,avg:cpu_idle,var:cpu_idle,avg:cpu_nice,var:cpu_nice,avg:cpu_num,var:cpu_num,avg:cpu_speed,var:cpu_speed,avg:cpu_steal,var:cpu_steal,avg:cpu_system,var:cpu_system,avg:cpu_user,var:cpu_user,avg:cpu_wio,var:cpu_wio,avg:disk_free,var:disk_free,avg:disk_total,var:disk_total,avg:load_fifteen,var:load_fifteen,avg:load_five,var:load_five,avg:load_one,var:load_one,avg:mem_buffers,var:mem_buffers,avg:mem_cached,var:mem_cached,avg:mem_free,var:mem_free,avg:mem_shared,var:mem_shared,avg:mem_total,var:mem_total,avg:part_max_used,var:part_max_used,avg:pkts_in,var:pkts_in,avg:pkts_out,var:pkts_out,avg:proc_run,var:proc_run,avg:proc_total,var:proc_total,avg:swap_free,var:swap_free,avg:swap_total,var:swap_total


In [10]:
ganglia_data.iloc[0]['timestamp']

Timestamp('2019-10-03 10:05:00+0200', tz='Europe/Rome')

##### Manual extraction:

In [11]:
raw_data = fe.extract_data_from_examon(sq=sq, plugin_name="ganglia_pub", node=node, t_start=t_start, t_stop=t_stop)
show(raw_data, scrollX=True)

timestamp,name,value


In [12]:
raw_data = du.remove_microsecods_and_align_to_5_second(raw_data)
show(raw_data, scrollX=True)

timestamp,name,value


In [13]:
from library import compact as ct
compact_data = ct.compact_features(raw_data)
show(compact_data, scrollX=True)

timestamp,boottime,bytes_in,bytes_out,core_freq_avg,core_freq_max,core_freq_median,core_freq_min,cpu_aidle,cpu_idle,cpu_nice,cpu_num,cpu_speed,cpu_steal,cpu_system,cpu_user,cpu_wio,disk_free,disk_total,gexec,load_fifteen,load_five,load_one,machine_type,mem_buffers,mem_cached,mem_free,mem_shared,mem_total,os_name,os_release,part_max_used,pkts_in,pkts_out,proc_run,proc_total,swap_free,swap_total


In [14]:
import pandas as pd
compact_data = compact_data.where(pd.notnull(compact_data), None) # substitute np.nan with None
show(compact_data, scrollX=True)

timestamp,boottime,bytes_in,bytes_out,core_freq_avg,core_freq_max,core_freq_median,core_freq_min,cpu_aidle,cpu_idle,cpu_nice,cpu_num,cpu_speed,cpu_steal,cpu_system,cpu_user,cpu_wio,disk_free,disk_total,gexec,load_fifteen,load_five,load_one,machine_type,mem_buffers,mem_cached,mem_free,mem_shared,mem_total,os_name,os_release,part_max_used,pkts_in,pkts_out,proc_run,proc_total,swap_free,swap_total


In [15]:
filled_data = fe.fill_all_none_values(compact_data)
show(filled_data, scrollX=True)

timestamp,boottime,bytes_in,bytes_out,core_freq_avg,core_freq_max,core_freq_median,core_freq_min,cpu_aidle,cpu_idle,cpu_nice,cpu_num,cpu_speed,cpu_steal,cpu_system,cpu_user,cpu_wio,disk_free,disk_total,gexec,load_fifteen,load_five,load_one,machine_type,mem_buffers,mem_cached,mem_free,mem_shared,mem_total,os_name,os_release,part_max_used,pkts_in,pkts_out,proc_run,proc_total,swap_free,swap_total


In [16]:
from library import split_features as sf
numerical_data = sf.get_numerical_features(filled_data)
show(numerical_data, scrollX=True)

timestamp,boottime,bytes_in,bytes_out,core_freq_avg,core_freq_max,core_freq_median,core_freq_min,cpu_aidle,cpu_idle,cpu_nice,cpu_num,cpu_speed,cpu_steal,cpu_system,cpu_user,cpu_wio,disk_free,disk_total,load_fifteen,load_five,load_one,mem_buffers,mem_cached,mem_free,mem_shared,mem_total,part_max_used,pkts_in,pkts_out,proc_run,proc_total,swap_free,swap_total


In [17]:
categorical_data = sf.get_categorical_features(filled_data)
show(categorical_data, scrollX=True)

timestamp,gexec,machine_type,os_name,os_release


In [18]:
categorical_parsed_data = fe.parse_categorical_data(categorical_data.dropna())
show(categorical_parsed_data, scrollX=True)

timestamp


In [19]:
numerical_parsed_data = fe.parse_numerical_data(numerical_data.dropna())
show(numerical_parsed_data, scrollX=True)

timestamp,avg:boottime,var:boottime,avg:bytes_in,var:bytes_in,avg:bytes_out,var:bytes_out,avg:core_freq_avg,var:core_freq_avg,avg:core_freq_max,var:core_freq_max,avg:core_freq_median,var:core_freq_median,avg:core_freq_min,var:core_freq_min,avg:cpu_aidle,var:cpu_aidle,avg:cpu_idle,var:cpu_idle,avg:cpu_nice,var:cpu_nice,avg:cpu_num,var:cpu_num,avg:cpu_speed,var:cpu_speed,avg:cpu_steal,var:cpu_steal,avg:cpu_system,var:cpu_system,avg:cpu_user,var:cpu_user,avg:cpu_wio,var:cpu_wio,avg:disk_free,var:disk_free,avg:disk_total,var:disk_total,avg:load_fifteen,var:load_fifteen,avg:load_five,var:load_five,avg:load_one,var:load_one,avg:mem_buffers,var:mem_buffers,avg:mem_cached,var:mem_cached,avg:mem_free,var:mem_free,avg:mem_shared,var:mem_shared,avg:mem_total,var:mem_total,avg:part_max_used,var:part_max_used,avg:pkts_in,var:pkts_in,avg:pkts_out,var:pkts_out,avg:proc_run,var:proc_run,avg:proc_total,var:proc_total,avg:swap_free,var:swap_free,avg:swap_total,var:swap_total


#### Confluent

In [20]:
confluent_data = fe.extract_data_from_plugin(sq=sq, plugin_name='confluent_pub', node=node, t_start=t_start, t_stop=t_stop)

In [21]:
show(confluent_data, scrollX=True)

timestamp,avg:Ambient_Temp,var:Ambient_Temp,avg:CMOS_Battery,var:CMOS_Battery,avg:CPU_1_DTS,var:CPU_1_DTS,avg:CPU_1_Temp,var:CPU_1_Temp,avg:CPU_2_DTS,var:CPU_2_DTS,avg:CPU_2_Temp,var:CPU_2_Temp,avg:CPU_Power,var:CPU_Power,avg:CPU_Utilization,var:CPU_Utilization,avg:DC_Energy,var:DC_Energy,avg:Exhaust_Temp,var:Exhaust_Temp,avg:Fan1A_Tach,var:Fan1A_Tach,avg:Fan1B_Tach,var:Fan1B_Tach,avg:Fan2A_Tach,var:Fan2A_Tach,avg:Fan2B_Tach,var:Fan2B_Tach,avg:Fan3A_Tach,var:Fan3A_Tach,avg:Fan3B_Tach,var:Fan3B_Tach,avg:Fan4A_Tach,var:Fan4A_Tach,avg:Fan4B_Tach,var:Fan4B_Tach,avg:Fan5A_Tach,var:Fan5A_Tach,avg:Fan5B_Tach,var:Fan5B_Tach,avg:IO_Utilization,var:IO_Utilization,avg:Mem_Power,var:Mem_Power,avg:Mem_Utilization,var:Mem_Utilization,avg:SysBrd_12V,var:SysBrd_12V,avg:SysBrd_3_3V,var:SysBrd_3_3V,avg:SysBrd_5V,var:SysBrd_5V,avg:Sys_Power,var:Sys_Power,avg:Sys_Utilization,var:Sys_Utilization,avg:System_Air_Flow,var:System_Air_Flow


In [22]:
confluent_data.iloc[0]['timestamp']

Timestamp('2019-10-03 10:05:00+0200', tz='Europe/Rome')

## 2) Data merging

In [23]:
import pandas as pd

In [24]:
def merge_data_on_timestamp(df_list):
    dfs = []
    for df in df_list:
        dfs.append(df.set_index('timestamp'))
    
    if(len(dfs) == 0):
        return pd.DataFrame(data=None, columns=[])
    else:      
        merged_data = dfs.pop(0)
        if(len(dfs) > 0):
            merged_data = merged_data.join(dfs, how='outer')
        return merged_data

In [25]:
merged_data = merge_data_on_timestamp([confluent_data, ganglia_data, nagios_data])

In [26]:
show(merged_data, scrollX=True)

Unnamed: 0_level_0,avg:Ambient_Temp,var:Ambient_Temp,avg:CMOS_Battery,var:CMOS_Battery,avg:CPU_1_DTS,var:CPU_1_DTS,avg:CPU_1_Temp,var:CPU_1_Temp,avg:CPU_2_DTS,var:CPU_2_DTS,avg:CPU_2_Temp,var:CPU_2_Temp,avg:CPU_Power,var:CPU_Power,avg:CPU_Utilization,var:CPU_Utilization,avg:DC_Energy,var:DC_Energy,avg:Exhaust_Temp,var:Exhaust_Temp,avg:Fan1A_Tach,var:Fan1A_Tach,avg:Fan1B_Tach,var:Fan1B_Tach,avg:Fan2A_Tach,var:Fan2A_Tach,avg:Fan2B_Tach,var:Fan2B_Tach,avg:Fan3A_Tach,var:Fan3A_Tach,avg:Fan3B_Tach,var:Fan3B_Tach,avg:Fan4A_Tach,var:Fan4A_Tach,avg:Fan4B_Tach,var:Fan4B_Tach,avg:Fan5A_Tach,var:Fan5A_Tach,avg:Fan5B_Tach,var:Fan5B_Tach,avg:IO_Utilization,var:IO_Utilization,avg:Mem_Power,var:Mem_Power,avg:Mem_Utilization,var:Mem_Utilization,avg:SysBrd_12V,var:SysBrd_12V,avg:SysBrd_3_3V,var:SysBrd_3_3V,avg:SysBrd_5V,var:SysBrd_5V,avg:Sys_Power,var:Sys_Power,avg:Sys_Utilization,var:Sys_Utilization,avg:System_Air_Flow,var:System_Air_Flow,avg:boottime,var:boottime,avg:bytes_in,var:bytes_in,avg:bytes_out,var:bytes_out,avg:core_freq_avg,var:core_freq_avg,avg:core_freq_max,var:core_freq_max,avg:core_freq_median,var:core_freq_median,avg:core_freq_min,var:core_freq_min,avg:cpu_aidle,var:cpu_aidle,avg:cpu_idle,var:cpu_idle,avg:cpu_nice,var:cpu_nice,avg:cpu_num,var:cpu_num,avg:cpu_speed,var:cpu_speed,avg:cpu_steal,var:cpu_steal,avg:cpu_system,var:cpu_system,avg:cpu_user,var:cpu_user,avg:cpu_wio,var:cpu_wio,avg:disk_free,var:disk_free,avg:disk_total,var:disk_total,avg:load_fifteen,var:load_fifteen,avg:load_five,var:load_five,avg:load_one,var:load_one,avg:mem_buffers,var:mem_buffers,avg:mem_cached,var:mem_cached,avg:mem_free,var:mem_free,avg:mem_shared,var:mem_shared,avg:mem_total,var:mem_total,avg:part_max_used,var:part_max_used,avg:pkts_in,var:pkts_in,avg:pkts_out,var:pkts_out,avg:proc_run,var:proc_run,avg:proc_total,var:proc_total,avg:swap_free,var:swap_free,avg:swap_total,var:swap_total,label
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1


In [27]:
merged_data = merged_data.dropna()

In [28]:
show(merged_data, scrollX=True)

Unnamed: 0_level_0,avg:Ambient_Temp,var:Ambient_Temp,avg:CMOS_Battery,var:CMOS_Battery,avg:CPU_1_DTS,var:CPU_1_DTS,avg:CPU_1_Temp,var:CPU_1_Temp,avg:CPU_2_DTS,var:CPU_2_DTS,avg:CPU_2_Temp,var:CPU_2_Temp,avg:CPU_Power,var:CPU_Power,avg:CPU_Utilization,var:CPU_Utilization,avg:DC_Energy,var:DC_Energy,avg:Exhaust_Temp,var:Exhaust_Temp,avg:Fan1A_Tach,var:Fan1A_Tach,avg:Fan1B_Tach,var:Fan1B_Tach,avg:Fan2A_Tach,var:Fan2A_Tach,avg:Fan2B_Tach,var:Fan2B_Tach,avg:Fan3A_Tach,var:Fan3A_Tach,avg:Fan3B_Tach,var:Fan3B_Tach,avg:Fan4A_Tach,var:Fan4A_Tach,avg:Fan4B_Tach,var:Fan4B_Tach,avg:Fan5A_Tach,var:Fan5A_Tach,avg:Fan5B_Tach,var:Fan5B_Tach,avg:IO_Utilization,var:IO_Utilization,avg:Mem_Power,var:Mem_Power,avg:Mem_Utilization,var:Mem_Utilization,avg:SysBrd_12V,var:SysBrd_12V,avg:SysBrd_3_3V,var:SysBrd_3_3V,avg:SysBrd_5V,var:SysBrd_5V,avg:Sys_Power,var:Sys_Power,avg:Sys_Utilization,var:Sys_Utilization,avg:System_Air_Flow,var:System_Air_Flow,avg:boottime,var:boottime,avg:bytes_in,var:bytes_in,avg:bytes_out,var:bytes_out,avg:core_freq_avg,var:core_freq_avg,avg:core_freq_max,var:core_freq_max,avg:core_freq_median,var:core_freq_median,avg:core_freq_min,var:core_freq_min,avg:cpu_aidle,var:cpu_aidle,avg:cpu_idle,var:cpu_idle,avg:cpu_nice,var:cpu_nice,avg:cpu_num,var:cpu_num,avg:cpu_speed,var:cpu_speed,avg:cpu_steal,var:cpu_steal,avg:cpu_system,var:cpu_system,avg:cpu_user,var:cpu_user,avg:cpu_wio,var:cpu_wio,avg:disk_free,var:disk_free,avg:disk_total,var:disk_total,avg:load_fifteen,var:load_fifteen,avg:load_five,var:load_five,avg:load_one,var:load_one,avg:mem_buffers,var:mem_buffers,avg:mem_cached,var:mem_cached,avg:mem_free,var:mem_free,avg:mem_shared,var:mem_shared,avg:mem_total,var:mem_total,avg:part_max_used,var:part_max_used,avg:pkts_in,var:pkts_in,avg:pkts_out,var:pkts_out,avg:proc_run,var:proc_run,avg:proc_total,var:proc_total,avg:swap_free,var:swap_free,avg:swap_total,var:swap_total,label
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1


# Full preprocessing:

In [1]:
from examon.examon import ExamonQL
import pandas as pd
from itables import show
import itables.options as opt
opt.maxBytes = 0

# local library
from library import examon_utils as eu
from library import features_extraction as fe
from library import normalization_utils as nu
from library import nagios_sampling as ns

<IPython.core.display.Javascript object>

In [2]:
sq = eu.create_examon_connection()

<urllib.request.Request object at 0x7f0db06c2790>


In [4]:
node='r183c09s04'
t_start='31-10-2019 00:00:00'
t_stop='01-11-2019 00:00:00'

In [5]:
data = fe.extract_data_from_examon_plugins_with_nagios(sq=sq, node=node, t_start=t_start, t_stop=t_stop)
data

Unnamed: 0,timestamp,avg:boottime,var:boottime,avg:bytes_in,var:bytes_in,avg:bytes_out,var:bytes_out,avg:core_freq_avg,var:core_freq_avg,avg:core_freq_max,...,PCIe_3_Temp_0,PCIe_5_Temp_0,PCIe_6_Temp_0,Phy_Presence_Jmp_0,Power_Supply_1_0,Power_Supply_2_0,SysBrd_Vol_Fault_0,avg:state,var:state,plugin_output_0
0,2019-10-31 00:15:00+01:00,1.564061e+09,0.0,333.798667,1.465262e+03,1587.248000,2.683795e+05,1635.716667,1263.136389,2092.000000,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0
1,2019-10-31 00:30:00+01:00,1.564061e+09,0.0,1195.556167,1.930579e+06,1311.924833,2.469323e+06,1530.133333,116.615556,1959.783333,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0
2,2019-10-31 00:45:00+01:00,1.564061e+09,0.0,412.988500,8.441209e+04,110.869333,1.880861e+01,1536.233333,14.578889,2011.583333,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0
3,2019-10-31 01:00:00+01:00,1.564061e+09,0.0,1535.330000,1.255135e+06,1241.385000,7.857685e+05,1529.533333,31.382222,1948.533333,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0
4,2019-10-31 01:15:00+01:00,1.564061e+09,0.0,4418.228333,4.143857e+06,3107.066667,3.674392e+06,1711.516667,36248.383056,2053.516667,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90,2019-10-31 22:45:00+01:00,1.564061e+09,0.0,1061.129000,1.322894e+06,2591.632000,4.100615e+06,2015.016667,4959.716389,2093.500000,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0
91,2019-10-31 23:00:00+01:00,1.564061e+09,0.0,277.334500,3.447865e+02,1394.484500,1.564605e+03,2016.133333,3290.348889,2094.100000,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0
92,2019-10-31 23:15:00+01:00,1.564061e+09,0.0,1323.658500,2.049835e+06,2731.052500,1.695473e+06,2019.900000,3797.990000,2099.800000,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0
93,2019-10-31 23:30:00+01:00,1.564061e+09,0.0,305.170500,1.139409e+03,1924.076500,9.191227e-01,1995.683333,5816.383056,2094.833333,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0


In [6]:
data_2 = fe.extract_data_from_examon_plugins(sq=sq, node=node, t_start=t_start, t_stop=t_stop)
data_2

Unnamed: 0,timestamp,avg:boottime,var:boottime,avg:bytes_in,var:bytes_in,avg:bytes_out,var:bytes_out,avg:core_freq_avg,var:core_freq_avg,avg:core_freq_max,...,M2_Temp_0,PCH_Overtemp_0,PCIe_1_Temp_0,PCIe_3_Temp_0,PCIe_5_Temp_0,PCIe_6_Temp_0,Phy_Presence_Jmp_0,Power_Supply_1_0,Power_Supply_2_0,SysBrd_Vol_Fault_0
0,2019-10-31 00:05:00+01:00,1.564061e+09,0.0,2033.392000,2.014880e+06,2293.104000,2.212938e+06,2100.000000,0.000000,2100.000000,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,2019-10-31 00:10:00+01:00,1.564061e+09,0.0,1476.563333,3.480907e+06,877.809000,2.616600e+05,1941.683333,15474.183056,2099.500000,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,2019-10-31 00:15:00+01:00,1.564061e+09,0.0,333.798667,1.465262e+03,1587.248000,2.683795e+05,1635.716667,1263.136389,2092.000000,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,2019-10-31 00:20:00+01:00,1.564061e+09,0.0,159.542833,7.642840e+03,123.599167,2.342662e+03,1551.250000,181.387500,2099.483333,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,2019-10-31 00:25:00+01:00,1.564061e+09,0.0,68.367167,6.247730e+02,66.338500,2.741943e+01,1519.966667,417.798889,1797.250000,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
281,2019-10-31 23:30:00+01:00,1.564061e+09,0.0,305.170500,1.139409e+03,1924.076500,9.191227e-01,1995.683333,5816.383056,2094.833333,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
282,2019-10-31 23:35:00+01:00,1.564061e+09,0.0,1002.844667,2.698292e+06,2078.734167,2.385257e+05,2006.433333,3041.312222,2099.600000,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
283,2019-10-31 23:40:00+01:00,1.564061e+09,0.0,1193.291667,9.028864e+04,1884.990833,2.350849e+04,2008.966667,3201.498889,2099.700000,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
284,2019-10-31 23:45:00+01:00,1.564061e+09,0.0,209.925833,1.921241e+03,1439.029167,4.305014e+04,1993.750000,4065.520833,2099.000000,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [None]:
data.copy().transpose()

### Adding Nagios labels:

In [5]:
labels = ns.extract_data_from_nagios(sq=sq, node=node, t_start=t_start, t_stop=t_stop)
labels[labels.label ==1]

Unnamed: 0,timestamp,label
0,2019-10-31 00:00:00+01:00,1
1,2019-10-31 00:05:00+01:00,1
2,2019-10-31 00:10:00+01:00,1
3,2019-10-31 00:15:00+01:00,1
4,2019-10-31 00:20:00+01:00,1
...,...,...
3161,2019-11-10 23:25:00+01:00,1
3162,2019-11-10 23:30:00+01:00,1
3163,2019-11-10 23:35:00+01:00,1
3164,2019-11-10 23:40:00+01:00,1


In [6]:
final_data = fe.merge_data_on_timestamp([data.reset_index(), labels])
final_data

Unnamed: 0,timestamp,index,avg:boottime,var:boottime,avg:bytes_in,var:bytes_in,avg:bytes_out,var:bytes_out,avg:core_freq_avg,var:core_freq_avg,...,PCH_Overtemp_0,PCIe_1_Temp_0,PCIe_3_Temp_0,PCIe_5_Temp_0,PCIe_6_Temp_0,Phy_Presence_Jmp_0,Power_Supply_1_0,Power_Supply_2_0,SysBrd_Vol_Fault_0,label
0,2019-10-31 00:05:00+01:00,0.0,1.572431e+09,0.0,559.573500,3.367660e+05,275.283500,1.649554e+02,1532.400000,53.240000,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1
1,2019-10-31 00:10:00+01:00,1.0,1.572431e+09,0.0,2944.783500,3.957726e+05,215.238500,3.134199e+02,1517.466667,16.848889,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1
2,2019-10-31 00:15:00+01:00,2.0,1.572431e+09,0.0,229.028000,1.329044e+04,148.443000,2.044383e+03,1511.900000,234.290000,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1
3,2019-10-31 00:20:00+01:00,3.0,1.572431e+09,0.0,730.012000,6.770874e+01,341.882000,3.859292e+02,1529.233333,39.812222,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1
4,2019-10-31 00:25:00+01:00,4.0,1.572431e+09,0.0,740.062333,8.570264e+04,247.279333,3.630834e+03,1518.216667,262.703056,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3077,2019-11-10 23:10:00+01:00,3077.0,1.572431e+09,0.0,478.135333,1.959661e+02,409.831333,1.367479e+02,1508.116667,45.636389,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1
3078,2019-11-10 23:15:00+01:00,3078.0,1.572431e+09,0.0,384.664667,9.936649e+03,312.423333,1.027163e+04,1523.700000,30.210000,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1
3079,2019-11-10 23:20:00+01:00,3079.0,1.572431e+09,0.0,308.640500,2.774136e+02,243.587667,6.308152e+02,1515.500000,17.650000,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1
3080,2019-11-10 23:25:00+01:00,3080.0,1.572431e+09,0.0,641.471833,9.384093e+04,546.373667,7.255493e+04,1532.950000,227.847500,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1


## Saving data on csv

In [7]:

final_data.to_csv(path_or_buf="final_data_"+node+"_2.csv", index=False)

In [37]:
final_data[final_data.label==1]

Unnamed: 0,timestamp,index,avg:boottime,var:boottime,avg:bytes_in,var:bytes_in,avg:bytes_out,var:bytes_out,avg:core_freq_avg,var:core_freq_avg,...,PCH_Overtemp_0,PCIe_1_Temp_0,PCIe_3_Temp_0,PCIe_5_Temp_0,PCIe_6_Temp_0,Phy_Presence_Jmp_0,Power_Supply_1_0,Power_Supply_2_0,SysBrd_Vol_Fault_0,label
0,2019-10-31 00:05:00+01:00,0.0,1.572431e+09,0.0,527.640667,2.269334e+05,355.331000,5.611848e+02,1546.600000,61.640000,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1
1,2019-10-31 00:10:00+01:00,1.0,1.572431e+09,0.0,2995.975333,2.729998e+05,223.492000,5.282589e+02,1520.533333,89.415556,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1
2,2019-10-31 00:15:00+01:00,2.0,1.572431e+09,0.0,191.643500,5.194954e+03,105.164000,1.748591e+03,1518.783333,125.369722,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1
3,2019-10-31 00:20:00+01:00,3.0,1.572431e+09,0.0,741.053667,1.048871e+03,423.304500,5.508287e+02,1526.750000,61.987500,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1
4,2019-10-31 00:25:00+01:00,4.0,1.572431e+09,0.0,522.085000,5.237678e+04,247.273167,1.062263e+03,1531.600000,265.040000,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1
5,2019-10-31 00:30:00+01:00,5.0,1.572431e+09,0.0,2251.713333,4.717462e+04,492.520500,1.534289e+03,1524.183333,189.949722,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1
6,2019-10-31 00:35:00+01:00,6.0,1.572431e+09,0.0,582.489667,4.782541e+01,191.899833,3.682585e+00,1511.000000,133.200000,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1
7,2019-10-31 00:40:00+01:00,7.0,1.572431e+09,0.0,545.342000,1.505118e+04,206.540833,5.802076e-01,1521.550000,37.647500,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1
8,2019-10-31 00:45:00+01:00,8.0,1.572431e+09,0.0,1466.126167,2.743494e+04,201.439500,3.314326e+01,1545.050000,322.347500,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1
9,2019-10-31 00:50:00+01:00,9.0,1.572431e+09,0.0,413.680667,2.850946e+06,440.763667,2.245861e+06,1538.050000,152.247500,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1


## Reading data from csv

In [17]:
nd = pd.read_csv('normalized_data.csv')
nd

Unnamed: 0,timestamp,avg:boottime,var:boottime,avg:bytes_in,var:bytes_in,avg:bytes_out,var:bytes_out,avg:core_freq_avg,var:core_freq_avg,avg:core_freq_max,...,avg:SysBrd_3_3V,var:SysBrd_3_3V,avg:SysBrd_5V,var:SysBrd_5V,avg:Sys_Power,var:Sys_Power,avg:Sys_Utilization,var:Sys_Utilization,avg:System_Air_Flow,var:System_Air_Flow
0,2019-10-18 10:05:00+02:00,0.0,0.0,0.185306,-0.20972,0.184088,-0.471735,-0.486547,-0.480059,0.316479,...,0.0,1.0,1.0,0.0,0.404032,-0.689081,-0.728272,-0.561649,1.941263,0.485071
1,2019-10-18 10:10:00+02:00,0.0,0.0,0.049687,-0.198722,0.538699,-0.204292,-0.740388,-0.565586,-0.377832,...,0.0,1.0,1.0,0.0,1.327532,-0.360875,-0.907846,-0.568028,2.269364,-0.606339
2,2019-10-18 10:15:00+02:00,0.0,0.0,-0.231193,-0.209874,0.67526,-0.154556,-0.57484,-0.71799,0.536109,...,-1.414214,-1.0,1.0,0.0,0.711865,-0.470277,-0.668414,-0.563775,2.269364,-0.606339
3,2019-10-18 10:20:00+02:00,0.0,0.0,-0.264144,-0.209976,-0.188985,-0.483304,-1.283019,0.134885,0.634115,...,-1.414214,-1.0,1.0,0.0,0.404032,-0.689081,-1.08742,-0.551015,2.159997,0.121268
4,2019-10-18 10:25:00+02:00,0.0,0.0,-0.263982,-0.209955,-0.55496,-0.366118,-0.298926,-1.053882,-0.321153,...,0.0,1.0,1.0,0.0,-0.827303,0.623744,1.666048,1.735098,1.722529,-0.606339
5,2019-10-18 10:30:00+02:00,0.0,0.0,-0.272381,-0.209988,-1.359322,-0.420543,2.184299,3.219612,2.201039,...,0.0,1.0,1.0,0.0,-0.827303,0.623744,0.768178,3.106767,1.722529,-0.606339
6,2019-10-18 10:35:00+02:00,0.0,0.0,-0.275486,-0.209988,-0.25981,-0.483158,-0.697161,-0.311037,-0.597461,...,-1.414214,-1.0,1.0,0.0,-0.827303,-0.470277,-1.147278,-0.499977,1.722529,-0.606339
7,2019-10-18 10:40:00+02:00,0.0,0.0,-0.27236,-0.209988,-0.374842,-0.461328,-0.47919,-0.582639,-0.482923,...,-1.414214,-1.0,1.0,0.0,-0.416858,0.866859,-0.78813,-0.551015,0.410126,2.304088
8,2019-10-18 10:45:00+02:00,0.0,0.0,-0.275994,-0.209988,-0.978813,-0.467335,-0.588635,-0.323483,-0.386098,...,0.0,1.0,1.0,0.0,-2.058637,1.64483,-0.428982,-0.561649,-0.246076,0.485071
9,2019-10-18 10:50:00+02:00,0.0,0.0,-0.272478,-0.209988,-0.404599,-0.481788,-0.662212,-0.561492,0.223196,...,0.0,1.0,1.0,0.0,0.404032,-0.689081,-0.608556,-0.568028,0.082025,-0.606339


In [18]:
fd = pd.read_csv('final_data.csv')
fd

Unnamed: 0.1,Unnamed: 0,timestamp,avg:boottime,var:boottime,avg:bytes_in,var:bytes_in,avg:bytes_out,var:bytes_out,avg:core_freq_avg,var:core_freq_avg,...,var:SysBrd_3_3V,avg:SysBrd_5V,var:SysBrd_5V,avg:Sys_Power,var:Sys_Power,avg:Sys_Utilization,var:Sys_Utilization,avg:System_Air_Flow,var:System_Air_Flow,label
0,1,2019-10-18 10:05:00+02:00,0.0,0.0,0.185306,-0.20972,0.184088,-0.471735,-0.486547,-0.480059,...,1.0,1.0,0.0,0.404032,-0.689081,-0.728272,-0.561649,1.941263,0.485071,0
1,2,2019-10-18 10:10:00+02:00,0.0,0.0,0.049687,-0.198722,0.538699,-0.204292,-0.740388,-0.565586,...,1.0,1.0,0.0,1.327532,-0.360875,-0.907846,-0.568028,2.269364,-0.606339,0
2,3,2019-10-18 10:15:00+02:00,0.0,0.0,-0.231193,-0.209874,0.67526,-0.154556,-0.57484,-0.71799,...,-1.0,1.0,0.0,0.711865,-0.470277,-0.668414,-0.563775,2.269364,-0.606339,0
3,4,2019-10-18 10:20:00+02:00,0.0,0.0,-0.264144,-0.209976,-0.188985,-0.483304,-1.283019,0.134885,...,-1.0,1.0,0.0,0.404032,-0.689081,-1.08742,-0.551015,2.159997,0.121268,0
4,5,2019-10-18 10:25:00+02:00,0.0,0.0,-0.263982,-0.209955,-0.55496,-0.366118,-0.298926,-1.053882,...,1.0,1.0,0.0,-0.827303,0.623744,1.666048,1.735098,1.722529,-0.606339,0
5,6,2019-10-18 10:30:00+02:00,0.0,0.0,-0.272381,-0.209988,-1.359322,-0.420543,2.184299,3.219612,...,1.0,1.0,0.0,-0.827303,0.623744,0.768178,3.106767,1.722529,-0.606339,0
6,7,2019-10-18 10:35:00+02:00,0.0,0.0,-0.275486,-0.209988,-0.25981,-0.483158,-0.697161,-0.311037,...,-1.0,1.0,0.0,-0.827303,-0.470277,-1.147278,-0.499977,1.722529,-0.606339,0
7,8,2019-10-18 10:40:00+02:00,0.0,0.0,-0.27236,-0.209988,-0.374842,-0.461328,-0.47919,-0.582639,...,-1.0,1.0,0.0,-0.416858,0.866859,-0.78813,-0.551015,0.410126,2.304088,0
8,9,2019-10-18 10:45:00+02:00,0.0,0.0,-0.275994,-0.209988,-0.978813,-0.467335,-0.588635,-0.323483,...,1.0,1.0,0.0,-2.058637,1.64483,-0.428982,-0.561649,-0.246076,0.485071,0
9,10,2019-10-18 10:50:00+02:00,0.0,0.0,-0.272478,-0.209988,-0.404599,-0.481788,-0.662212,-0.561492,...,1.0,1.0,0.0,0.404032,-0.689081,-0.608556,-0.568028,0.082025,-0.606339,0
