In [None]:
import os

# Remember the directory the kernel started in the first time this cell runs,
# then always move to that directory's parent. A bare relative
# os.chdir("../") climbs one more level every time the cell is re-executed,
# which silently breaks the relative paths used later in the notebook.
if "NOTEBOOK_DIR" not in globals():
    NOTEBOOK_DIR = os.getcwd()
os.chdir(os.path.join(NOTEBOOK_DIR, os.pardir))

from detectmatelibrary.detectors.new_value_combo_detector import NewValueComboDetector, schemas
from detectmatelibrary.parsers.template_matcher import MatcherParser
from detectmatelibrary.readers.log_file import LogFileReader

from detectmatelibrary.common.persistency.event_data_structures.trackers import (
    EventVariableTracker, StabilityTracker
)
from detectmatelibrary.common.persistency.event_data_structures.dataframes import (
    EventDataFrame, ChunkedEventDataFrame
)
from detectmatelibrary.common.persistency.event_persistency import EventPersistency

import logging

# Raise the root logger's threshold so that only ERROR and above are emitted.
root_logger = logging.getLogger()
root_logger.setLevel(logging.ERROR)

In [None]:
import yaml

# Relative path: resolved against the process working directory, which the
# first cell of the notebook changes before this cell runs.
CONFIG_PATH = "config/pipeline_config_default.yaml"

# Decode explicitly as UTF-8. Without `encoding`, open() uses the platform's
# locale encoding, so the same file could be read differently across machines.
with open(CONFIG_PATH, encoding="utf-8") as config_file:
    # safe_load builds plain Python objects only (no arbitrary object tags).
    config = yaml.safe_load(config_file)

In [None]:
# Pipeline components, each constructed from the same loaded config.
reader = LogFileReader(config=config)
parser = MatcherParser(config=config)
detector = NewValueComboDetector(config=config)

# Three EventPersistency stores that differ only in the event-data class they
# are given; the ingestion loop feeds every parsed log to all of them.
persistency1 = EventPersistency(
    event_data_class=EventVariableTracker,
    event_data_kwargs=dict(tracker_type=StabilityTracker),
)
persistency2 = EventPersistency(event_data_class=EventDataFrame)
persistency3 = EventPersistency(event_data_class=ChunkedEventDataFrame)

In [None]:
# Number of reader.process() calls made in this run.
N_EVENTS = 1000

for event_idx in range(N_EVENTS):
    # Read one log entry (as text, not bytes) and run it through the parser.
    log = reader.process(as_bytes=False)
    parsed_log = parser.process(log)

    # Feed the same parsed fields to every store, in the fixed order
    # persistency1 -> persistency2 -> persistency3.
    for persistency in (persistency1, persistency2, persistency3):
        persistency.ingest_event(
            event_id=parsed_log['EventID'],
            event_template=parsed_log['template'],
            variables=parsed_log['variables'],
            log_format_variables=parsed_log['logFormatVariables'],
        )

In [None]:
# Show everything collected by persistency1 (the store built with
# EventVariableTracker). Left as a bare trailing expression so the notebook
# renders the returned object as the cell output.
persistency1.get_events_data()

{0: EventVariableTracker(data={
 	Time: StabilityTracker(classification=Classification(type='RANDOM', reason='Unique set size equals number of samples (642)'), change_series=[1, 1, 1, '...', 1, 1, 1], unique_set={(1642820941.151:1057), (1642847941.596:1245), (1642785421.190:843), ..., (1642833541.196:1147), (1642799821.579:931), (1642822741.217:1074)}, RLE=[(True, 642)])
 	Type: StabilityTracker(classification=Classification(type='STABLE', reason='Segment means of change series [0.03125, 0.0, 0.0, 0.0] are below segment thresholds: [1.1, 0.3, 0.1, 0.01]'), change_series=[1, 1, 1, '...', 0, 0, 0], unique_set={USER_START, CRED_DISP, USER_ACCT, USER_END, CRED_ACQ}, RLE=[(True, 5), (False, 637)])
 	var_0: StabilityTracker(classification=Classification(type='UNSTABLE', reason='No classification matched; variable is unstable'), change_series=[1, 0, 0, '...', 0, 0, 0], unique_set={16092, 14930, 15774, ..., 10883, 13737, 19301}, RLE=[(True, 1), (False, 4), (True, 1), '...', (False, 4), (True, 

In [None]:
# Show what persistency2 (the store built with EventDataFrame) holds for a
# single event; the argument 0 is presumably an event ID - TODO confirm
# against EventPersistency.get_event_data.
persistency2.get_event_data(0)

Unnamed: 0,Time,Type,var_0,var_1,var_2,var_3,var_4,var_5,var_6,var_7,var_8,var_9,var_10
0,(1642723741.072:375),USER_ACCT,10125,0,4294967295,4294967295,PAM:accounting,"""root""","""/usr/sbin/cron""",?,?,cron,success
1,(1642723741.072:376),CRED_ACQ,10125,0,4294967295,4294967295,PAM:setcred,"""root""","""/usr/sbin/cron""",?,?,cron,success
2,(1642723741.080:378),USER_START,10125,0,0,65,PAM:session_open,"""root""","""/usr/sbin/cron""",?,?,cron,success
3,(1642723741.084:379),CRED_DISP,10125,0,0,65,PAM:setcred,"""root""","""/usr/sbin/cron""",?,?,cron,success
4,(1642723741.084:380),USER_END,10125,0,0,65,PAM:session_close,"""root""","""/usr/sbin/cron""",?,?,cron,success
...,...,...,...,...,...,...,...,...,...,...,...,...,...
637,(1642865941.046:1359),USER_END,20191,0,0,192,PAM:session_close,"""root""","""/usr/sbin/cron""",?,?,cron,success
638,(1642867741.073:1362),USER_ACCT,20264,0,4294967295,4294967295,PAM:accounting,"""root""","""/usr/sbin/cron""",?,?,cron,success
639,(1642867741.077:1363),CRED_ACQ,20264,0,4294967295,4294967295,PAM:setcred,"""root""","""/usr/sbin/cron""",?,?,cron,success
640,(1642867741.081:1365),USER_START,20264,0,0,193,PAM:session_open,"""root""","""/usr/sbin/cron""",?,?,cron,success


In [None]:
# Show everything collected by persistency3 (the store built with
# ChunkedEventDataFrame). Left as a bare trailing expression so the notebook
# renders the returned object as the cell output.
persistency3.get_events_data()

{0: ChunkedEventDataFrame(df=..., rows=642, chunks=642, variables=['Time', 'Type', 'var_0', 'var_1', 'var_2', 'var_3', 'var_4', 'var_5', 'var_6', 'var_7', 'var_8', 'var_9', 'var_10']),
 2: ChunkedEventDataFrame(df=..., rows=129, chunks=129, variables=['Time', 'Type', 'var_0', 'var_1', 'var_2', 'var_3', 'var_4', 'var_5', 'var_6', 'var_7']),
 1: ChunkedEventDataFrame(df=..., rows=215, chunks=215, variables=['Time', 'Type', 'var_0', 'var_1', 'var_2', 'var_3', 'var_4', 'var_5', 'var_6', 'var_7', 'var_8', 'var_9', 'var_10']),
 3: ChunkedEventDataFrame(df=..., rows=4, chunks=4, variables=['Time', 'Type', 'var_0', 'var_1', 'var_2', 'var_3', 'var_4', 'var_5', 'var_6']),
 4: ChunkedEventDataFrame(df=..., rows=4, chunks=4, variables=['Time', 'Type', 'var_0', 'var_1', 'var_2', 'var_3', 'var_4', 'var_5', 'var_6', 'var_7', 'var_8', 'var_9', 'var_10', 'var_11', 'var_12', 'var_13', 'var_14', 'var_15', 'var_16', 'var_17', 'var_18', 'var_19', 'var_20', 'var_21', 'var_22', 'var_23', 'var_24']),
 5: Chun