In [27]:
import sys
sys.path.append("../")

from config import config
from drain3 import TemplateMiner
from drain3.template_miner_config import TemplateMinerConfig
from drain3.file_persistence import FilePersistence

In [28]:
# Config drain3
persistence = FilePersistence(config.DRAIN3_FILE_PERSISTENCE)
drain_config = TemplateMinerConfig()
drain_config.load(config.DRAIN3_CONFIG)
drain_config.profiling_enabled = True
template_miner = TemplateMiner(persistence, drain_config)

## Training Drain3

In [29]:
def preprocess_logs(line):
    line = line.rstrip()
    line_split = line.split(" ")
    label = line_split[0]
    timestamp = line_split[4]
    content = " ".join(line_split[6:])
    return label, timestamp, content

In [30]:
import requests
from tqdm import tqdm

lines = []
response = requests.get(config.BGL_LOG_URL)
if response.status_code == 200:
    content = response.text
    for line in tqdm(content.splitlines()):
        lines.append(line)
        label, timestamp, content = preprocess_logs(line)
        template_miner.add_log_message(content)

100%|██████████| 2000/2000 [00:00<00:00, 17060.21it/s]


In [31]:
len(lines)

2000

## Parser logs

In [32]:
result = {
    "label": [],
    "timestamp": [],
    "id": [],
    "template": [],
    "parameters": [],
}
numline = 0
for line in tqdm(lines):
    label, timestamp, content = preprocess_logs(line)
    parsed_log = template_miner.match(content)
    if parsed_log is None:
        print(f"Failed to parse: {content}")
        continue
    numline += 1
    template = parsed_log.get_template()
    result["label"].append(label)
    result["timestamp"].append(timestamp)
    result["id"].append(parsed_log.cluster_id)
    result["template"].append(template)
    result["parameters"].append(template_miner.get_parameter_list(template, line))

print(f"Number of parsed lines: {numline}")

100%|██████████| 2000/2000 [00:00<00:00, 10608.39it/s]

Number of parsed lines: 2000





In [33]:
import pandas as pd 

template_df = pd.DataFrame(result)
template_df.head(10)

Unnamed: 0,label,timestamp,id,template,parameters
0,-,2005-06-03-15.42.50.675872,1,RAS KERNEL INFO instruction cache parity error...,[]
1,-,2005-06-03-15.42.53.276129,1,RAS KERNEL INFO instruction cache parity error...,[]
2,-,2005-06-03-15.49.36.156884,1,RAS KERNEL INFO instruction cache parity error...,[]
3,-,2005-06-03-15.49.38.026704,1,RAS KERNEL INFO instruction cache parity error...,[]
4,-,2005-06-03-16.47.20.730545,2,RAS KERNEL INFO <:*:> <:*:> <:*:> <:*:>,[]
5,-,2005-06-03-16.56.14.254137,2,RAS KERNEL INFO <:*:> <:*:> <:*:> <:*:>,[]
6,-,2005-06-03-16.56.55.309974,2,RAS KERNEL INFO <:*:> <:*:> <:*:> <:*:>,[]
7,-,2005-06-03-18.21.59.871925,3,"RAS KERNEL INFO CE sym <:NUM:>, at <:HEX:>, ma...",[]
8,APPREAD,2005-06-04-00.24.32.432192,4,RAS APP FATAL ciod: failed to read message pre...,[]
9,APPREAD,2005-06-04-00.24.36.222560,4,RAS APP FATAL ciod: failed to read message pre...,[]


In [34]:
template_df['template'][7]

'RAS KERNEL INFO CE sym <:NUM:>, at <:HEX:>, mask <:HEX:>'