191 changes: 58 additions & 133 deletions .yamato/script/log_scripts/log_parser.py
@@ -1,84 +1,36 @@

import argparse
import os
import sys
import requests
import sys
import json
import glob
import re
from utils.execution_log_patterns import execution_log_patterns
from utils.utr_log_patterns import utr_log_patterns
from utils.execution_log import Execution_log
from utils.utr_log import UTR_log
from utils.unity_log import Unity_log
from utils.shared_utils import *

'''
This script runs for extended Yamato reporting. It
1. Parses the execution log and for each command and its output (except successful non-retries and failed tests)
- finds matching patterns from execution_log_patterns
- finds matching patterns from execution_log.py patterns
- for each matched pattern, recursively finds matches also for any pattern with 'redirect' specified
- sends to Yamato extended reporting server
title: command itself
summary: substring(s) of the command output matching the specified pattern(s)
tags: all (distinct) tags belonging to the matched pattern(s)
conclusion: failure/inconclusive/cancelled/success, which applies to the parsed command (not the whole job status)
2. In case a non-test-related failure from UTR is matched, the script also reads TestResults.json and
- finds matching patterns from utr_log_patterns
- extends the data sent to Yamato extended reporting server in the previous point, by
extending the summary with matched substrings from the utr pattern and by
appending the utr pattern tags to the list of previous tags

By default, the script requires no parameters and uses default execution log location in Yamato.
To run it locally, specify
--local
--execution-log "<path to execution log file>"
'''
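# A sketch of the local invocation described above (path and filename are illustrative):
#
#   python log_parser.py --local --execution-log "path/to/Execution-1234.log"
#
# With no arguments, the script assumes it runs inside Yamato and locates the log itself.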

def load_json(file_path):
with open(file_path) as f:
json_data = f.readlines()
json_data[0] = json_data[0].replace('let testData = ','') # strip the beginning
json_data[-1] = json_data[-1][:-1] # strip the ; from end
return json.loads(' '.join(json_data))
#return json.load(f)
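# Note: load_json assumes the results file is a JS assignment rather than plain JSON,
# e.g. a shape like (illustrative, not a real excerpt):
#   let testData = [
#     {"rootCause": "...", "errors": ["..."]}
#   ];
# hence it strips the 'let testData = ' prefix and the trailing ';' before json.loads.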

def get_execution_log():
'''Returns the path to execution log file.'''
path_to_execution_log = os.path.join(os.path.dirname(os.path.dirname(os.getcwd())),'Execution-*.log')
print('Searching for logs in: ', path_to_execution_log)

execution_log_file = glob.glob(path_to_execution_log)[0]
print('Reading log: ', execution_log_file)
return execution_log_file


def read_execution_log(log_file_path):
'''Reads execution logs and returns:
logs: dictionary with keys corresponding to commands, and values containing log output and status
job_succeeded: boolean indicating if the job succeeded
'''

with open(log_file_path, encoding='utf-8') as f:
lines = [l.replace('\n','') for l in f.readlines() if l != '\n'] # remove empty lines and all newline indicators

# all log line idx starting/ending a new command
command_idxs = [i for i,line in enumerate(lines) if '################################### Running next command ###################################' in line]
command_idxs_end = [i for i,line in enumerate(lines) if '############################################################################################' in line]
command_idxs.append(len(lines)) # add dummy idx to handle the last command

# get output (list of lines) for each command
logs = {}
for i, command_idx in enumerate(command_idxs):
if command_idx == len(lines):
break
command = '\n'.join(lines[command_idx+1: command_idxs_end[i]])
output = lines[command_idx+3: command_idxs[i+1]-1]
logs[command] = {}
logs[command]['output'] = output
logs[command]['status'] = 'Failed' if any("Command failed" in line for line in output) else 'Success'

# if the command block succeeded overall
overall_status = [line for line in lines if 'Commands finished with result:' in line][0].split(']')[1].split(': ')[1]
job_succeeded = False if 'Failed' in overall_status else True
return logs, job_succeeded

def parse_failures(logs, local):


def parse_failures(execution_log, logs, local):
'''Parses each command in the execution log (and possibly UTR logs),
recognizes any known errors, and posts additional data to Yamato.'''
for cmd in logs.keys():
@@ -87,101 +39,74 @@ def parse_failures(logs, local):
# TODO: do we also want to add additional yamato results for these?
if ((logs[cmd]['status'] == 'Success' and not any("Retrying" in line for line in logs[cmd]['output']))
or any("Reason(s): One or more tests have failed." in line for line in logs[cmd]['output'])):
# print('Skipping: ', cmd)
continue

print('\Found failed command: ', cmd)

# check if the error matches any known pattern marked in log_patterns.py
output = '\n'.join(logs[cmd]['output'])
matching_patterns, tags, conclusion = find_matching_patterns(execution_log_patterns, output)
print('\nFound failed command: ', cmd, '\n')

# initialize command data
logs[cmd]['title'] = cmd
logs[cmd]['conclusion'] = conclusion
logs[cmd]['tags'] = tags
logs[cmd]['summary'] = ''
for pattern,match in matching_patterns: # update the command log with input from the matched pattern
logs[cmd]['summary'] += match.group(0) + ' | ' if pattern['tags'][0] != 'unknown' else 'Unknown failure: check logs for more details. '
logs[cmd]['conclusion'] = []
logs[cmd]['tags'] = []
logs[cmd]['summary'] = []



# if it is an UTR non-test related error message not shown in Execution log but in test-results, append that to summary
if 'non-test' in logs[cmd]['tags']:
test_results_match = re.findall(r'(--artifacts_path=)(.+)(test-results)', cmd)[0]
test_results_path = test_results_match[1] + test_results_match[2]
# utr_failures, utr_tags, utr_conclusion = read_hoarder_log(os.path.join(test_results_path,'HoarderData.json'))
utr_failures, utr_tags, utr_conclusion = read_test_results_json(os.path.join(test_results_path,'TestResults.json'))
logs[cmd]['summary'] += utr_failures + ' | '
logs[cmd]['tags'].extend(utr_tags)
logs[cmd]['conclusion'] = utr_conclusion
# check if the error matches any known pattern (see execution_log.py), fill the command data for each match
cmd_output = '\n'.join(logs[cmd]['output'])
recursively_match_patterns(logs, cmd, execution_log.get_patterns(), cmd_output)

# post additional results to Yamato
post_additional_results(logs[cmd], local)
return


def find_matching_patterns(patterns, failure_string):
'''Finds a matching pattern from a specified list of pattern objects for a specified failure string (e.g. command output).
Returns the matching pattern object, and the matched substring.'''
matches = []
for pattern in patterns:
match = re.search(pattern['pattern'], failure_string)
if match:

# if a matching patterns was found, skip the general unknown pattern
if len(matches) > 0 and pattern['pattern'] == '.+':
continue
def recursively_match_patterns(logs, cmd, patterns, failure_string):
'''Match the given string against any known patterns. If a matched pattern contains a 'redirect',
also parse the redirected log(s), recursively.'''
matches = find_matching_patterns(patterns, failure_string)
for pattern, match in matches:

print('Found match for pattern: ', pattern['pattern'])
matches.append((pattern, match))

tags = [pattern['tags'] for (pattern,_) in matches]
flattened_tags = [tag for tag_list in tags for tag in tag_list]

conclusion = get_ruling_conclusion([pattern['conclusion'] for (pattern,_) in matches])

return matches, flattened_tags, conclusion


def get_ruling_conclusion(conclusions):
'''Pick a single conclusion out of several matches in the order of severity'''
if 'failure' in conclusions:
return 'failure'
elif 'inconclusive' in conclusions:
return 'inconclusive'
elif 'cancelled' in conclusions:
return 'cancelled'
elif 'success' in conclusions:
return 'success'
else:
return 'failure'
logs[cmd]['conclusion'].append(pattern['conclusion'])
logs[cmd]['tags'].append(pattern['tags'])
logs[cmd]['summary'].append(match.group(0) if pattern['tags'][0] != 'unknown' else 'Unknown failure: check logs for more details. ')

if pattern.get('redirect'):
test_results_match = re.findall(r'(--artifacts_path=)(.+)(test-results)', cmd)[0]
test_results_path = test_results_match[1] + test_results_match[2]
for redirect in pattern['redirect']:

def read_test_results_json(log_file_path):
'''Reads error messages from TestResults.json.'''
logs = load_json(log_file_path)
error_logs = [log for log in logs if log.get('rootCause')]
if len(error_logs) > 0:
failure_reasons = ' '.join(error_logs[0].get('errors',['']))
if redirect == 'utr_log':
try:
df = UTR_log(test_results_path)
recursively_match_patterns(logs, cmd, df.get_patterns(), df.read_log())
except Exception as e:
print('! Failed to parse UTR TestResults.json: ', e)
elif redirect == 'unity_log':
try:
df = Unity_log(test_results_path)
recursively_match_patterns(logs, cmd, df.get_patterns(), df.read_log())
except Exception as e:
print('! Failed to parse UnityLog.txt: ', e)

_, tags, conclusion = find_matching_patterns(utr_log_patterns, failure_reasons)
return failure_reasons, tags, conclusion
else:
return [], [], ''
else:
print('! Invalid redirect: ', redirect)
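
# Illustrative walk-through of the redirect flow above (hypothetical log content):
# a command whose output contains
#   "Reason(s): One or more non-test related errors or failures occurred."
# matches the 'non-test' pattern, whose 'redirect' list then chains parsing into
# the UTR TestResults.json and UnityLog.txt located via the command's
# --artifacts_path argument.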


def post_additional_results(cmd, local):
'''Posts additional results to Yamato reporting server'''
'''Posts additional results to Yamato reporting server:
- title: command itself
- summary: concatenated summary of all matched patterns, each capped at 500 char
- tags: non-duplicate tags of all matched patterns
- conclusion: most severe conclusion of all matched patterns
'''

data = {
'title': cmd['title'],
'summary': cmd['summary'][:500] + '...',
'conclusion': cmd['conclusion'],
'tags' : list(set(cmd['tags']))
'summary': ' | '.join(list(set([s[:500] for s in cmd['summary']]))),
'conclusion': get_ruling_conclusion(cmd['conclusion']),
'tags' : list(set(flatten_tags(cmd['tags'])))
}
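
# Illustrative payload (hypothetical values, assuming a single matched instability pattern):
#   {
#     "title": "<the failed command line>",
#     "summary": "packet_write_poll: Connection to ... Operation not permitted",
#     "conclusion": "inconclusive",
#     "tags": ["packet_write_poll", "instability"]
#   }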

if local:
print('Posting: ', json.dumps(data,indent=2), '\n')
print('\nPosting: ', json.dumps(data,indent=2), '\n')
else:
server_url = os.environ['YAMATO_REPORTING_SERVER'] + '/result'
headers = {'Content-Type':'application/json'}
@@ -203,12 +128,12 @@ def main(argv):
try:
args = parse_args(argv)

# read execution logs
execution_log_file = get_execution_log() if not args.execution_log else args.execution_log
logs, job_succeeded = read_execution_log(execution_log_file)
# read execution log
execution_log = Execution_log(args.execution_log)
logs, job_succeeded = execution_log.read_log()

if not job_succeeded:
parse_failures(logs, args.local)
parse_failures(execution_log, logs, args.local)

except Exception as e:
print('Failed to parse logs: ', str(e))
4 changes: 4 additions & 0 deletions .yamato/script/log_scripts/utils/constants.py
@@ -0,0 +1,4 @@
# The following constants indicate which log parser class to use
UTR_LOG = 'utr_log'
UNITY_LOG = 'unity_log'
EXECUTION_LOG = 'execution_log'
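
# For context: a pattern in execution_log.py can declare, e.g.,
#   'redirect': [UTR_LOG, UNITY_LOG]
# to chain parsing into the corresponding log parser class (see execution_log.py below).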
122 changes: 122 additions & 0 deletions .yamato/script/log_scripts/utils/execution_log.py
@@ -0,0 +1,122 @@
import os
import glob
from .shared_utils import load_json, find_matching_patterns
from .constants import *

COMMAND_START = '################################### Running next command ###################################'
COMMAND_END = '############################################################################################'
AFTER_BLOCK_START = 'Starting After Block'

class Execution_log():
'''Handles log parsing and error matching of the execution log'''

def __init__(self, path_to_log=""):
self.path = glob.glob(os.path.join(os.path.dirname(os.path.dirname(os.getcwd())),'Execution-*.log'))[0] if path_to_log=="" else path_to_log
self.patterns = self.get_patterns()

def get_patterns(self):
'''Returns error patterns to match against. Each pattern has:
pattern: regex to match some string against
tags: tags to be added to Yamato additional results, typically one as an identifier and one as a category (e.g. instability)
conclusion: success/failure/cancelled/inconclusive (if several patterns match a command, the most severe is chosen in the end)'''
return [
# Order: retry blocks must be matched first
# If either retry pattern is matched, the failures are further parsed to match any specific failure patterns written below
{
# This is matched if all retries fail.
'pattern': r'(Failed after)(.+)(retries)',
'tags': ['retry'],
'conclusion': 'failure',
},
{
# This matches both successful/failed retries.
# Successful retries have no concrete pattern to match: the only difference from a failed retry is that the block does not contain 'Failed after n retries',
# and no working regex for matching multiline input against a negative lookahead has been found yet.
# Therefore, this pattern must come after the failed-retry pattern (Python logic handles recognizing such a block as a successful retry)
'pattern': r'(Retrying)',
'tags': ['retry'],
'conclusion': 'success',
},
# Order: patterns below can be in any order, and the script can match multiple patterns
{
'pattern': r'(command not found)',
'tags': ['failure'],
'conclusion': 'failure',
},
{
# Or with newlines: r'(packet_write_poll: Connection to)((.|\n)+)(Operation not permitted)((.|\n)+)(lost connection)',
'pattern': r'(packet_write_poll: Connection to)(.+)(Operation not permitted)',
'tags': ['packet_write_poll','instability'],
'conclusion': 'inconclusive',
},
{
# Or: r'(LTO : error: L0496: error during communication with the LTO process: The pipe has been ended)'
'pattern': r'(orbis-ld stderr :LLVM ERROR: out of memory)((.|\n)+)(LLVM ERROR: out of memory)',
'tags': ['oom','instability'],
'conclusion': 'failure',
},
{
'pattern': r'(fatal: not a git repository \(or any of the parent directories\): \.git)',
'tags': ['git'],
'conclusion': 'failure',
},
{
'pattern': r'(LTO : error: L0492: LTOP internal error: bad allocation)',
'tags': ['instability', 'bad-allocation'],
'conclusion': 'failure',
},
{
'pattern': r'Reason\(s\): One or more tests have failed.', # this one is unused right now since yamato does it automatically
'tags': ['tests'],
'conclusion': 'failure',
},
{
'pattern': r'Reason\(s\): One or more non-test related errors or failures occurred.', # if hit this, read hoarder file
'tags': ['non-test'],
'conclusion': 'failure',
'redirect': [
UTR_LOG,
UNITY_LOG
]
},
# Order: this matches everything and must therefore be the last item in the list
# If any previous pattern has been matched, this one is skipped
{
'pattern': r'.+',
'tags': ['unknown'],
'conclusion': 'failure',
}
]
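
# A sketch of extending the list (hypothetical entry; new patterns belong before
# the catch-all '.+' pattern, which must remain last):
#   {
#       'pattern': r'(No space left on device)',
#       'tags': ['disk-space', 'instability'],
#       'conclusion': 'failure',
#   },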

def read_log(self):
'''Reads execution logs and returns:
logs: dictionary with keys corresponding to commands, and values containing log output and status
job_succeeded: boolean indicating if the job succeeded
'''

with open(self.path, encoding='utf-8') as f:
lines = [l.replace('\n','') for l in f.readlines() if l != '\n'] # remove empty lines and all newline indicators

# after block index
after_idx = [i for i,line in enumerate(lines) if AFTER_BLOCK_START in line][0]

# all log line idx starting/ending a new command
cmd_idxs = [i for i,line in enumerate(lines) if COMMAND_START in line]
cmd_idxs_end = [i for i,line in enumerate(lines) if COMMAND_END in line]
cmd_idxs.append(len(lines)) # add dummy idx to handle the last command

# get output (list of lines) for each command
logs = {}
for i, cmd_idx in enumerate(cmd_idxs):
if cmd_idx == len(lines) or cmd_idx >= after_idx:
break
cmd = '\n'.join(lines[cmd_idx+1: cmd_idxs_end[i]])
output = lines[cmd_idx+3: cmd_idxs[i+1]-1]
logs[cmd] = {}
logs[cmd]['output'] = output
logs[cmd]['status'] = 'Failed' if any("Command failed" in line for line in output) else 'Success'

# if the command block succeeded overall
overall_status = [line for line in lines if 'Commands finished with result:' in line][0].split(']')[1].split(': ')[1]
job_succeeded = False if 'Failed' in overall_status else True
return logs, job_succeeded
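
# The log layout read_log assumes (a sketch; marker lines abbreviated, timestamp hypothetical):
#   ################# Running next command #################
#   <command>
#   ########################################################
#   <output line 1>
#   <output line 2>
#   ...
#   [00:12:34] Commands finished with result: Failed
# The output slice starting at cmd_idx+3 relies on this fixed
# start-marker / command / end-marker shape, and commands after
# the 'Starting After Block' line are ignored.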