191 changes: 58 additions & 133 deletions .yamato/script/log_scripts/log_parser.py
@@ -1,84 +1,36 @@

import argparse
import os
import sys
import requests
import sys
import json
import glob
import re
from utils.execution_log_patterns import execution_log_patterns
from utils.utr_log_patterns import utr_log_patterns
from utils.execution_log import Execution_log
from utils.utr_log import UTR_log
from utils.unity_log import Unity_log
from utils.shared_utils import *

'''
This script runs for extended Yamato reporting. It
1. Parses the execution log and for each command and its output (except successful non-retries and failed tests)
- finds matching patterns from execution_log_patterns
- finds matching patterns from execution_log.py patterns
- for each matched pattern, recursively finds matches also for any pattern with 'redirect' specified
- sends to Yamato extended reporting server
title: command itself
summary: substring(s) of the command output matching the specified pattern(s)
tags: all (distinct) tags belonging to the matched pattern(s)
conclusion: failure/inconclusive/cancelled/success, which applies to the parsed command (not the whole job status)
2. In case a non-test-related failure from UTR is matched, the script also reads TestResults.json and
- finds matching patterns from utr_log_patterns
- extends the data sent to Yamato extended reporting server in the previous point, by
extending the summary with matched substrings from the utr pattern and by
appending the utr pattern tags to the list of previous tags

By default, the script requires no parameters and uses default execution log location in Yamato.
To run it locally, specify
--local
--execution-log "<path to execution log file>"
'''
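# A sketch of the local invocation described above (path and filename are illustrative):
#
#   python log_parser.py --local --execution-log "path/to/Execution-1234.log"
#
# With no arguments, the script assumes it runs inside Yamato and locates the log itself.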

def load_json(file_path):
with open(file_path) as f:
json_data = f.readlines()
json_data[0] = json_data[0].replace('let testData = ','') # strip the beginning
json_data[-1] = json_data[-1][:-1] # strip the ; from end
return json.loads(' '.join(json_data))
#return json.load(f)
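# Note: load_json assumes the results file is a JS assignment rather than plain JSON,
# e.g. a shape like (illustrative, not a real excerpt):
#   let testData = [
#     {"rootCause": "...", "errors": ["..."]}
#   ];
# hence it strips the 'let testData = ' prefix and the trailing ';' before json.loads.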

def get_execution_log():
'''Returns the path to execution log file.'''
path_to_execution_log = os.path.join(os.path.dirname(os.path.dirname(os.getcwd())),'Execution-*.log')
print('Searching for logs in: ', path_to_execution_log)

execution_log_file = glob.glob(path_to_execution_log)[0]
print('Reading log: ', execution_log_file)
return execution_log_file


def read_execution_log(log_file_path):
'''Reads execution logs and returns:
logs: dictionary with keys corresponding to commands, and values containing log output and status
job_succeeded: boolean indicating if the job succeeded
'''

with open(log_file_path, encoding='utf-8') as f:
lines = [l.replace('\n','') for l in f.readlines() if l != '\n'] # remove empty lines and all newline indicators

# all log line idx starting/ending a new command
command_idxs = [i for i,line in enumerate(lines) if '################################### Running next command ###################################' in line]
command_idxs_end = [i for i,line in enumerate(lines) if '############################################################################################' in line]
command_idxs.append(len(lines)) # add dummy idx to handle the last command

# get output (list of lines) for each command
logs = {}
for i, command_idx in enumerate(command_idxs):
if command_idx == len(lines):
break
command = '\n'.join(lines[command_idx+1: command_idxs_end[i]])
output = lines[command_idx+3: command_idxs[i+1]-1]
logs[command] = {}
logs[command]['output'] = output
logs[command]['status'] = 'Failed' if any("Command failed" in line for line in output) else 'Success'

# if the command block succeeded overall
overall_status = [line for line in lines if 'Commands finished with result:' in line][0].split(']')[1].split(': ')[1]
job_succeeded = False if 'Failed' in overall_status else True
return logs, job_succeeded

def parse_failures(logs, local):


def parse_failures(execution_log, logs, local):
'''Parses each command in the execution log (and possibly UTR logs),
recognizes any known errors, and posts additional data to Yamato.'''
for cmd in logs.keys():
@@ -87,101 +39,74 @@ def parse_failures(logs, local):
# TODO: do we also want to add additional yamato results for these?
if ((logs[cmd]['status'] == 'Success' and not any("Retrying" in line for line in logs[cmd]['output']))
or any("Reason(s): One or more tests have failed." in line for line in logs[cmd]['output'])):
# print('Skipping: ', cmd)
continue

print('\Found failed command: ', cmd)

# check if the error matches any known pattern marked in log_patterns.py
output = '\n'.join(logs[cmd]['output'])
matching_patterns, tags, conclusion = find_matching_patterns(execution_log_patterns, output)
print('\nFound failed command: ', cmd, '\n')

# initialize command data
logs[cmd]['title'] = cmd
logs[cmd]['conclusion'] = conclusion
logs[cmd]['tags'] = tags
logs[cmd]['summary'] = ''
for pattern,match in matching_patterns: # update the command log with input from the matched pattern
logs[cmd]['summary'] += match.group(0) + ' | ' if pattern['tags'][0] != 'unknown' else 'Unknown failure: check logs for more details. '
logs[cmd]['conclusion'] = []
logs[cmd]['tags'] = []
logs[cmd]['summary'] = []



# if it is an UTR non-test related error message not shown in Execution log but in test-results, append that to summary
if 'non-test' in logs[cmd]['tags']:
test_results_match = re.findall(r'(--artifacts_path=)(.+)(test-results)', cmd)[0]
test_results_path = test_results_match[1] + test_results_match[2]
# utr_failures, utr_tags, utr_conclusion = read_hoarder_log(os.path.join(test_results_path,'HoarderData.json'))
utr_failures, utr_tags, utr_conclusion = read_test_results_json(os.path.join(test_results_path,'TestResults.json'))
logs[cmd]['summary'] += utr_failures + ' | '
logs[cmd]['tags'].extend(utr_tags)
logs[cmd]['conclusion'] = utr_conclusion
# check if the error matches any known pattern (see execution_log.py), fill the command data for each match
cmd_output = '\n'.join(logs[cmd]['output'])
recursively_match_patterns(logs, cmd, execution_log.get_patterns(), cmd_output)

# post additional results to Yamato
post_additional_results(logs[cmd], local)
return


def find_matching_patterns(patterns, failure_string):
'''Finds a matching pattern from a specified list of pattern objects for a specified failure string (e.g. command output).
Returns the matching pattern object, and the matched substring.'''
matches = []
for pattern in patterns:
match = re.search(pattern['pattern'], failure_string)
if match:

# if a matching patterns was found, skip the general unknown pattern
if len(matches) > 0 and pattern['pattern'] == '.+':
continue
def recursively_match_patterns(logs, cmd, patterns, failure_string):
'''Match the given string against any known patterns. If a matched pattern contains a 'redirect',
also parse the redirected log(s), recursively.'''
matches = find_matching_patterns(patterns, failure_string)
for pattern, match in matches:

print('Found match for pattern: ', pattern['pattern'])
matches.append((pattern, match))

tags = [pattern['tags'] for (pattern,_) in matches]
flattened_tags = [tag for tag_list in tags for tag in tag_list]

conclusion = get_ruling_conclusion([pattern['conclusion'] for (pattern,_) in matches])

return matches, flattened_tags, conclusion


def get_ruling_conclusion(conclusions):
'''Pick a single conclusion out of several matches in the order of severity'''
if 'failure' in conclusions:
return 'failure'
elif 'inconclusive' in conclusions:
return 'inconclusive'
elif 'cancelled' in conclusions:
return 'cancelled'
elif 'success' in conclusions:
return 'success'
else:
return 'failure'
logs[cmd]['conclusion'].append(pattern['conclusion'])
logs[cmd]['tags'].append(pattern['tags'])
logs[cmd]['summary'].append(match.group(0) if pattern['tags'][0] != 'unknown' else 'Unknown failure: check logs for more details. ')

if pattern.get('redirect'):
test_results_match = re.findall(r'(--artifacts_path=)(.+)(test-results)', cmd)[0]
test_results_path = test_results_match[1] + test_results_match[2]
for redirect in pattern['redirect']:

def read_test_results_json(log_file_path):
'''Reads error messages from TestResults.json.'''
logs = load_json(log_file_path)
error_logs = [log for log in logs if log.get('rootCause')]
if len(error_logs) > 0:
failure_reasons = ' '.join(error_logs[0].get('errors',['']))
if redirect == 'utr_log':
try:
df = UTR_log(test_results_path)
recursively_match_patterns(logs, cmd, df.get_patterns(), df.read_log())
except Exception as e:
print('! Failed to parse UTR TestResults.json: ', e)
elif redirect == 'unity_log':
try:
df = Unity_log(test_results_path)
recursively_match_patterns(logs, cmd, df.get_patterns(), df.read_log())
except Exception as e:
print('! Failed to parse UnityLog.txt: ', e)

_, tags, conclusion = find_matching_patterns(utr_log_patterns, failure_reasons)
return failure_reasons, tags, conclusion
else:
return [], [], ''
else:
print('! Invalid redirect: ', redirect)
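
# Illustrative walk-through of the redirect flow above (hypothetical log content):
# a command whose output contains
#   "Reason(s): One or more non-test related errors or failures occurred."
# matches the 'non-test' pattern, whose 'redirect' list then chains parsing into
# the UTR TestResults.json and UnityLog.txt located via the command's
# --artifacts_path argument.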


def post_additional_results(cmd, local):
'''Posts additional results to Yamato reporting server'''
'''Posts additional results to Yamato reporting server:
- title: command itself
- summary: concatenated summary of all matched patterns, each capped at 500 char
- tags: non-duplicate tags of all matched patterns
- conclusion: most severe conclusion of all matched patterns
'''

data = {
'title': cmd['title'],
'summary': cmd['summary'][:500] + '...',
'conclusion': cmd['conclusion'],
'tags' : list(set(cmd['tags']))
'summary': ' | '.join(list(set([s[:500] for s in cmd['summary']]))),
'conclusion': get_ruling_conclusion(cmd['conclusion']),
'tags' : list(set(flatten_tags(cmd['tags'])))
}
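
# Illustrative payload (hypothetical values, assuming a single matched instability pattern):
#   {
#     "title": "<the failed command line>",
#     "summary": "packet_write_poll: Connection to ... Operation not permitted",
#     "conclusion": "inconclusive",
#     "tags": ["packet_write_poll", "instability"]
#   }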

if local:
print('Posting: ', json.dumps(data,indent=2), '\n')
print('\nPosting: ', json.dumps(data,indent=2), '\n')
else:
server_url = os.environ['YAMATO_REPORTING_SERVER'] + '/result'
headers = {'Content-Type':'application/json'}
@@ -203,12 +128,12 @@ def main(argv):
try:
args = parse_args(argv)

# read execution logs
execution_log_file = get_execution_log() if not args.execution_log else args.execution_log
logs, job_succeeded = read_execution_log(execution_log_file)
# read execution log
execution_log = Execution_log(args.execution_log)
logs, job_succeeded = execution_log.read_log()

if not job_succeeded:
parse_failures(logs, args.local)
parse_failures(execution_log, logs, args.local)

except Exception as e:
print('Failed to parse logs: ', str(e))
4 changes: 4 additions & 0 deletions .yamato/script/log_scripts/utils/constants.py
@@ -0,0 +1,4 @@
# The following constants indicate which log parser class to use
UTR_LOG = 'utr_log'
UNITY_LOG = 'unity_log'
EXECUTION_LOG = 'execution_log'
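
# For context: a pattern in execution_log.py can declare, e.g.,
#   'redirect': [UTR_LOG, UNITY_LOG]
# to chain parsing into the corresponding log parser class (see execution_log.py below).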
122 changes: 122 additions & 0 deletions .yamato/script/log_scripts/utils/execution_log.py
@@ -0,0 +1,122 @@
import os
import glob
from .shared_utils import load_json, find_matching_patterns
from .constants import *

COMMAND_START = '################################### Running next command ###################################'
COMMAND_END = '############################################################################################'
AFTER_BLOCK_START = 'Starting After Block'

class Execution_log():
'''Handles log parsing and error matching of the execution log'''

def __init__(self, path_to_log=""):
self.path = glob.glob(os.path.join(os.path.dirname(os.path.dirname(os.getcwd())),'Execution-*.log'))[0] if path_to_log=="" else path_to_log
self.patterns = self.get_patterns()

def get_patterns(self):
'''Returns error patterns to match against. Each pattern has:
pattern: regex to match some string against
tags: tags to be added to Yamato additional results, typically one as an identifier and one as a category (e.g. instability)
conclusion: success/failure/cancelled/inconclusive (if several patterns match a command, the most severe is chosen in the end)'''
return [
# Order: retry blocks must be matched first
# If either retry pattern is matched, the failures are further parsed to match any specific failure patterns written below
{
# This is matched if all retries fail.
'pattern': r'(Failed after)(.+)(retries)',
'tags': ['retry'],
'conclusion': 'failure',
},
{
# This matches both successful/failed retries.
# Successful retries have no concrete pattern to match: the only difference from a failed retry is that the block does not contain 'Failed after n retries',
# and no working regex for matching multiline input against a negative lookahead has been found yet.
# Therefore, this pattern must come after the failed-retry pattern (Python logic handles recognizing such a block as a successful retry)
'pattern': r'(Retrying)',
'tags': ['retry'],
'conclusion': 'success',
},
# Order: patterns below can be in any order, and the script can match multiple patterns
{
'pattern': r'(command not found)',
'tags': ['failure'],
'conclusion': 'failure',
},
{
# Or with newlines: r'(packet_write_poll: Connection to)((.|\n)+)(Operation not permitted)((.|\n)+)(lost connection)',
'pattern': r'(packet_write_poll: Connection to)(.+)(Operation not permitted)',
'tags': ['packet_write_poll','instability'],
'conclusion': 'inconclusive',
},
{
# Or: r'(LTO : error: L0496: error during communication with the LTO process: The pipe has been ended)'
'pattern': r'(orbis-ld stderr :LLVM ERROR: out of memory)((.|\n)+)(LLVM ERROR: out of memory)',
'tags': ['oom','instability'],
'conclusion': 'failure',
},
{
'pattern': r'(fatal: not a git repository \(or any of the parent directories\): \.git)',
'tags': ['git'],
'conclusion': 'failure',
},
{
'pattern': r'(LTO : error: L0492: LTOP internal error: bad allocation)',
'tags': ['instability', 'bad-allocation'],
'conclusion': 'failure',
},
{
'pattern': r'Reason\(s\): One or more tests have failed.', # this one is unused right now since yamato does it automatically
'tags': ['tests'],
'conclusion': 'failure',
},
{
'pattern': r'Reason\(s\): One or more non-test related errors or failures occurred.', # if hit this, read hoarder file
'tags': ['non-test'],
'conclusion': 'failure',
'redirect': [
UTR_LOG,
UNITY_LOG
]
},
# Order: this matches everything and must therefore be the last item in the list
# If any previous pattern has been matched, this one is skipped
{
'pattern': r'.+',
'tags': ['unknown'],
'conclusion': 'failure',
}
]
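
# A sketch of extending the list (hypothetical entry; new patterns belong before
# the catch-all '.+' pattern, which must remain last):
#   {
#       'pattern': r'(No space left on device)',
#       'tags': ['disk-space', 'instability'],
#       'conclusion': 'failure',
#   },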

def read_log(self):
'''Reads execution logs and returns:
logs: dictionary with keys corresponding to commands, and values containing log output and status
job_succeeded: boolean indicating if the job succeeded
'''

with open(self.path, encoding='utf-8') as f:
lines = [l.replace('\n','') for l in f.readlines() if l != '\n'] # remove empty lines and all newline indicators

# after block index
after_idx = [i for i,line in enumerate(lines) if AFTER_BLOCK_START in line][0]

# all log line idx starting/ending a new command
cmd_idxs = [i for i,line in enumerate(lines) if COMMAND_START in line]
cmd_idxs_end = [i for i,line in enumerate(lines) if COMMAND_END in line]
cmd_idxs.append(len(lines)) # add dummy idx to handle the last command

# get output (list of lines) for each command
logs = {}
for i, cmd_idx in enumerate(cmd_idxs):
if cmd_idx == len(lines) or cmd_idx >= after_idx:
break
cmd = '\n'.join(lines[cmd_idx+1: cmd_idxs_end[i]])
output = lines[cmd_idx+3: cmd_idxs[i+1]-1]
logs[cmd] = {}
logs[cmd]['output'] = output
logs[cmd]['status'] = 'Failed' if any("Command failed" in line for line in output) else 'Success'

# if the command block succeeded overall
overall_status = [line for line in lines if 'Commands finished with result:' in line][0].split(']')[1].split(': ')[1]
job_succeeded = False if 'Failed' in overall_status else True
return logs, job_succeeded
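
# The log layout read_log assumes (a sketch; marker lines abbreviated, timestamp hypothetical):
#   ################# Running next command #################
#   <command>
#   ########################################################
#   <output line 1>
#   <output line 2>
#   ...
#   [00:12:34] Commands finished with result: Failed
# The output slice starting at cmd_idx+3 relies on this fixed
# start-marker / command / end-marker shape, and commands after
# the 'Starting After Block' line are ignored.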