From c55bb6392f3a8998251b204c5cc99dade35cbfff Mon Sep 17 00:00:00 2001
From: Mou
Date: Thu, 2 Mar 2023 19:48:40 -0800
Subject: [PATCH 01/11] a tool that calculates workflow/job failure rates and a
 tool that collects ci_test failure logs

---
 ci/fireci/workflow_summary/README.md          | 171 ++++++++++++
 .../workflow_summary/collect_ci_test_logs.py  |  91 +++++++
 ci/fireci/workflow_summary/github.py          |  78 ++++++
 .../workflow_summary/workflow_information.py  | 247 ++++++++++++++++++
 4 files changed, 587 insertions(+)
 create mode 100644 ci/fireci/workflow_summary/README.md
 create mode 100644 ci/fireci/workflow_summary/collect_ci_test_logs.py
 create mode 100644 ci/fireci/workflow_summary/github.py
 create mode 100644 ci/fireci/workflow_summary/workflow_information.py

diff --git a/ci/fireci/workflow_summary/README.md b/ci/fireci/workflow_summary/README.md
new file mode 100644
index 00000000000..b5414a692a2
--- /dev/null
+++ b/ci/fireci/workflow_summary/README.md
@@ -0,0 +1,171 @@
+# `workflow_information.py` Script
+
+## Usage
+- Collect last `90` days' `Postsubmit` `ci_tests.yml` workflow runs:
+  ```
+  python workflow_information.py --token ${your_github_token} --branch master --event push -d 90
+  ```
+
+- Collect last `30` days' `Presubmit` `ci_tests.yml` workflow runs:
+  ```
+  python workflow_information.py --token ${your_github_token} --event pull_request -d 30
+  ```
+
+- Please refer `Inputs` section for more use cases, and `Outputs` section for the workflow summary report format.
+
+## Inputs
+- `-o, --repo_owner`: GitHub repo owner, default value is `firebase`.
+
+- `-n, --repo_name`: GitHub repo name, default value is `firebase-android-sdk`.
+
+- `-t, --token`: **[Required]** GitHub access token. See [Creating a personal access token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token).
+
+- `-w, --workflow_name`: Workflow filename, default value is `ci_tests.yml`.
+
+- `-d, --days`: Filter workflow runs from the past `-d` days, default value is `90`. See [retention period for GitHub Actions artifacts and logs](https://docs.github.com/en/organizations/managing-organization-settings/configuring-the-retention-period-for-github-actions-artifacts-and-logs-in-your-organization).
+
+- `-b, --branch`: Filter by the branch that workflows run against.
+
+- `-a, --actor`: Filter by the actor who triggered the workflow runs.
+
+- `-e, --event`: Filter workflows trigger event, default is all events, could be one of the following values `['push', 'pull_request', 'issue']`.
+
+- `-j, --jobs`: Filter workflows jobs, default is the last job and does not include all jobs (does not include rerun jobs), could be one of the following values `['latest', 'all']`.
+
+- `-f, --folder`: Workflow and job information will be stored here, default value is the current datetime.
+
+
+## Outputs
+
+- `workflow_summary_report.txt`: a general report containing workflow pass/failure counts, running times, etc.
+
+  ```
+  2023-03-03 01:37:07.114500
+  Namespace(actor=None, branch=None, days=30, event='pull_request', folder='presubmit_30', jobs='all', repo_name='firebase-android-sdk', repo_owner='firebase', token='ghp_[REDACTED]', workflow_name='ci_tests.yml')
+
+  Workflow 'ci_tests.yml' Report:
+   Workflow Failure Rate:64.77%
+   Workflow Total Count: 193 (success: 68, failure: 125)
+
+  Workflow Runtime Report:
+  161 workflow runs finished without rerun, the average running time: 0:27:24.745342
+  Including:
+   56 passed workflow runs, with average running time: 0:17:29.214286
+   105 failed workflow runs, with average running time: 0:32:42.361905
+
+  32 runs finished with rerun, the average running time: 1 day, 3:57:53.937500
+  The running times of the rerun workflows are:
+   ['1 day, 2:24:32', '3:35:54', '3:19:14', '4 days, 6:10:50', '15:33:39', '1:57:21', '1:13:12', '1:55:18', '12 days, 21:51:29', '0:48:48', '0:45:28', '1:40:21', '2 days, 1:46:35', '19:47:16', '0:45:49', '2:22:36', '0:25:22', '0:55:30', '1:40:32', '1:10:05', '20:08:38', '0:31:03', '5 days, 9:19:25', '5:10:44', '1:20:57', '0:28:47', '1:52:44', '20:19:17', '0:35:15', '21:31:07', '3 days, 1:06:44', '3 days, 2:18:14']
+
+  Job Failure Report:
+  Unit Tests (:firebase-storage):
+   Failure Rate:54.61%
+   Total Count: 152 (success: 69, failure: 83)
+  Unit Tests (:firebase-messaging):
+   Failure Rate:35.37%
+   Total Count: 147 (success: 95, failure: 52)
+  ```
+
+
+- Intermediate file `workflow_summary.json`: contains all the workflow runs and job information attached to each workflow.
+
+  ```
+  {
+    'workflow_name':'ci_tests.yml',
+    'total_count':81,
+    'success_count':32,
+    'failure_count':49,
+    'created':'>2022-11-30T23:15:04Z',
+    'workflow_runs':[
+      {
+        'workflow_id':4296343867,
+        'conclusion':'failure',
+        'head_branch':'master',
+        'actor':'vkryachko',
+        'created_at':'2023-02-28T18:47:40Z',
+        'updated_at':'2023-02-28T19:20:16Z',
+        'run_started_at':'2023-02-28T18:47:40Z',
+        'run_attempt':1,
+        'html_url':'https://github.com/firebase/firebase-android-sdk/actions/runs/4296343867',
+        'jobs_url':'https://api.github.com/repos/firebase/firebase-android-sdk/actions/runs/4296343867/jobs',
+        'jobs':{
+          'total_count':95,
+          'success_count':92,
+          'failure_count':3,
+          'job_runs':[
+            {
+              'job_id':11664775180,
+              'job_name':'Determine changed modules',
+              'conclusion':'success',
+              'created_at':'2023-02-28T18:47:42Z',
+              'started_at':'2023-02-28T18:47:50Z',
+              'completed_at':'2023-02-28T18:50:11Z',
+              'run_attempt': 1,
+              'html_url':'https://github.com/firebase/firebase-android-sdk/actions/runs/4296343867/jobs/7487936863',
+            }
+          ]
+        }
+      }
+    ]
+  }
+  ```
+
+- Intermediate file `job_summary.json`: contains all the job runs organized by job name.
+  ```
+  {
+    'Unit Test Results':{  # job name
+      'total_count':17,
+      'success_count':7,
+      'failure_count':10,
+      'failure_jobs':[  # same structure as the 'job_runs' entries in workflow_summary.json
+        {
+          'job_id':11372664143,
+          'job_name':'Unit Test Results',
+          'conclusion':'failure',
+          'created_at':'2023-02-15T22:02:06Z',
+          'started_at':'2023-02-15T22:02:06Z',
+          'completed_at':'2023-02-15T22:02:06Z',
+          'run_attempt': 1,
+          'html_url':'https://github.com/firebase/firebase-android-sdk/runs/11372664143',
+        }
+      ]
+    }
+  }
+  ```
+
+
+# `collect_ci_test_logs.py` Script
+
+## Usage
+- Collect `ci_workflow.yml` job failure logs from `workflow_information.py` script's intermediate file:
+  ```
+  python collect_ci_test_logs.py --token ${github_token} --folder ${folder}
+  ```
+
+## Inputs
+
+- `-t, --token`: **[Required]** GitHub access token. See [Creating a personal access token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token).
+
+- `-f, --folder`: Folder that stores intermediate files generated by `workflow_information.py`. `ci_workflow.yml` job failure logs will also be stored here.
+
+## Outputs
+
+- `${job name}.log`: contains the job failure rate, links to all failed jobs, and the failure logs.
+  ```
+  Unit Tests (:firebase-storage):
+  Failure rate:40.00%
+  Total count: 20 (success: 12, failure: 8)
+  Failed jobs:
+
+  https://github.com/firebase/firebase-android-sdk/actions/runs/4296343867/jobs/7487989874
+  firebase-storage:testDebugUnitTest
+  Task :firebase-storage:testDebugUnitTest
+  2023-02-28T18:54:38.1333725Z
+  2023-02-28T18:54:38.1334278Z com.google.firebase.storage.DownloadTest > streamDownloadWithResumeAndCancel FAILED
+  2023-02-28T18:54:38.1334918Z     org.junit.ComparisonFailure at DownloadTest.java:190
+  2023-02-28T18:57:20.3329130Z
+  2023-02-28T18:57:20.3330165Z 112 tests completed, 1 failed
+  2023-02-28T18:57:20.5329189Z
+  2023-02-28T18:57:20.5330505Z > Task :firebase-storage:testDebugUnitTest FAILED
+  ```
+
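For readers who want to work with these artifacts directly, here is a minimal sketch (not part of the patch itself) that ranks jobs from an existing `job_summary.json` by failure rate; the folder name `presubmit_30` is only an example:

```python
# Sketch: list the flakiest jobs recorded in job_summary.json.
# Assumes workflow_information.py already wrote its output to ./presubmit_30.
import json

with open('presubmit_30/job_summary.json') as f:
  job_summary = json.load(f)

# Entries are already sorted by failure_rate (highest first) when dumped.
for job_name, job in list(job_summary.items())[:5]:
  print(f"{job_name}: {job['failure_rate']:.2%} "
        f"({job['failure_count']}/{job['total_count']} runs failed)")
```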
diff --git a/ci/fireci/workflow_summary/collect_ci_test_logs.py b/ci/fireci/workflow_summary/collect_ci_test_logs.py
new file mode 100644
index 00000000000..1c1ca2293ef
--- /dev/null
+++ b/ci/fireci/workflow_summary/collect_ci_test_logs.py
@@ -0,0 +1,91 @@
+# -*- coding: utf-8 -*-
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the 'License');
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an 'AS IS' BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+'''A utility collecting ci_test.yml workflow failure logs.
+
+Usage:
+
+  python collect_ci_test_logs.py --token ${github_token} --folder ${folder}
+
+'''
+
+import argparse
+import github
+import json
+import re
+import logging
+import os
+import datetime
+
+def main():
+  logging.getLogger().setLevel(logging.INFO)
+
+  args = parse_cmdline_args()
+  github.set_api_url('firebase', 'firebase-android-sdk')
+
+  token = args.token
+
+  file_folder = args.folder
+  if not os.path.exists(file_folder):
+    logging.error('%s doesn\'t exist' % file_folder)
+    exit(1)
+
+  job_summary = json.load(open(os.path.join(file_folder, 'job_summary.json')))
+
+  for job_name in job_summary:
+    if job_name in ['Determine changed modules','Unit Tests (matrix)','Publish Tests Results','Unit Test Results','Instrumentation Tests','Unit Tests']:
+      continue
+
+    job = job_summary[job_name]
+
+    if job['failure_rate'] > 0:
+      failure_rate = job['failure_rate']
+      total_count = job['total_count']
+      success_count = job['success_count']
+      failure_count = job['failure_count']
+
+      log_file_path = os.path.join(file_folder, '%s.log'%job_name)
+      file_log = open(log_file_path, 'w')
+      file_log.write('\n{}:\nFailure rate:{:.2%} \nTotal count: {} (success: {}, failure: {})\nFailed jobs:'.format(job_name, failure_rate, total_count, success_count, failure_count))
+      logging.info('\n\n{}:\nFailure rate:{:.2%} \nTotal count: {} (success: {}, failure: {})\nFailed jobs:'.format(job_name, failure_rate, total_count, success_count, failure_count))
+
+      for failure_job in job['failure_jobs']:
+        file_log.write('\n\n'+failure_job['html_url'])
+        logging.info(failure_job['html_url'])
+        job_id = failure_job['job_id']
+        logs = github.job_logs(token, job_id)
+        if logs:
+          failed_tasks = re.findall(r"Execution failed for task ':(.*?)'.", logs)
+          for failed_task in failed_tasks:
+            file_log.write('\n'+failed_task)
+            pattern = fr'Task :{failed_task}(.*?)Task :{failed_task} FAILED'
+            failed_tests = re.search(pattern, logs, re.MULTILINE | re.DOTALL)
+            if failed_tests:
+              file_log.write('\n'+failed_tests.group())
+
+      file_log.close()
+
+  logging.info('\n\nFinished collecting failure logs, log files are located under: %s' % file_folder)
+
+
+def parse_cmdline_args():
+  parser = argparse.ArgumentParser(description='Collect ci_test workflow job failure logs.')
+  parser.add_argument('-t', '--token', required=True, help='GitHub access token')
+  parser.add_argument('-f', '--folder', required=True, help='Folder generated by workflow_information.py. Test logs are also stored here.')
+  args = parser.parse_args()
+  return args
+
+if __name__ == '__main__':
+  main()
\ No newline at end of file
diff --git a/ci/fireci/workflow_summary/github.py b/ci/fireci/workflow_summary/github.py
new file mode 100644
index 00000000000..9e7179ef381
--- /dev/null
+++ b/ci/fireci/workflow_summary/github.py
@@ -0,0 +1,78 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""A utility for GitHub REST API."""
+
+import requests
+import json
+import shutil
+import re
+import logging
+
+RETRIES = 3
+BACKOFF = 5
+RETRY_STATUS = (403, 500, 502, 504)
+TIMEOUT = 5
+TIMEOUT_LONG = 20
+
+OWNER = ''
+REPO = ''
+BASE_URL = 'https://api.github.com'
+GITHUB_API_URL = '%s/repos/%s/%s' % (BASE_URL, OWNER, REPO)
+
+
+def set_api_url(owner, repo):
+  if owner and repo:
+    global OWNER, REPO, GITHUB_API_URL
+    OWNER = owner
+    REPO = repo
+    GITHUB_API_URL = '%s/repos/%s/%s' % (BASE_URL, OWNER, REPO)
+    logging.info('GITHUB_API_URL has been set: %s' % GITHUB_API_URL)
+    return True
+  else:
+    return False
+
+
+def list_workflows(token, workflow_id, params):
+  """https://docs.github.com/en/rest/actions/workflow-runs#list-workflow-runs-for-a-workflow"""
+  url = f'{GITHUB_API_URL}/actions/workflows/{workflow_id}/runs'
+  headers = {'Accept': 'application/vnd.github+json', 'Authorization': f'token {token}'}
+  with requests.get(url, headers=headers, params=params,
+                    stream=True, timeout=TIMEOUT_LONG) as response:
+    logging.info("list_workflows: %s, params: %s, response: %s", url, params, response)
+    return response.json()
+
+def list_jobs(token, run_id, params):
+  """https://docs.github.com/en/rest/actions/workflow-jobs#list-jobs-for-a-workflow-run"""
+  url = f'{GITHUB_API_URL}/actions/runs/{run_id}/jobs'
+  headers = {'Accept': 'application/vnd.github+json', 'Authorization': f'token {token}'}
+  with requests.get(url, headers=headers, params=params,
+                    stream=True, timeout=TIMEOUT_LONG) as response:
+    logging.info("list_jobs: %s, params: %s, response: %s", url, params, response)
+    return response.json()
+
+def job_logs(token, job_id):
+  """https://docs.github.com/rest/reference/actions#download-job-logs-for-a-workflow-run"""
+  url = f'{GITHUB_API_URL}/actions/jobs/{job_id}/logs'
+  headers = {'Accept': 'application/vnd.github+json', 'Authorization': f'token {token}'}
+  with requests.get(url, headers=headers, allow_redirects=False,
+                    stream=True, timeout=TIMEOUT_LONG) as response:
+    logging.info("job_logs: %s response: %s", url, response)
+    if response.status_code == 302:
+      with requests.get(response.headers['Location'], headers=headers, allow_redirects=False,
+                        stream=True, timeout=TIMEOUT_LONG) as get_log_response:
+        return get_log_response.content.decode('utf-8')
+    else:
+      print('no log available')
+      return ''
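Before the diff moves on, here is a minimal usage sketch for the module above (not part of the patch); the token is a placeholder, and the date filter reuses the `created` syntax that `workflow_information.py` builds:

```python
# Sketch: fetch the first page of completed ci_tests.yml runs from the last 7 days.
import datetime

import github

github.set_api_url('firebase', 'firebase-android-sdk')
token = 'ghp_xxx'  # placeholder personal access token

since = datetime.datetime.utcnow() - datetime.timedelta(days=7)
params = {'status': 'completed',
          'created': '>' + since.strftime('%Y-%m-%dT%H:%M:%SZ'),
          'page': 1, 'per_page': 100}
runs = github.list_workflows(token, 'ci_tests.yml', params)
print(runs.get('total_count', 0))
```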
diff --git a/ci/fireci/workflow_summary/workflow_information.py b/ci/fireci/workflow_summary/workflow_information.py
new file mode 100644
index 00000000000..a5203dac7f5
--- /dev/null
+++ b/ci/fireci/workflow_summary/workflow_information.py
@@ -0,0 +1,247 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the 'License');
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an 'AS IS' BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import github
+import json
+import datetime
+import argparse
+import logging
+import os
+import threading
+
+
+'''A utility collecting workflow run data and calculating workflow/job failure rates.
+
+Usage:
+
+  python workflow_information.py --token ${github_token} --workflow_name ${workflow_name}
+
+'''
+
+def main():
+  logging.getLogger().setLevel(logging.INFO)
+
+  args = parse_cmdline_args()
+  logging.info(args)
+
+  github.set_api_url(args.repo_owner, args.repo_name)
+
+  # location for all artifacts
+  if args.folder:
+    file_folder = os.path.normpath(args.folder)
+  else:
+    file_folder = os.path.normpath(datetime.datetime.utcnow().strftime('%Y-%m-%d+%H:%M:%S'))
+  if not os.path.exists(file_folder):
+    os.makedirs(file_folder)
+
+  workflow_summary = get_workflow_summary(args)
+  json.dump(workflow_summary, open(os.path.join(file_folder, 'workflow_summary.json'),'w'))
+  logging.info('Workflow summary has been written to %s\n' % os.path.join(file_folder, 'workflow_summary.json'))
+
+  job_summary = get_job_summary(workflow_summary)
+  json.dump(job_summary, open(os.path.join(file_folder, 'job_summary.json'),'w'))
+  logging.info('Job summary has been written to %s\n' % os.path.join(file_folder, 'job_summary.json'))
+
+  workflow_summary_report = f"{datetime.datetime.utcnow()}\n{args}\n\n"
+  workflow_summary_report += generate_summary_report(workflow_summary, job_summary)
+  open(os.path.join(file_folder, 'workflow_summary_report.txt'), 'w').write(workflow_summary_report)
+  logging.info('Workflow summary report has been written to %s\n' % os.path.join(file_folder, 'workflow_summary_report.txt'))
+
+
+def get_workflow_summary(args):
+  token = args.token
+  workflow_name = args.workflow_name
+  # https://docs.github.com/en/search-github/getting-started-with-searching-on-github/understanding-the-search-syntax#query-for-dates
+  days = args.days
+  current_datetime = datetime.datetime.utcnow()
+  since_datetime = current_datetime - datetime.timedelta(days=days)
+  created = '>' + since_datetime.strftime('%Y-%m-%dT%H:%M:%SZ')
+
+  workflow_summary = {'workflow_name': workflow_name,
+                      'total_count': 0,
+                      'success_count': 0,
+                      'failure_count': 0,
+                      'created': created,
+                      'workflow_runs': []}
+
+  logging.info('START collecting workflow run data\n')
+  workflow_page = 0
+  per_page = 100 # max 100
+  list_workflows_params = {'status': 'completed', 'created': created, 'page': workflow_page, 'per_page': per_page}
+  if args.event:
+    list_workflows_params['event'] = args.event
+  if args.actor:
+    list_workflows_params['actor'] = args.actor
+  if args.branch:
+    list_workflows_params['branch'] = args.branch
+
+  request_workflow_list = True
+  while request_workflow_list:
+    workflow_page += 1
+    list_workflows_params['page'] = workflow_page
+    workflows = github.list_workflows(token, workflow_name, list_workflows_params)
+
+    if 'workflow_runs' in workflows and workflows['workflow_runs']:
+      for workflow in workflows['workflow_runs']:
+        if workflow['conclusion'] in ['success', 'failure']:
+          workflow_summary['workflow_runs'].append({'workflow_id': workflow['id'], 'conclusion': workflow['conclusion'],
+                                                    'head_branch': workflow['head_branch'], 'actor': workflow['actor']['login'],
+                                                    'created_at': workflow['created_at'], 'updated_at': workflow['updated_at'],
+                                                    'run_started_at': workflow['run_started_at'], 'run_attempt': workflow['run_attempt'],
+                                                    'html_url': workflow['html_url'], 'jobs_url': workflow['jobs_url'],
+                                                    'jobs': {'total_count': 0, 'success_count': 0, 'failure_count': 0, 'job_runs': []}})
+          workflow_summary['total_count'] += 1
+          if workflow['conclusion'] == 'success':
+            workflow_summary['success_count'] += 1
+          else:
+            workflow_summary['failure_count'] += 1
+    else:
+      request_workflow_list = False
+
+  logging.info('END collecting workflow run data\n')
+
+  logging.info('START collecting job data by workflow run\n')
+  for workflow_run in workflow_summary['workflow_runs']:
+    get_workflow_jobs(args, workflow_run)
+  logging.info('END collecting job data by workflow run\n')
+
+  return workflow_summary
+
+def get_workflow_jobs(args, workflow_run):
+  workflow_jobs = workflow_run['jobs']
+  job_page = 0
+  request_job_list = True
+  while request_job_list:
+    job_page += 1
+    list_jobs_params = {'filter': args.jobs, 'per_page': 100, 'page': job_page} # per_page: max 100
+    jobs = github.list_jobs(args.token, workflow_run['workflow_id'], list_jobs_params)
+    if 'jobs' in jobs and jobs['jobs']:
+      for job in jobs['jobs']:
+        workflow_jobs['job_runs'].append({'job_id': job['id'], 'job_name': job['name'], 'conclusion': job['conclusion'],
+                                          'created_at': job['created_at'], 'started_at': job['started_at'], 'completed_at': job['completed_at'],
+                                          'run_attempt': job['run_attempt'], 'html_url': job['html_url']})
+        if job['conclusion'] in ['success', 'failure']:
+          workflow_jobs['total_count'] += 1
+          if job['conclusion'] == 'success':
+            workflow_jobs['success_count'] += 1
+          else:
+            workflow_jobs['failure_count'] += 1
+
+    if 'jobs' not in jobs or jobs['total_count'] < job_page * 100:
+      request_job_list = False
+
+
+def get_job_summary(workflow_summary):
+  logging.info('START gathering job information by job name\n')
+  job_summary = {}
+  for workflow_run in workflow_summary['workflow_runs']:
+    for job_run in workflow_run['jobs']['job_runs']:
+      job_name = job_run['job_name']
+      if job_name not in job_summary:
+        job_summary[job_name] = {'total_count': 0,
+                                 'success_count': 0,
+                                 'failure_count': 0,
+                                 'failure_jobs': []}
+
+      job = job_summary[job_name]
+      job['total_count'] += 1
+      if job_run['conclusion'] == 'success':
+        job['success_count'] += 1
+      else:
+        job['failure_count'] += 1
+        job['failure_jobs'].append(job_run)
+
+  for job_name in job_summary:
+    total_count = job_summary[job_name]['total_count']
+    failure_count = job_summary[job_name]['failure_count']
+    job_summary[job_name]['failure_rate'] = float(failure_count/total_count)
+
+  job_summary = dict(sorted(job_summary.items(), key=lambda item: item[1]['failure_rate'], reverse=True))
+
+  logging.info('END gathering job information by job name\n')
+  return job_summary
+
+
+def generate_summary_report(workflow_summary, job_summary):
+  report_content = ''
+
+  workflow_name = workflow_summary['workflow_name']
+  total_count = workflow_summary['total_count']
+  success_count = workflow_summary['success_count']
+  failure_count = workflow_summary['failure_count']
+  failure_rate = float(failure_count/total_count)
+  report_content += f"Workflow '{workflow_name}' Report: \n Workflow Failure Rate:{failure_rate:.2%} \n Workflow Total Count: {total_count} (success: {success_count}, failure: {failure_count})\n\n"
+
+  report_content += workflow_runtime_report(workflow_summary)
+
+  report_content += 'Job Failure Report:\n'
+  for job_name in job_summary:
+    job = job_summary[job_name]
+    if job['failure_rate'] > 0:
+      report_content += f"{job_name}:\n Failure Rate:{job['failure_rate']:.2%}\n Total Count: {job['total_count']} (success: {job['success_count']}, failure: {job['failure_count']})\n"
+
+  logging.info(report_content)
+  return report_content
+
+
+def workflow_runtime_report(workflow_summary):
+  for workflow in workflow_summary['workflow_runs']:
+    created_at = datetime.datetime.strptime(workflow['created_at'], '%Y-%m-%dT%H:%M:%SZ')
+    updated_at = datetime.datetime.strptime(workflow['updated_at'], '%Y-%m-%dT%H:%M:%SZ')
+    workflow['runtime'] = (updated_at - created_at).total_seconds()
+
+  success_without_rerun = [w['runtime'] for w in workflow_summary['workflow_runs'] if w['run_attempt'] == 1 and w['conclusion'] == 'success']
+  failure_without_rerun = [w['runtime'] for w in workflow_summary['workflow_runs'] if w['run_attempt'] == 1 and w['conclusion'] == 'failure']
+  without_rerun = success_without_rerun + failure_without_rerun
+  with_rerun = [w['runtime'] for w in workflow_summary['workflow_runs'] if w['run_attempt'] > 1]
+
+  runtime_report = 'Workflow Runtime Report:\n'
+  if without_rerun:
+    runtime_report += f"{len(without_rerun)} workflow runs finished without rerun, the average running time: {datetime.timedelta(seconds=sum(without_rerun)/len(without_rerun))}\n"
+    runtime_report += 'Including:\n'
+    if success_without_rerun:
+      runtime_report += f" {len(success_without_rerun)} passed workflow runs, with average running time: {datetime.timedelta(seconds=sum(success_without_rerun)/len(success_without_rerun))}\n"
+    if failure_without_rerun:
+      runtime_report += f" {len(failure_without_rerun)} failed workflow runs, with average running time: {datetime.timedelta(seconds=sum(failure_without_rerun)/len(failure_without_rerun))}\n\n"
+
+  if with_rerun:
+    runtime_report += f"{len(with_rerun)} runs finished with rerun, the average running time: {datetime.timedelta(seconds=sum(with_rerun)/len(with_rerun))}\n"
+    runtime_report += f"The running times of the rerun workflows are:\n {[str(datetime.timedelta(seconds=x)) for x in with_rerun]}\n\n"
+
+  return runtime_report
+
+
+def parse_cmdline_args():
+  parser = argparse.ArgumentParser(description='Collect certain GitHub workflow information and calculate failure rate.')
+  parser.add_argument('-o', '--repo_owner', default='firebase', help='GitHub repo owner')
+  parser.add_argument('-n', '--repo_name', default='firebase-android-sdk', help='GitHub repo name')
+  parser.add_argument('-t', '--token', required=True, help='GitHub access token')
+
+  parser.add_argument('-w', '--workflow_name', default='ci_tests.yml', help='Workflow filename to run')
+  # By default, the artifacts and log files generated by workflows are retained for 90 days before they are automatically deleted.
+  # https://docs.github.com/en/organizations/managing-organization-settings/configuring-the-retention-period-for-github-actions-artifacts-and-logs-in-your-organization
+  parser.add_argument('-d', '--days', type=int, default=90, help='Filter workflow runs from the past -d days')
+  parser.add_argument('-b', '--branch', help='Filter branch name that workflows run against, default is all branches')
+  parser.add_argument('-a', '--actor', help='Filter by the actor who triggered the workflow runs, default is all actors')
+  parser.add_argument('-e', '--event', choices=['push', 'pull_request', 'issue'], help='Filter workflows trigger event, default is all events')
+  parser.add_argument('-j', '--jobs', default='all', choices=['latest', 'all'], help='Return the latest job attempt only or all job attempts including reruns, default is all')
+
+  parser.add_argument('-f', '--folder', help='Workflow and job information will be stored here, default is the current datetime')
+
+  args = parser.parse_args()
+  return args
+
+
+if __name__ == '__main__':
+  main()
\ No newline at end of file
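The failure rate reported by this script is a plain ratio of failed runs to completed runs; as a quick, self-contained check against the sample report in the README above (193 runs, 125 failures):

```python
# Sketch: the same failure-rate arithmetic used by generate_summary_report.
failure_count, total_count = 125, 193
failure_rate = float(failure_count / total_count)
print(f'Workflow Failure Rate:{failure_rate:.2%}')  # prints 64.77%
```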
From 1921f7e53ebe4643fa3aa8553891203f2e28ae98 Mon Sep 17 00:00:00 2001
From: Mou
Date: Thu, 2 Mar 2023 20:26:07 -0800
Subject: [PATCH 02/11] fix typo

---
 ci/fireci/workflow_summary/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ci/fireci/workflow_summary/README.md b/ci/fireci/workflow_summary/README.md
index b5414a692a2..a7abd71722f 100644
--- a/ci/fireci/workflow_summary/README.md
+++ b/ci/fireci/workflow_summary/README.md
@@ -11,7 +11,7 @@
    python workflow_information.py --token ${your_github_token} --event pull_request -d 30
    ```
 
-- Please refer `Inputs` section for more use cases, and `Outputs` section for the workflow summary report format.
+- Please refer to `Inputs` section for more use cases, and `Outputs` section for the workflow summary report format.
 
 ## Inputs
 - `-o, --repo_owner`: GitHub repo owner, default value is `firebase`.
@@ -41,7 +41,7 @@
 
   ```
   2023-03-03 01:37:07.114500
-  Namespace(actor=None, branch=None, days=30, event='pull_request', folder='presubmit_30', jobs='all', repo_name='firebase-android-sdk', repo_owner='firebase', token='ghp_[REDACTED]', workflow_name='ci_tests.yml')
+  Namespace(actor=None, branch=None, days=30, event='pull_request', folder='presubmit_30', jobs='all', repo_name='firebase-android-sdk', repo_owner='firebase', token=${your_github_token}, workflow_name='ci_tests.yml')
 
   Workflow 'ci_tests.yml' Report:
    Workflow Failure Rate:64.77%

From 84254ee94ca79848c961200e32e5853a699db4ae Mon Sep 17 00:00:00 2001
From: Mou
Date: Fri, 3 Mar 2023 10:39:36 -0800
Subject: [PATCH 03/11] update Readme

---
 ci/fireci/workflow_summary/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ci/fireci/workflow_summary/README.md b/ci/fireci/workflow_summary/README.md
index a7abd71722f..d0e2e0c9335 100644
--- a/ci/fireci/workflow_summary/README.md
+++ b/ci/fireci/workflow_summary/README.md
@@ -28,9 +28,9 @@
 
 - `-a, --actor`: Filter by the actor who triggered the workflow runs.
 
-- `-e, --event`: Filter workflows trigger event, default is all events, could be one of the following values `['push', 'pull_request', 'issue']`.
+- `-e, --event`: Filter workflows trigger event, could be one of the following values `['push', 'pull_request', 'issue']`.
 
-- `-j, --jobs`: Filter workflows jobs, default is the last job and does not include all jobs (does not include rerun jobs), could be one of the following values `['latest', 'all']`.
+- `-j, --jobs`: Filter workflows jobs, default is `all` (including rerun jobs), could be one of the following values `['latest', 'all']`.
 
 - `-f, --folder`: Workflow and job information will be stored here, default value is the current datetime.
 
 

From 35420a97dc5de648bc1d483c43758d665fae9272 Mon Sep 17 00:00:00 2001
From: Mou
Date: Fri, 3 Mar 2023 10:41:07 -0800
Subject: [PATCH 04/11] update Readme

---
 ci/fireci/workflow_summary/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/fireci/workflow_summary/README.md b/ci/fireci/workflow_summary/README.md
index d0e2e0c9335..adc540403af 100644
--- a/ci/fireci/workflow_summary/README.md
+++ b/ci/fireci/workflow_summary/README.md
@@ -146,7 +146,7 @@
 
 - `-t, --token`: **[Required]** GitHub access token. See [Creating a personal access token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token).
 
-- `-f, --folder`: Folder that stores intermediate files generated by `workflow_information.py`. `ci_workflow.yml` job failure logs will also be stored here.
+- `-f, --folder`: **[Required]** Folder that stores intermediate files generated by `workflow_information.py`. `ci_workflow.yml` job failure logs will also be stored here.

From e2b8343aa7ba8682023699061150eb910fdb2094 Mon Sep 17 00:00:00 2001
From: Mou
Date: Fri, 3 Mar 2023 11:10:54 -0800
Subject: [PATCH 05/11] update Readme

---
 ci/fireci/workflow_summary/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/fireci/workflow_summary/README.md b/ci/fireci/workflow_summary/README.md
index adc540403af..1c28e4d536a 100644
--- a/ci/fireci/workflow_summary/README.md
+++ b/ci/fireci/workflow_summary/README.md
@@ -137,7 +137,7 @@
 # `collect_ci_test_logs.py` Script
 
 ## Usage
-- Collect `ci_workflow.yml` job failure logs from `workflow_information.py` script's intermediate file:
+- Collect `ci_test.yml` job failure logs from `workflow_information.py` script's intermediate file:
   ```
   python collect_ci_test_logs.py --token ${github_token} --folder ${folder}
   ```

From 94f02497d53532ca6b945f92938c3978d86e51a9 Mon Sep 17 00:00:00 2001
From: Mou
Date: Fri, 3 Mar 2023 19:24:40 -0800
Subject: [PATCH 06/11] update format string based on comment

---
 .../workflow_summary/collect_ci_test_logs.py  | 12 ++++++------
 .../workflow_summary/workflow_information.py  | 15 +++++++++------
 2 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/ci/fireci/workflow_summary/collect_ci_test_logs.py b/ci/fireci/workflow_summary/collect_ci_test_logs.py
index 1c1ca2293ef..a62ac81dbb7 100644
--- a/ci/fireci/workflow_summary/collect_ci_test_logs.py
+++ b/ci/fireci/workflow_summary/collect_ci_test_logs.py
@@ -39,7 +39,7 @@ def main():
 
   file_folder = args.folder
   if not os.path.exists(file_folder):
-    logging.error('%s doesn\'t exist' % file_folder)
+    logging.error(f'{file_folder} doesn\'t exist')
     exit(1)
 
   job_summary = json.load(open(os.path.join(file_folder, 'job_summary.json')))
@@ -56,11 +56,11 @@ def main():
       success_count = job['success_count']
       failure_count = job['failure_count']
 
-      log_file_path = os.path.join(file_folder, '%s.log'%job_name)
+      log_file_path = os.path.join(file_folder, f'{job_name}.log')
       file_log = open(log_file_path, 'w')
-      file_log.write('\n{}:\nFailure rate:{:.2%} \nTotal count: {} (success: {}, failure: {})\nFailed jobs:'.format(job_name, failure_rate, total_count, success_count, failure_count))
-      logging.info('\n\n{}:\nFailure rate:{:.2%} \nTotal count: {} (success: {}, failure: {})\nFailed jobs:'.format(job_name, failure_rate, total_count, success_count, failure_count))
-
+      file_log.write(f'\n{job_name}:\nFailure rate:{failure_rate:.2%} \nTotal count: {total_count} (success: {success_count}, failure: {failure_count})\nFailed jobs:')
+      logging.info(f'\n\n{job_name}:\nFailure rate:{failure_rate:.2%} \nTotal count: {total_count} (success: {success_count}, failure: {failure_count})\nFailed jobs:')
+
       for failure_job in job['failure_jobs']:
         file_log.write('\n\n'+failure_job['html_url'])
         logging.info(failure_job['html_url'])
         job_id = failure_job['job_id']
@@ -77,7 +77,7 @@ def main():
 
       file_log.close()
 
-  logging.info('\n\nFinished collecting failure logs, log files are located under: %s' % file_folder)
+  logging.info(f'\n\nFinished collecting failure logs, log files are located under: {file_folder}')
 
 
 def parse_cmdline_args():
diff --git a/ci/fireci/workflow_summary/workflow_information.py b/ci/fireci/workflow_summary/workflow_information.py
index a5203dac7f5..dc64f7a441c 100644
--- a/ci/fireci/workflow_summary/workflow_information.py
+++ b/ci/fireci/workflow_summary/workflow_information.py
@@ -46,17 +46,20 @@ def main():
     os.makedirs(file_folder)
 
   workflow_summary = get_workflow_summary(args)
-  json.dump(workflow_summary, open(os.path.join(file_folder, 'workflow_summary.json'),'w'))
-  logging.info('Workflow summary has been written to %s\n' % os.path.join(file_folder, 'workflow_summary.json'))
+  workflow_summary_file_path = os.path.join(file_folder, 'workflow_summary.json')
+  json.dump(workflow_summary, open(workflow_summary_file_path,'w'))
+  logging.info(f'Workflow summary has been written to {workflow_summary_file_path}\n')
 
   job_summary = get_job_summary(workflow_summary)
-  json.dump(job_summary, open(os.path.join(file_folder, 'job_summary.json'),'w'))
-  logging.info('Job summary has been written to %s\n' % os.path.join(file_folder, 'job_summary.json'))
+  job_summary_file_path = os.path.join(file_folder, 'job_summary.json')
+  json.dump(job_summary, open(job_summary_file_path,'w'))
+  logging.info(f'Job summary has been written to {job_summary_file_path}\n')
 
   workflow_summary_report = f"{datetime.datetime.utcnow()}\n{args}\n\n"
   workflow_summary_report += generate_summary_report(workflow_summary, job_summary)
-  open(os.path.join(file_folder, 'workflow_summary_report.txt'), 'w').write(workflow_summary_report)
-  logging.info('Workflow summary report has been written to %s\n' % os.path.join(file_folder, 'workflow_summary_report.txt'))
+  report_file_path = os.path.join(file_folder, 'workflow_summary_report.txt')
+  open(report_file_path, 'w').write(workflow_summary_report)
+  logging.info(f'Workflow summary report has been written to {report_file_path}\n')

From 046852ea85266bca96ba935a0e4d493dc5d5a90c Mon Sep 17 00:00:00 2001
From: Mou
Date: Mon, 6 Mar 2023 10:34:36 -0800
Subject: [PATCH 07/11] change the file path

---
 ci/{fireci => }/workflow_summary/README.md                | 6 ++++++
 ci/{fireci => }/workflow_summary/collect_ci_test_logs.py  | 6 +++---
 ci/{fireci => }/workflow_summary/github.py                | 3 ---
 ci/{fireci => }/workflow_summary/workflow_information.py  | 4 ++--
 4 files changed, 11 insertions(+), 8 deletions(-)
 rename ci/{fireci => }/workflow_summary/README.md (98%)
 rename ci/{fireci => }/workflow_summary/collect_ci_test_logs.py (99%)
 rename ci/{fireci => }/workflow_summary/github.py (98%)
 rename ci/{fireci => }/workflow_summary/workflow_information.py (99%)

diff --git a/ci/fireci/workflow_summary/README.md b/ci/workflow_summary/README.md
similarity index 98%
rename from ci/fireci/workflow_summary/README.md
rename to ci/workflow_summary/README.md
index 1c28e4d536a..d5452925476 100644
--- a/ci/fireci/workflow_summary/README.md
+++ b/ci/workflow_summary/README.md
@@ -1,5 +1,11 @@
 # `workflow_information.py` Script
 
+## Prerequisites
+- [Python](https://www.python.org/) and required packages.
+  ```
+  pip install requests
+  ```
+
 ## Usage
 - Collect last `90` days' `Postsubmit` `ci_tests.yml` workflow runs:
   ```
diff --git a/ci/fireci/workflow_summary/collect_ci_test_logs.py b/ci/workflow_summary/collect_ci_test_logs.py
similarity index 99%
rename from ci/fireci/workflow_summary/collect_ci_test_logs.py
rename to ci/workflow_summary/collect_ci_test_logs.py
index a62ac81dbb7..927b6662e8c 100644
--- a/ci/fireci/workflow_summary/collect_ci_test_logs.py
+++ b/ci/workflow_summary/collect_ci_test_logs.py
@@ -21,13 +21,13 @@
 
 '''
 
-import argparse
 import github
+import argparse
 import json
 import re
 import logging
 import os
-import datetime
+
 
 def main():
   logging.getLogger().setLevel(logging.INFO)
@@ -88,4 +88,4 @@ def parse_cmdline_args():
   return args
 
 if __name__ == '__main__':
-  main()
\ No newline at end of file
+  main()
diff --git a/ci/fireci/workflow_summary/github.py b/ci/workflow_summary/github.py
similarity index 98%
rename from ci/fireci/workflow_summary/github.py
rename to ci/workflow_summary/github.py
index 9e7179ef381..18f0efc260f 100644
--- a/ci/fireci/workflow_summary/github.py
+++ b/ci/workflow_summary/github.py
@@ -15,9 +15,6 @@
 """A utility for GitHub REST API."""
 
 import requests
-import json
-import shutil
-import re
 import logging
 
 RETRIES = 3
diff --git a/ci/fireci/workflow_summary/workflow_information.py b/ci/workflow_summary/workflow_information.py
similarity index 99%
rename from ci/fireci/workflow_summary/workflow_information.py
rename to ci/workflow_summary/workflow_information.py
index dc64f7a441c..585302e6196 100644
--- a/ci/fireci/workflow_summary/workflow_information.py
+++ b/ci/workflow_summary/workflow_information.py
@@ -18,7 +18,6 @@
 import argparse
 import logging
 import os
-import threading
 
 
 '''A utility collecting workflow run data and calculating workflow/job failure rates.
@@ -247,4 +246,5 @@ def parse_cmdline_args():
 
 
 if __name__ == '__main__':
-  main()
\ No newline at end of file
+  main()
+ 
\ No newline at end of file

From 2becbe932ac0cc80f560f92640fd85a7dc17d9ba Mon Sep 17 00:00:00 2001
From: Mou
Date: Mon, 6 Mar 2023 11:39:45 -0800
Subject: [PATCH 08/11] remove space

---
 ci/workflow_summary/README.md               | 1 -
 ci/workflow_summary/workflow_information.py | 1 -
 2 files changed, 2 deletions(-)

diff --git a/ci/workflow_summary/README.md b/ci/workflow_summary/README.md
index d5452925476..c60558db9d2 100644
--- a/ci/workflow_summary/README.md
+++ b/ci/workflow_summary/README.md
@@ -174,4 +174,3 @@
    2023-02-28T18:57:20.5329189Z
    2023-02-28T18:57:20.5330505Z > Task :firebase-storage:testDebugUnitTest FAILED
    ```
-
diff --git a/ci/workflow_summary/workflow_information.py b/ci/workflow_summary/workflow_information.py
index 585302e6196..e4899303b9c 100644
--- a/ci/workflow_summary/workflow_information.py
+++ b/ci/workflow_summary/workflow_information.py
@@ -247,4 +247,3 @@ def parse_cmdline_args():
 
 if __name__ == '__main__':
   main()
- 
\ No newline at end of file

From 1c3080a685edce27fc1b19f9a885fd077c1d000e Mon Sep 17 00:00:00 2001
From: Mou
Date: Tue, 7 Mar 2023 15:54:25 -0800
Subject: [PATCH 09/11] update based on comments

---
 ci/workflow_summary/github.py               |  6 +++---
 ci/workflow_summary/workflow_information.py | 19 ++++++++++---------
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/ci/workflow_summary/github.py b/ci/workflow_summary/github.py
index 18f0efc260f..6f20a97df32 100644
--- a/ci/workflow_summary/github.py
+++ b/ci/workflow_summary/github.py
@@ -47,7 +47,7 @@ def list_workflows(token, workflow_id, params):
   headers = {'Accept': 'application/vnd.github+json', 'Authorization': f'token {token}'}
   with requests.get(url, headers=headers, params=params,
                     stream=True, timeout=TIMEOUT_LONG) as response:
-    logging.info("list_workflows: %s, params: %s, response: %s", url, params, response)
+    logging.info('list_workflows: %s, params: %s, response: %s', url, params, response)
     return response.json()
 
 def list_jobs(token, run_id, params):
@@ -56,7 +56,7 @@ def list_jobs(token, run_id, params):
   headers = {'Accept': 'application/vnd.github+json', 'Authorization': f'token {token}'}
   with requests.get(url, headers=headers, params=params,
                     stream=True, timeout=TIMEOUT_LONG) as response:
-    logging.info("list_jobs: %s, params: %s, response: %s", url, params, response)
+    logging.info('list_jobs: %s, params: %s, response: %s', url, params, response)
     return response.json()
 
 def job_logs(token, job_id):
@@ -65,7 +65,7 @@ def job_logs(token, job_id):
   headers = {'Accept': 'application/vnd.github+json', 'Authorization': f'token {token}'}
   with requests.get(url, headers=headers, allow_redirects=False,
                     stream=True, timeout=TIMEOUT_LONG) as response:
-    logging.info("job_logs: %s response: %s", url, response)
+    logging.info('job_logs: %s response: %s', url, response)
     if response.status_code == 302:
       with requests.get(response.headers['Location'], headers=headers, allow_redirects=False,
                         stream=True, timeout=TIMEOUT_LONG) as get_log_response:
diff --git a/ci/workflow_summary/workflow_information.py b/ci/workflow_summary/workflow_information.py
index e4899303b9c..445547906c5 100644
--- a/ci/workflow_summary/workflow_information.py
+++ b/ci/workflow_summary/workflow_information.py
@@ -46,18 +46,21 @@ def main():
 
   workflow_summary = get_workflow_summary(args)
   workflow_summary_file_path = os.path.join(file_folder, 'workflow_summary.json')
-  json.dump(workflow_summary, open(workflow_summary_file_path,'w'))
+  with open(workflow_summary_file_path, 'w') as f:
+    json.dump(workflow_summary, f)
   logging.info(f'Workflow summary has been written to {workflow_summary_file_path}\n')
 
   job_summary = get_job_summary(workflow_summary)
   job_summary_file_path = os.path.join(file_folder, 'job_summary.json')
-  json.dump(job_summary, open(job_summary_file_path,'w'))
+  with open(job_summary_file_path, 'w') as f:
+    json.dump(job_summary, f)
   logging.info(f'Job summary has been written to {job_summary_file_path}\n')
 
   workflow_summary_report = f"{datetime.datetime.utcnow()}\n{args}\n\n"
   workflow_summary_report += generate_summary_report(workflow_summary, job_summary)
   report_file_path = os.path.join(file_folder, 'workflow_summary_report.txt')
-  open(report_file_path, 'w').write(workflow_summary_report)
+  with open(report_file_path, 'w') as f:
+    f.write(workflow_summary_report)
   logging.info(f'Workflow summary report has been written to {report_file_path}\n')
 
 
@@ -88,8 +91,7 @@ def get_workflow_summary(args):
   if args.branch:
     list_workflows_params['branch'] = args.branch
 
-  request_workflow_list = True
-  while request_workflow_list:
+  while True:
     workflow_page += 1
     list_workflows_params['page'] = workflow_page
     workflows = github.list_workflows(token, workflow_name, list_workflows_params)
@@ -109,7 +111,7 @@ def get_workflow_summary(args):
           else:
             workflow_summary['failure_count'] += 1
     else:
-      request_workflow_list = False
+      break
 
   logging.info('END collecting workflow run data\n')
 
@@ -123,8 +125,7 @@ def get_workflow_summary(args):
 def get_workflow_jobs(args, workflow_run):
   workflow_jobs = workflow_run['jobs']
   job_page = 0
-  request_job_list = True
-  while request_job_list:
+  while True:
     job_page += 1
     list_jobs_params = {'filter': args.jobs, 'per_page': 100, 'page': job_page} # per_page: max 100
     jobs = github.list_jobs(args.token, workflow_run['workflow_id'], list_jobs_params)
@@ -141,7 +142,7 @@ def get_workflow_jobs(args, workflow_run):
           workflow_jobs['failure_count'] += 1
 
     if 'jobs' not in jobs or jobs['total_count'] < job_page * 100:
-      request_job_list = False
+      break

From d9352555a8def56972f9c2732be8c943072d574f Mon Sep 17 00:00:00 2001
From: Mou
Date: Tue, 7 Mar 2023 16:10:17 -0800
Subject: [PATCH 10/11] collecting logs

---
 ci/workflow_summary/collect_ci_test_logs.py |  4 +-
 ci/workflow_summary/github.py               | 87 +++++++++------------
 ci/workflow_summary/workflow_information.py | 14 ++--
 3 files changed, 46 insertions(+), 59 deletions(-)

diff --git a/ci/workflow_summary/collect_ci_test_logs.py b/ci/workflow_summary/collect_ci_test_logs.py
index 927b6662e8c..142c6c7be6b 100644
--- a/ci/workflow_summary/collect_ci_test_logs.py
+++ b/ci/workflow_summary/collect_ci_test_logs.py
@@ -33,7 +33,7 @@ def main():
   logging.getLogger().setLevel(logging.INFO)
 
   args = parse_cmdline_args()
-  github.set_api_url('firebase', 'firebase-android-sdk')
+  gh = github.GitHub('firebase', 'firebase-android-sdk')
 
   token = args.token
 
@@ -65,7 +65,7 @@ def main():
         file_log.write('\n\n'+failure_job['html_url'])
         logging.info(failure_job['html_url'])
         job_id = failure_job['job_id']
-        logs = github.job_logs(token, job_id)
+        logs = gh.job_logs(token, job_id)
         if logs:
           failed_tasks = re.findall(r"Execution failed for task ':(.*?)'.", logs)
           for failed_task in failed_tasks:
diff --git a/ci/workflow_summary/github.py b/ci/workflow_summary/github.py
index 18f0efc260f..b1c93b25f24 100644
--- a/ci/workflow_summary/github.py
+++ b/ci/workflow_summary/github.py
@@ -23,53 +23,40 @@
 RETRY_STATUS = (403, 500, 502, 504)
 TIMEOUT = 5
 TIMEOUT_LONG = 20
 
-OWNER = ''
-REPO = ''
-BASE_URL = 'https://api.github.com'
-GITHUB_API_URL = '%s/repos/%s/%s' % (BASE_URL, OWNER, REPO)
-
-
-def set_api_url(owner, repo):
-  if owner and repo:
-    global OWNER, REPO, GITHUB_API_URL
-    OWNER = owner
-    REPO = repo
-    GITHUB_API_URL = '%s/repos/%s/%s' % (BASE_URL, OWNER, REPO)
-    logging.info('GITHUB_API_URL has been set: %s' % GITHUB_API_URL)
-    return True
-  else:
-    return False
-
-
-def list_workflows(token, workflow_id, params):
-  """https://docs.github.com/en/rest/actions/workflow-runs#list-workflow-runs-for-a-workflow"""
-  url = f'{GITHUB_API_URL}/actions/workflows/{workflow_id}/runs'
-  headers = {'Accept': 'application/vnd.github+json', 'Authorization': f'token {token}'}
-  with requests.get(url, headers=headers, params=params,
-                    stream=True, timeout=TIMEOUT_LONG) as response:
-    logging.info('list_workflows: %s, params: %s, response: %s', url, params, response)
-    return response.json()
-
-def list_jobs(token, run_id, params):
-  """https://docs.github.com/en/rest/actions/workflow-jobs#list-jobs-for-a-workflow-run"""
-  url = f'{GITHUB_API_URL}/actions/runs/{run_id}/jobs'
-  headers = {'Accept': 'application/vnd.github+json', 'Authorization': f'token {token}'}
-  with requests.get(url, headers=headers, params=params,
-                    stream=True, timeout=TIMEOUT_LONG) as response:
-    logging.info('list_jobs: %s, params: %s, response: %s', url, params, response)
-    return response.json()
-
-def job_logs(token, job_id):
-  """https://docs.github.com/rest/reference/actions#download-job-logs-for-a-workflow-run"""
-  url = f'{GITHUB_API_URL}/actions/jobs/{job_id}/logs'
-  headers = {'Accept': 'application/vnd.github+json', 'Authorization': f'token {token}'}
-  with requests.get(url, headers=headers, allow_redirects=False,
-                    stream=True, timeout=TIMEOUT_LONG) as response:
-    logging.info('job_logs: %s response: %s', url, response)
-    if response.status_code == 302:
-      with requests.get(response.headers['Location'], headers=headers, allow_redirects=False,
-                        stream=True, timeout=TIMEOUT_LONG) as get_log_response:
-        return get_log_response.content.decode('utf-8')
-    else:
-      print('no log available')
-      return ''
+class GitHub:
+
+  def __init__(self, owner, repo):
+    self.github_api_url = f'https://api.github.com/repos/{owner}/{repo}'
+
+  def list_workflows(self, token, workflow_id, params):
+    """https://docs.github.com/en/rest/actions/workflow-runs#list-workflow-runs-for-a-workflow"""
+    url = f'{self.github_api_url}/actions/workflows/{workflow_id}/runs'
+    headers = {'Accept': 'application/vnd.github+json', 'Authorization': f'token {token}'}
+    with requests.get(url, headers=headers, params=params,
+                      stream=True, timeout=TIMEOUT_LONG) as response:
+      logging.info('list_workflows: %s, params: %s, response: %s', url, params, response)
+      return response.json()
+
+  def list_jobs(self, token, run_id, params):
+    """https://docs.github.com/en/rest/actions/workflow-jobs#list-jobs-for-a-workflow-run"""
+    url = f'{self.github_api_url}/actions/runs/{run_id}/jobs'
+    headers = {'Accept': 'application/vnd.github+json', 'Authorization': f'token {token}'}
+    with requests.get(url, headers=headers, params=params,
+                      stream=True, timeout=TIMEOUT_LONG) as response:
+      logging.info('list_jobs: %s, params: %s, response: %s', url, params, response)
+      return response.json()
+
+  def job_logs(self, token, job_id):
+    """https://docs.github.com/rest/reference/actions#download-job-logs-for-a-workflow-run"""
+    url = f'{self.github_api_url}/actions/jobs/{job_id}/logs'
+    headers = {'Accept': 'application/vnd.github+json', 'Authorization': f'token {token}'}
+    with requests.get(url, headers=headers, allow_redirects=False,
+                      stream=True, timeout=TIMEOUT_LONG) as response:
+      logging.info('job_logs: %s response: %s', url, response)
+      if response.status_code == 302:
+        with requests.get(response.headers['Location'], headers=headers, allow_redirects=False,
+                          stream=True, timeout=TIMEOUT_LONG) as get_log_response:
+          return get_log_response.content.decode('utf-8')
+      else:
+        print('no log available')
+        return ''
diff --git a/ci/workflow_summary/workflow_information.py b/ci/workflow_summary/workflow_information.py
index 445547906c5..597a664d617 100644
--- a/ci/workflow_summary/workflow_information.py
+++ b/ci/workflow_summary/workflow_information.py
@@ -34,7 +34,7 @@ def main():
   args = parse_cmdline_args()
   logging.info(args)
 
-  github.set_api_url(args.repo_owner, args.repo_name)
+  gh = github.GitHub(args.repo_owner, args.repo_name)
 
   # location for all artifacts
   if args.folder:
@@ -44,7 +44,7 @@ def main():
   if not os.path.exists(file_folder):
     os.makedirs(file_folder)
 
-  workflow_summary = get_workflow_summary(args)
+  workflow_summary = get_workflow_summary(gh, args)
   workflow_summary_file_path = os.path.join(file_folder, 'workflow_summary.json')
   with open(workflow_summary_file_path, 'w') as f:
     json.dump(workflow_summary, f)
@@ -64,7 +64,7 @@ def main():
   logging.info(f'Workflow summary report has been written to {report_file_path}\n')
 
 
-def get_workflow_summary(args):
+def get_workflow_summary(gh, args):
   token = args.token
   workflow_name = args.workflow_name
   # https://docs.github.com/en/search-github/getting-started-with-searching-on-github/understanding-the-search-syntax#query-for-dates
@@ -94,7 +94,7 @@ def get_workflow_summary(gh, args):
   while True:
     workflow_page += 1
     list_workflows_params['page'] = workflow_page
-    workflows = github.list_workflows(token, workflow_name, list_workflows_params)
+    workflows = gh.list_workflows(token, workflow_name, list_workflows_params)
 
     if 'workflow_runs' in workflows and workflows['workflow_runs']:
       for workflow in workflows['workflow_runs']:
@@ -117,18 +117,18 @@ def get_workflow_summary(gh, args):
 
   logging.info('START collecting job data by workflow run\n')
   for workflow_run in workflow_summary['workflow_runs']:
-    get_workflow_jobs(args, workflow_run)
+    get_workflow_jobs(gh, args, workflow_run)
   logging.info('END collecting job data by workflow run\n')
 
   return workflow_summary
 
-def get_workflow_jobs(args, workflow_run):
+def get_workflow_jobs(gh, args, workflow_run):
   workflow_jobs = workflow_run['jobs']
   job_page = 0
   while True:
     job_page += 1
     list_jobs_params = {'filter': args.jobs, 'per_page': 100, 'page': job_page} # per_page: max 100
-    jobs = github.list_jobs(args.token, workflow_run['workflow_id'], list_jobs_params)
+    jobs = gh.list_jobs(args.token, workflow_run['workflow_id'], list_jobs_params)
     if 'jobs' in jobs and jobs['jobs']:
       for job in jobs['jobs']:
         workflow_jobs['job_runs'].append({'job_id': job['id'], 'job_name': job['name'], 'conclusion': job['conclusion'],
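One loose end worth noting: `RETRIES`, `BACKOFF`, and `RETRY_STATUS` are declared in `github.py` but never used. The sketch below is one hypothetical way to wire them into a retrying GET helper; `get_with_retry` is illustrative only and not part of this series:

```python
# Sketch: a retrying GET built on the constants declared in github.py.
import logging
import time

import requests

RETRIES = 3
BACKOFF = 5
RETRY_STATUS = (403, 500, 502, 504)
TIMEOUT_LONG = 20

def get_with_retry(url, **kwargs):
  # Retry up to RETRIES times on the status codes listed in RETRY_STATUS,
  # sleeping BACKOFF seconds (scaled by attempt number) between tries.
  for attempt in range(RETRIES):
    response = requests.get(url, timeout=TIMEOUT_LONG, **kwargs)
    if response.status_code not in RETRY_STATUS:
      return response
    logging.warning('retrying %s (HTTP %s)', url, response.status_code)
    time.sleep(BACKOFF * (attempt + 1))
  return response
```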
From 5062c33906014c2ee1d7cacfa807e469114c7e6d Mon Sep 17 00:00:00 2001
From: Mou
Date: Wed, 8 Mar 2023 10:13:02 -0800
Subject: [PATCH 11/11] fix based on comments

---
 ci/workflow_summary/collect_ci_test_logs.py |  9 +++-
 ci/workflow_summary/github.py               |  2 +-
 ci/workflow_summary/workflow_information.py | 52 ++++++++++-----------
 3 files changed, 34 insertions(+), 29 deletions(-)

diff --git a/ci/workflow_summary/collect_ci_test_logs.py b/ci/workflow_summary/collect_ci_test_logs.py
index 142c6c7be6b..7409c991871 100644
--- a/ci/workflow_summary/collect_ci_test_logs.py
+++ b/ci/workflow_summary/collect_ci_test_logs.py
@@ -29,11 +29,15 @@
 import os
 
 
+REPO_OWNER = 'firebase'
+REPO_NAME = 'firebase-android-sdk'
+EXCLUDE_JOB_LIST = ['Determine changed modules','Unit Tests (matrix)','Publish Tests Results','Unit Test Results','Instrumentation Tests','Unit Tests']
+
 def main():
   logging.getLogger().setLevel(logging.INFO)
 
   args = parse_cmdline_args()
-  gh = github.GitHub('firebase', 'firebase-android-sdk')
+  gh = github.GitHub(REPO_OWNER, REPO_NAME)
 
   token = args.token
 
@@ -45,7 +49,7 @@ def main():
   job_summary = json.load(open(os.path.join(file_folder, 'job_summary.json')))
 
   for job_name in job_summary:
-    if job_name in ['Determine changed modules','Unit Tests (matrix)','Publish Tests Results','Unit Test Results','Instrumentation Tests','Unit Tests']:
+    if job_name in EXCLUDE_JOB_LIST:
       continue
 
     job = job_summary[job_name]
@@ -67,6 +71,7 @@ def main():
         job_id = failure_job['job_id']
         logs = gh.job_logs(token, job_id)
         if logs:
+          # using regex to extract failure information
          failed_tasks = re.findall(r"Execution failed for task ':(.*?)'.", logs)
           for failed_task in failed_tasks:
             file_log.write('\n'+failed_task)
diff --git a/ci/workflow_summary/github.py b/ci/workflow_summary/github.py
index b1c93b25f24..24f62d55661 100644
--- a/ci/workflow_summary/github.py
+++ b/ci/workflow_summary/github.py
@@ -58,5 +58,5 @@ def job_logs(self, token, job_id):
                           stream=True, timeout=TIMEOUT_LONG) as get_log_response:
           return get_log_response.content.decode('utf-8')
       else:
-        print('no log available')
+        logging.info('no log available')
         return ''
diff --git a/ci/workflow_summary/workflow_information.py b/ci/workflow_summary/workflow_information.py
index 597a664d617..06082cafd07 100644
--- a/ci/workflow_summary/workflow_information.py
+++ b/ci/workflow_summary/workflow_information.py
@@ -96,23 +96,23 @@ def get_workflow_summary(gh, args):
     list_workflows_params['page'] = workflow_page
     workflows = gh.list_workflows(token, workflow_name, list_workflows_params)
 
-    if 'workflow_runs' in workflows and workflows['workflow_runs']:
-      for workflow in workflows['workflow_runs']:
-        if workflow['conclusion'] in ['success', 'failure']:
-          workflow_summary['workflow_runs'].append({'workflow_id': workflow['id'], 'conclusion': workflow['conclusion'],
-                                                    'head_branch': workflow['head_branch'], 'actor': workflow['actor']['login'],
-                                                    'created_at': workflow['created_at'], 'updated_at': workflow['updated_at'],
-                                                    'run_started_at': workflow['run_started_at'], 'run_attempt': workflow['run_attempt'],
-                                                    'html_url': workflow['html_url'], 'jobs_url': workflow['jobs_url'],
-                                                    'jobs': {'total_count': 0, 'success_count': 0, 'failure_count': 0, 'job_runs': []}})
-          workflow_summary['total_count'] += 1
-          if workflow['conclusion'] == 'success':
-            workflow_summary['success_count'] += 1
-          else:
-            workflow_summary['failure_count'] += 1
-    else:
+    if 'workflow_runs' not in workflows or not workflows['workflow_runs']:
       break
 
+    for workflow in workflows['workflow_runs']:
+      if workflow['conclusion'] in ['success', 'failure']:
+        workflow_summary['workflow_runs'].append({'workflow_id': workflow['id'], 'conclusion': workflow['conclusion'],
+                                                  'head_branch': workflow['head_branch'], 'actor': workflow['actor']['login'],
+                                                  'created_at': workflow['created_at'], 'updated_at': workflow['updated_at'],
+                                                  'run_started_at': workflow['run_started_at'], 'run_attempt': workflow['run_attempt'],
+                                                  'html_url': workflow['html_url'], 'jobs_url': workflow['jobs_url'],
+                                                  'jobs': {'total_count': 0, 'success_count': 0, 'failure_count': 0, 'job_runs': []}})
+        workflow_summary['total_count'] += 1
+        if workflow['conclusion'] == 'success':
+          workflow_summary['success_count'] += 1
+        else:
+          workflow_summary['failure_count'] += 1
+
   logging.info('END collecting workflow run data\n')
 
   logging.info('START collecting job data by workflow run\n')
@@ -129,21 +129,21 @@ def get_workflow_jobs(gh, args, workflow_run):
     job_page += 1
     list_jobs_params = {'filter': args.jobs, 'per_page': 100, 'page': job_page} # per_page: max 100
     jobs = gh.list_jobs(args.token, workflow_run['workflow_id'], list_jobs_params)
-    if 'jobs' in jobs and jobs['jobs']:
-      for job in jobs['jobs']:
-        workflow_jobs['job_runs'].append({'job_id': job['id'], 'job_name': job['name'], 'conclusion': job['conclusion'],
-                                          'created_at': job['created_at'], 'started_at': job['started_at'], 'completed_at': job['completed_at'],
-                                          'run_attempt': job['run_attempt'], 'html_url': job['html_url']})
-        if job['conclusion'] in ['success', 'failure']:
-          workflow_jobs['total_count'] += 1
-          if job['conclusion'] == 'success':
-            workflow_jobs['success_count'] += 1
-          else:
-            workflow_jobs['failure_count'] += 1
 
-    if 'jobs' not in jobs or jobs['total_count'] < job_page * 100:
+    if 'jobs' not in jobs or not jobs['jobs']:
       break
 
+    for job in jobs['jobs']:
+      workflow_jobs['job_runs'].append({'job_id': job['id'], 'job_name': job['name'], 'conclusion': job['conclusion'],
+                                        'created_at': job['created_at'], 'started_at': job['started_at'], 'completed_at': job['completed_at'],
+                                        'run_attempt': job['run_attempt'], 'html_url': job['html_url']})
+      if job['conclusion'] in ['success', 'failure']:
+        workflow_jobs['total_count'] += 1
+        if job['conclusion'] == 'success':
+          workflow_jobs['success_count'] += 1
+        else:
+          workflow_jobs['failure_count'] += 1
+
+    if jobs['total_count'] <= job_page * 100:
+      break
+
 
 def get_job_summary(workflow_summary):
   logging.info('START gathering job information by job name\n')