From c55bb6392f3a8998251b204c5cc99dade35cbfff Mon Sep 17 00:00:00 2001
From: Mou
Date: Thu, 2 Mar 2023 19:48:40 -0800
Subject: [PATCH 01/11] a tool that calculates workflow/job failure rates and a
 tool that collects ci_test failure logs

---
 ci/fireci/workflow_summary/README.md          | 171 ++++++++++++
 .../workflow_summary/collect_ci_test_logs.py  |  91 +++++++
 ci/fireci/workflow_summary/github.py          |  78 ++++++
 .../workflow_summary/workflow_information.py  | 247 ++++++++++++++++++
 4 files changed, 587 insertions(+)
 create mode 100644 ci/fireci/workflow_summary/README.md
 create mode 100644 ci/fireci/workflow_summary/collect_ci_test_logs.py
 create mode 100644 ci/fireci/workflow_summary/github.py
 create mode 100644 ci/fireci/workflow_summary/workflow_information.py

diff --git a/ci/fireci/workflow_summary/README.md b/ci/fireci/workflow_summary/README.md
new file mode 100644
index 00000000000..b5414a692a2
--- /dev/null
+++ b/ci/fireci/workflow_summary/README.md
@@ -0,0 +1,171 @@
+# `workflow_information.py` Script
+
+## Usage
+- Collect last `90` days' `Postsubmit` `ci_tests.yml` workflow runs:
+  ```
+  python workflow_information.py --token ${your_github_token} --branch master --event push -d 90
+  ```
+
+- Collect last `30` days' `Presubmit` `ci_tests.yml` workflow runs:
+  ```
+  python workflow_information.py --token ${your_github_token} --event pull_request -d 30
+  ```
+
+- Please refer `Inputs` section for more use cases, and `Outputs` section for the workflow summary report format.
+
+## Inputs
+- `-o, --repo_owner`: GitHub repo owner, default value is `firebase`.
+
+- `-n, --repo_name`: GitHub repo name, default value is `firebase-android-sdk`.
+
+- `-t, --token`: **[Required]** GitHub access token. See [Creating a personal access token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token).
+
+- `-w, --workflow_name`: Workflow filename, default value is `ci_tests.yml`.
+
+- `-d, --days`: Filter workflow runs from the past `-d` days, default value is `90`. See [retention period for GitHub Actions artifacts and logs](https://docs.github.com/en/organizations/managing-organization-settings/configuring-the-retention-period-for-github-actions-artifacts-and-logs-in-your-organization).
+
+- `-b, --branch`: Filter by the branch that workflows run against.
+
+- `-a, --actor`: Filter by the actor who triggered the workflow runs.
+
+- `-e, --event`: Filter workflows trigger event, default is all events, could be one of the following values `['push', 'pull_request', 'issue']`.
+
+- `-j, --jobs`: Filter workflows jobs, default is the last job and does not include all jobs (does not include rerun jobs), could be one of the following values `['latest', 'all']`.
+
+- `-f, --folder`: Workflow and job information will be stored here, default value is the current datetime.
+
+
+## Outputs
+
+- `workflow_summary_report.txt`: a general report containing workflow pass/failure counts, running times, etc.
+
+  ```
+  2023-03-03 01:37:07.114500
+  Namespace(actor=None, branch=None, days=30, event='pull_request', folder='presubmit_30', jobs='all', repo_name='firebase-android-sdk', repo_owner='firebase', token='ghp_[REDACTED]', workflow_name='ci_tests.yml')
+
+  Workflow 'ci_tests.yml' Report:
+   Workflow Failure Rate:64.77%
+   Workflow Total Count: 193 (success: 68, failure: 125)
+
+  Workflow Runtime Report:
+  161 workflow runs finished without rerun, the average running time: 0:27:24.745342
+  Including:
+   56 passed workflow runs, with average running time: 0:17:29.214286
+   105 failed workflow runs, with average running time: 0:32:42.361905
+
+  32 runs finished with rerun, the average running time: 1 day, 3:57:53.937500
+  The running times of the rerun workflows are:
+   ['1 day, 2:24:32', '3:35:54', '3:19:14', '4 days, 6:10:50', '15:33:39', '1:57:21', '1:13:12', '1:55:18', '12 days, 21:51:29', '0:48:48', '0:45:28', '1:40:21', '2 days, 1:46:35', '19:47:16', '0:45:49', '2:22:36', '0:25:22', '0:55:30', '1:40:32', '1:10:05', '20:08:38', '0:31:03', '5 days, 9:19:25', '5:10:44', '1:20:57', '0:28:47', '1:52:44', '20:19:17', '0:35:15', '21:31:07', '3 days, 1:06:44', '3 days, 2:18:14']
+
+  Job Failure Report:
+  Unit Tests (:firebase-storage):
+   Failure Rate:54.61%
+   Total Count: 152 (success: 69, failure: 83)
+  Unit Tests (:firebase-messaging):
+   Failure Rate:35.37%
+   Total Count: 147 (success: 95, failure: 52)
+  ```
+
+
+- Intermediate file `workflow_summary.json`: contains all the workflow runs and job information attached to each workflow.
+
+  ```
+  {
+    'workflow_name':'ci_tests.yml',
+    'total_count':81,
+    'success_count':32,
+    'failure_count':49,
+    'created':'>2022-11-30T23:15:04Z',
+    'workflow_runs':[
+      {
+        'workflow_id':4296343867,
+        'conclusion':'failure',
+        'head_branch':'master',
+        'actor':'vkryachko',
+        'created_at':'2023-02-28T18:47:40Z',
+        'updated_at':'2023-02-28T19:20:16Z',
+        'run_started_at':'2023-02-28T18:47:40Z',
+        'run_attempt':1,
+        'html_url':'https://github.com/firebase/firebase-android-sdk/actions/runs/4296343867',
+        'jobs_url':'https://api.github.com/repos/firebase/firebase-android-sdk/actions/runs/4296343867/jobs',
+        'jobs':{
+          'total_count':95,
+          'success_count':92,
+          'failure_count':3,
+          'job_runs':[
+            {
+              'job_id':11664775180,
+              'job_name':'Determine changed modules',
+              'conclusion':'success',
+              'created_at':'2023-02-28T18:47:42Z',
+              'started_at':'2023-02-28T18:47:50Z',
+              'completed_at':'2023-02-28T18:50:11Z',
+              'run_attempt': 1,
+              'html_url':'https://github.com/firebase/firebase-android-sdk/actions/runs/4296343867/jobs/7487936863',
+            }
+          ]
+        }
+      }
+    ]
+  }
+  ```
+
+- Intermediate file `job_summary.json`: contains all the job runs organized by job name.
+  ```
+  {
+    'Unit Test Results':{  # job name
+      'total_count':17,
+      'success_count':7,
+      'failure_count':10,
+      'failure_jobs':[  # same structure as the 'job_runs' entries in workflow_summary.json
+        {
+          'job_id':11372664143,
+          'job_name':'Unit Test Results',
+          'conclusion':'failure',
+          'created_at':'2023-02-15T22:02:06Z',
+          'started_at':'2023-02-15T22:02:06Z',
+          'completed_at':'2023-02-15T22:02:06Z',
+          'run_attempt': 1,
+          'html_url':'https://github.com/firebase/firebase-android-sdk/runs/11372664143',
+        }
+      ]
+    }
+  }
+  ```
+
+
+# `collect_ci_test_logs.py` Script
+
+## Usage
+- Collect `ci_workflow.yml` job failure logs from `workflow_information.py` script's intermediate file:
+  ```
+  python collect_ci_test_logs.py --token ${github_token} --folder ${folder}
+  ```
+
+## Inputs
+
+- `-t, --token`: **[Required]** GitHub access token. See [Creating a personal access token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token).
+
+- `-f, --folder`: Folder that stores intermediate files generated by `workflow_information.py`. `ci_workflow.yml` job failure logs will also be stored here.
+
+## Outputs
+
+- `${job name}.log`: contains the job failure rate, links to all failed jobs, and the failure logs.
+  ```
+  Unit Tests (:firebase-storage):
+  Failure rate:40.00%
+  Total count: 20 (success: 12, failure: 8)
+  Failed jobs:
+
+  https://github.com/firebase/firebase-android-sdk/actions/runs/4296343867/jobs/7487989874
+  firebase-storage:testDebugUnitTest
+  Task :firebase-storage:testDebugUnitTest
+  2023-02-28T18:54:38.1333725Z
+  2023-02-28T18:54:38.1334278Z com.google.firebase.storage.DownloadTest > streamDownloadWithResumeAndCancel FAILED
+  2023-02-28T18:54:38.1334918Z     org.junit.ComparisonFailure at DownloadTest.java:190
+  2023-02-28T18:57:20.3329130Z
+  2023-02-28T18:57:20.3330165Z 112 tests completed, 1 failed
+  2023-02-28T18:57:20.5329189Z
+  2023-02-28T18:57:20.5330505Z > Task :firebase-storage:testDebugUnitTest FAILED
+  ```
+
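For readers who want to work with these artifacts directly, here is a minimal sketch (not part of the patch itself) that ranks jobs from an existing `job_summary.json` by failure rate; the folder name `presubmit_30` is only an example:

```python
# Sketch: list the flakiest jobs recorded in job_summary.json.
# Assumes workflow_information.py already wrote its output to ./presubmit_30.
import json

with open('presubmit_30/job_summary.json') as f:
  job_summary = json.load(f)

# Entries are already sorted by failure_rate (highest first) when dumped.
for job_name, job in list(job_summary.items())[:5]:
  print(f"{job_name}: {job['failure_rate']:.2%} "
        f"({job['failure_count']}/{job['total_count']} runs failed)")
```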
diff --git a/ci/fireci/workflow_summary/collect_ci_test_logs.py b/ci/fireci/workflow_summary/collect_ci_test_logs.py
new file mode 100644
index 00000000000..1c1ca2293ef
--- /dev/null
+++ b/ci/fireci/workflow_summary/collect_ci_test_logs.py
@@ -0,0 +1,91 @@
+# -*- coding: utf-8 -*-
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the 'License');
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an 'AS IS' BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+'''A utility collecting ci_test.yml workflow failure logs.
+
+Usage:
+
+  python collect_ci_test_logs.py --token ${github_token} --folder ${folder}
+
+'''
+
+import argparse
+import github
+import json
+import re
+import logging
+import os
+import datetime
+
+def main():
+  logging.getLogger().setLevel(logging.INFO)
+
+  args = parse_cmdline_args()
+  github.set_api_url('firebase', 'firebase-android-sdk')
+
+  token = args.token
+
+  file_folder = args.folder
+  if not os.path.exists(file_folder):
+    logging.error('%s doesn\'t exist' % file_folder)
+    exit(1)
+
+  job_summary = json.load(open(os.path.join(file_folder, 'job_summary.json')))
+
+  for job_name in job_summary:
+    if job_name in ['Determine changed modules','Unit Tests (matrix)','Publish Tests Results','Unit Test Results','Instrumentation Tests','Unit Tests']:
+      continue
+
+    job = job_summary[job_name]
+
+    if job['failure_rate'] > 0:
+      failure_rate = job['failure_rate']
+      total_count = job['total_count']
+      success_count = job['success_count']
+      failure_count = job['failure_count']
+
+      log_file_path = os.path.join(file_folder, '%s.log'%job_name)
+      file_log = open(log_file_path, 'w')
+      file_log.write('\n{}:\nFailure rate:{:.2%} \nTotal count: {} (success: {}, failure: {})\nFailed jobs:'.format(job_name, failure_rate, total_count, success_count, failure_count))
+      logging.info('\n\n{}:\nFailure rate:{:.2%} \nTotal count: {} (success: {}, failure: {})\nFailed jobs:'.format(job_name, failure_rate, total_count, success_count, failure_count))
+
+      for failure_job in job['failure_jobs']:
+        file_log.write('\n\n'+failure_job['html_url'])
+        logging.info(failure_job['html_url'])
+        job_id = failure_job['job_id']
+        logs = github.job_logs(token, job_id)
+        if logs:
+          failed_tasks = re.findall(r"Execution failed for task ':(.*?)'.", logs)
+          for failed_task in failed_tasks:
+            file_log.write('\n'+failed_task)
+            pattern = fr'Task :{failed_task}(.*?)Task :{failed_task} FAILED'
+            failed_tests = re.search(pattern, logs, re.MULTILINE | re.DOTALL)
+            if failed_tests:
+              file_log.write('\n'+failed_tests.group())
+
+      file_log.close()
+
+  logging.info('\n\nFinished collecting failure logs, log files are located under: %s' % file_folder)
+
+
+def parse_cmdline_args():
+  parser = argparse.ArgumentParser(description='Collect ci_test workflow job failure logs.')
+  parser.add_argument('-t', '--token', required=True, help='GitHub access token')
+  parser.add_argument('-f', '--folder', required=True, help='Folder generated by workflow_information.py. Test logs are also stored here.')
+  args = parser.parse_args()
+  return args
+
+if __name__ == '__main__':
+  main()
\ No newline at end of file
diff --git a/ci/fireci/workflow_summary/github.py b/ci/fireci/workflow_summary/github.py
new file mode 100644
index 00000000000..9e7179ef381
--- /dev/null
+++ b/ci/fireci/workflow_summary/github.py
@@ -0,0 +1,78 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""A utility for GitHub REST API."""
+
+import requests
+import json
+import shutil
+import re
+import logging
+
+RETRIES = 3
+BACKOFF = 5
+RETRY_STATUS = (403, 500, 502, 504)
+TIMEOUT = 5
+TIMEOUT_LONG = 20
+
+OWNER = ''
+REPO = ''
+BASE_URL = 'https://api.github.com'
+GITHUB_API_URL = '%s/repos/%s/%s' % (BASE_URL, OWNER, REPO)
+
+
+def set_api_url(owner, repo):
+  if owner and repo:
+    global OWNER, REPO, GITHUB_API_URL
+    OWNER = owner
+    REPO = repo
+    GITHUB_API_URL = '%s/repos/%s/%s' % (BASE_URL, OWNER, REPO)
+    logging.info('GITHUB_API_URL has been set: %s' % GITHUB_API_URL)
+    return True
+  else:
+    return False
+
+
+def list_workflows(token, workflow_id, params):
+  """https://docs.github.com/en/rest/actions/workflow-runs#list-workflow-runs-for-a-workflow"""
+  url = f'{GITHUB_API_URL}/actions/workflows/{workflow_id}/runs'
+  headers = {'Accept': 'application/vnd.github+json', 'Authorization': f'token {token}'}
+  with requests.get(url, headers=headers, params=params,
+                    stream=True, timeout=TIMEOUT_LONG) as response:
+    logging.info("list_workflows: %s, params: %s, response: %s", url, params, response)
+    return response.json()
+
+def list_jobs(token, run_id, params):
+  """https://docs.github.com/en/rest/actions/workflow-jobs#list-jobs-for-a-workflow-run"""
+  url = f'{GITHUB_API_URL}/actions/runs/{run_id}/jobs'
+  headers = {'Accept': 'application/vnd.github+json', 'Authorization': f'token {token}'}
+  with requests.get(url, headers=headers, params=params,
+                    stream=True, timeout=TIMEOUT_LONG) as response:
+    logging.info("list_jobs: %s, params: %s, response: %s", url, params, response)
+    return response.json()
+
+def job_logs(token, job_id):
+  """https://docs.github.com/rest/reference/actions#download-job-logs-for-a-workflow-run"""
+  url = f'{GITHUB_API_URL}/actions/jobs/{job_id}/logs'
+  headers = {'Accept': 'application/vnd.github+json', 'Authorization': f'token {token}'}
+  with requests.get(url, headers=headers, allow_redirects=False,
+                    stream=True, timeout=TIMEOUT_LONG) as response:
+    logging.info("job_logs: %s response: %s", url, response)
+    if response.status_code == 302:
+      with requests.get(response.headers['Location'], headers=headers, allow_redirects=False,
+                        stream=True, timeout=TIMEOUT_LONG) as get_log_response:
+        return get_log_response.content.decode('utf-8')
+    else:
+      print('no log available')
+      return ''
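Before the diff moves on, here is a minimal usage sketch for the module above (not part of the patch); the token is a placeholder, and the date filter reuses the `created` syntax that `workflow_information.py` builds:

```python
# Sketch: fetch the first page of completed ci_tests.yml runs from the last 7 days.
import datetime

import github

github.set_api_url('firebase', 'firebase-android-sdk')
token = 'ghp_xxx'  # placeholder personal access token

since = datetime.datetime.utcnow() - datetime.timedelta(days=7)
params = {'status': 'completed',
          'created': '>' + since.strftime('%Y-%m-%dT%H:%M:%SZ'),
          'page': 1, 'per_page': 100}
runs = github.list_workflows(token, 'ci_tests.yml', params)
print(runs.get('total_count', 0))
```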
diff --git a/ci/fireci/workflow_summary/workflow_information.py b/ci/fireci/workflow_summary/workflow_information.py
new file mode 100644
index 00000000000..a5203dac7f5
--- /dev/null
+++ b/ci/fireci/workflow_summary/workflow_information.py
@@ -0,0 +1,247 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the 'License');
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an 'AS IS' BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import github
+import json
+import datetime
+import argparse
+import logging
+import os
+import threading
+
+
+'''A utility collecting workflow run data and calculating workflow/job failure rates.
+
+Usage:
+
+  python workflow_information.py --token ${github_token} --workflow_name ${workflow_name}
+
+'''
+
+def main():
+  logging.getLogger().setLevel(logging.INFO)
+
+  args = parse_cmdline_args()
+  logging.info(args)
+
+  github.set_api_url(args.repo_owner, args.repo_name)
+
+  # location for all artifacts
+  if args.folder:
+    file_folder = os.path.normpath(args.folder)
+  else:
+    file_folder = os.path.normpath(datetime.datetime.utcnow().strftime('%Y-%m-%d+%H:%M:%S'))
+  if not os.path.exists(file_folder):
+    os.makedirs(file_folder)
+
+  workflow_summary = get_workflow_summary(args)
+  json.dump(workflow_summary, open(os.path.join(file_folder, 'workflow_summary.json'),'w'))
+  logging.info('Workflow summary has been written to %s\n' % os.path.join(file_folder, 'workflow_summary.json'))
+
+  job_summary = get_job_summary(workflow_summary)
+  json.dump(job_summary, open(os.path.join(file_folder, 'job_summary.json'),'w'))
+  logging.info('Job summary has been written to %s\n' % os.path.join(file_folder, 'job_summary.json'))
+
+  workflow_summary_report = f"{datetime.datetime.utcnow()}\n{args}\n\n"
+  workflow_summary_report += generate_summary_report(workflow_summary, job_summary)
+  open(os.path.join(file_folder, 'workflow_summary_report.txt'), 'w').write(workflow_summary_report)
+  logging.info('Workflow summary report has been written to %s\n' % os.path.join(file_folder, 'workflow_summary_report.txt'))
+
+
+def get_workflow_summary(args):
+  token = args.token
+  workflow_name = args.workflow_name
+  # https://docs.github.com/en/search-github/getting-started-with-searching-on-github/understanding-the-search-syntax#query-for-dates
+  days = args.days
+  current_datetime = datetime.datetime.utcnow()
+  since_datetime = current_datetime - datetime.timedelta(days=days)
+  created = '>' + since_datetime.strftime('%Y-%m-%dT%H:%M:%SZ')
+
+  workflow_summary = {'workflow_name': workflow_name,
+                      'total_count': 0,
+                      'success_count': 0,
+                      'failure_count': 0,
+                      'created': created,
+                      'workflow_runs': []}
+
+  logging.info('START collecting workflow run data\n')
+  workflow_page = 0
+  per_page = 100 # max 100
+  list_workflows_params = {'status': 'completed', 'created': created, 'page': workflow_page, 'per_page': per_page}
+  if args.event:
+    list_workflows_params['event'] = args.event
+  if args.actor:
+    list_workflows_params['actor'] = args.actor
+  if args.branch:
+    list_workflows_params['branch'] = args.branch
+
+  request_workflow_list = True
+  while request_workflow_list:
+    workflow_page += 1
+    list_workflows_params['page'] = workflow_page
+    workflows = github.list_workflows(token, workflow_name, list_workflows_params)
+
+    if 'workflow_runs' in workflows and workflows['workflow_runs']:
+      for workflow in workflows['workflow_runs']:
+        if workflow['conclusion'] in ['success', 'failure']:
+          workflow_summary['workflow_runs'].append({'workflow_id': workflow['id'], 'conclusion': workflow['conclusion'],
+                                                    'head_branch': workflow['head_branch'], 'actor': workflow['actor']['login'],
+                                                    'created_at': workflow['created_at'], 'updated_at': workflow['updated_at'],
+                                                    'run_started_at': workflow['run_started_at'], 'run_attempt': workflow['run_attempt'],
+                                                    'html_url': workflow['html_url'], 'jobs_url': workflow['jobs_url'],
+                                                    'jobs': {'total_count': 0, 'success_count': 0, 'failure_count': 0, 'job_runs': []}})
+          workflow_summary['total_count'] += 1
+          if workflow['conclusion'] == 'success':
+            workflow_summary['success_count'] += 1
+          else:
+            workflow_summary['failure_count'] += 1
+    else:
+      request_workflow_list = False
+
+  logging.info('END collecting workflow run data\n')
+
+  logging.info('START collecting job data by workflow run\n')
+  for workflow_run in workflow_summary['workflow_runs']:
+    get_workflow_jobs(args, workflow_run)
+  logging.info('END collecting job data by workflow run\n')
+
+  return workflow_summary
+
+def get_workflow_jobs(args, workflow_run):
+  workflow_jobs = workflow_run['jobs']
+  job_page = 0
+  request_job_list = True
+  while request_job_list:
+    job_page += 1
+    list_jobs_params = {'filter': args.jobs, 'per_page': 100, 'page': job_page} # per_page: max 100
+    jobs = github.list_jobs(args.token, workflow_run['workflow_id'], list_jobs_params)
+    if 'jobs' in jobs and jobs['jobs']:
+      for job in jobs['jobs']:
+        workflow_jobs['job_runs'].append({'job_id': job['id'], 'job_name': job['name'], 'conclusion': job['conclusion'],
+                                          'created_at': job['created_at'], 'started_at': job['started_at'], 'completed_at': job['completed_at'],
+                                          'run_attempt': job['run_attempt'], 'html_url': job['html_url']})
+        if job['conclusion'] in ['success', 'failure']:
+          workflow_jobs['total_count'] += 1
+          if job['conclusion'] == 'success':
+            workflow_jobs['success_count'] += 1
+          else:
+            workflow_jobs['failure_count'] += 1
+
+    if 'jobs' not in jobs or jobs['total_count'] < job_page * 100:
+      request_job_list = False
+
+
+def get_job_summary(workflow_summary):
+  logging.info('START gathering job information by job name\n')
+  job_summary = {}
+  for workflow_run in workflow_summary['workflow_runs']:
+    for job_run in workflow_run['jobs']['job_runs']:
+      job_name = job_run['job_name']
+      if job_name not in job_summary:
+        job_summary[job_name] = {'total_count': 0,
+                                 'success_count': 0,
+                                 'failure_count': 0,
+                                 'failure_jobs': []}
+
+      job = job_summary[job_name]
+      job['total_count'] += 1
+      if job_run['conclusion'] == 'success':
+        job['success_count'] += 1
+      else:
+        job['failure_count'] += 1
+        job['failure_jobs'].append(job_run)
+
+  for job_name in job_summary:
+    total_count = job_summary[job_name]['total_count']
+    failure_count = job_summary[job_name]['failure_count']
+    job_summary[job_name]['failure_rate'] = float(failure_count/total_count)
+
+  job_summary = dict(sorted(job_summary.items(), key=lambda item: item[1]['failure_rate'], reverse=True))
+
+  logging.info('END gathering job information by job name\n')
+  return job_summary
+
+
+def generate_summary_report(workflow_summary, job_summary):
+  report_content = ''
+
+  workflow_name = workflow_summary['workflow_name']
+  total_count = workflow_summary['total_count']
+  success_count = workflow_summary['success_count']
+  failure_count = workflow_summary['failure_count']
+  failure_rate = float(failure_count/total_count)
+  report_content += f"Workflow '{workflow_name}' Report: \n Workflow Failure Rate:{failure_rate:.2%} \n Workflow Total Count: {total_count} (success: {success_count}, failure: {failure_count})\n\n"
+
+  report_content += workflow_runtime_report(workflow_summary)
+
+  report_content += 'Job Failure Report:\n'
+  for job_name in job_summary:
+    job = job_summary[job_name]
+    if job['failure_rate'] > 0:
+      report_content += f"{job_name}:\n Failure Rate:{job['failure_rate']:.2%}\n Total Count: {job['total_count']} (success: {job['success_count']}, failure: {job['failure_count']})\n"
+
+  logging.info(report_content)
+  return report_content
+
+
+def workflow_runtime_report(workflow_summary):
+  for workflow in workflow_summary['workflow_runs']:
+    created_at = datetime.datetime.strptime(workflow['created_at'], '%Y-%m-%dT%H:%M:%SZ')
+    updated_at = datetime.datetime.strptime(workflow['updated_at'], '%Y-%m-%dT%H:%M:%SZ')
+    workflow['runtime'] = (updated_at - created_at).total_seconds()
+
+  success_without_rerun = [w['runtime'] for w in workflow_summary['workflow_runs'] if w['run_attempt'] == 1 and w['conclusion'] == 'success']
+  failure_without_rerun = [w['runtime'] for w in workflow_summary['workflow_runs'] if w['run_attempt'] == 1 and w['conclusion'] == 'failure']
+  without_rerun = success_without_rerun + failure_without_rerun
+  with_rerun = [w['runtime'] for w in workflow_summary['workflow_runs'] if w['run_attempt'] > 1]
+
+  runtime_report = 'Workflow Runtime Report:\n'
+  if without_rerun:
+    runtime_report += f"{len(without_rerun)} workflow runs finished without rerun, the average running time: {datetime.timedelta(seconds=sum(without_rerun)/len(without_rerun))}\n"
+    runtime_report += 'Including:\n'
+    if success_without_rerun:
+      runtime_report += f" {len(success_without_rerun)} passed workflow runs, with average running time: {datetime.timedelta(seconds=sum(success_without_rerun)/len(success_without_rerun))}\n"
+    if failure_without_rerun:
+      runtime_report += f" {len(failure_without_rerun)} failed workflow runs, with average running time: {datetime.timedelta(seconds=sum(failure_without_rerun)/len(failure_without_rerun))}\n\n"
+
+  if with_rerun:
+    runtime_report += f"{len(with_rerun)} runs finished with rerun, the average running time: {datetime.timedelta(seconds=sum(with_rerun)/len(with_rerun))}\n"
+    runtime_report += f"The running times of the rerun workflows are:\n {[str(datetime.timedelta(seconds=x)) for x in with_rerun]}\n\n"
+
+  return runtime_report
+
+
+def parse_cmdline_args():
+  parser = argparse.ArgumentParser(description='Collect certain GitHub workflow information and calculate failure rate.')
+  parser.add_argument('-o', '--repo_owner', default='firebase', help='GitHub repo owner')
+  parser.add_argument('-n', '--repo_name', default='firebase-android-sdk', help='GitHub repo name')
+  parser.add_argument('-t', '--token', required=True, help='GitHub access token')
+
+  parser.add_argument('-w', '--workflow_name', default='ci_tests.yml', help='Workflow filename to run')
+  # By default, the artifacts and log files generated by workflows are retained for 90 days before they are automatically deleted.
+  # https://docs.github.com/en/organizations/managing-organization-settings/configuring-the-retention-period-for-github-actions-artifacts-and-logs-in-your-organization
+  parser.add_argument('-d', '--days', type=int, default=90, help='Filter workflow runs from the past -d days')
+  parser.add_argument('-b', '--branch', help='Filter branch name that workflows run against, default is all branches')
+  parser.add_argument('-a', '--actor', help='Filter by the actor who triggered the workflow runs, default is all actors')
+  parser.add_argument('-e', '--event', choices=['push', 'pull_request', 'issue'], help='Filter workflows trigger event, default is all events')
+  parser.add_argument('-j', '--jobs', default='all', choices=['latest', 'all'], help='Return the latest job attempt only or all job attempts including reruns, default is all')
+
+  parser.add_argument('-f', '--folder', help='Workflow and job information will be stored here, default is the current datetime')
+
+  args = parser.parse_args()
+  return args
+
+
+if __name__ == '__main__':
+  main()
\ No newline at end of file
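The failure rate reported by this script is a plain ratio of failed runs to completed runs; as a quick, self-contained check against the sample report in the README above (193 runs, 125 failures):

```python
# Sketch: the same failure-rate arithmetic used by generate_summary_report.
failure_count, total_count = 125, 193
failure_rate = float(failure_count / total_count)
print(f'Workflow Failure Rate:{failure_rate:.2%}')  # prints 64.77%
```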
From 1921f7e53ebe4643fa3aa8553891203f2e28ae98 Mon Sep 17 00:00:00 2001
From: Mou
Date: Thu, 2 Mar 2023 20:26:07 -0800
Subject: [PATCH 02/11] fix typo

---
 ci/fireci/workflow_summary/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ci/fireci/workflow_summary/README.md b/ci/fireci/workflow_summary/README.md
index b5414a692a2..a7abd71722f 100644
--- a/ci/fireci/workflow_summary/README.md
+++ b/ci/fireci/workflow_summary/README.md
@@ -11,7 +11,7 @@
    python workflow_information.py --token ${your_github_token} --event pull_request -d 30
    ```
 
-- Please refer `Inputs` section for more use cases, and `Outputs` section for the workflow summary report format.
+- Please refer to `Inputs` section for more use cases, and `Outputs` section for the workflow summary report format.
 
 ## Inputs
 - `-o, --repo_owner`: GitHub repo owner, default value is `firebase`.
@@ -41,7 +41,7 @@
 
   ```
   2023-03-03 01:37:07.114500
-  Namespace(actor=None, branch=None, days=30, event='pull_request', folder='presubmit_30', jobs='all', repo_name='firebase-android-sdk', repo_owner='firebase', token='ghp_[REDACTED]', workflow_name='ci_tests.yml')
+  Namespace(actor=None, branch=None, days=30, event='pull_request', folder='presubmit_30', jobs='all', repo_name='firebase-android-sdk', repo_owner='firebase', token=${your_github_token}, workflow_name='ci_tests.yml')
 
   Workflow 'ci_tests.yml' Report:
    Workflow Failure Rate:64.77%

From 84254ee94ca79848c961200e32e5853a699db4ae Mon Sep 17 00:00:00 2001
From: Mou
Date: Fri, 3 Mar 2023 10:39:36 -0800
Subject: [PATCH 03/11] update Readme

---
 ci/fireci/workflow_summary/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ci/fireci/workflow_summary/README.md b/ci/fireci/workflow_summary/README.md
index a7abd71722f..d0e2e0c9335 100644
--- a/ci/fireci/workflow_summary/README.md
+++ b/ci/fireci/workflow_summary/README.md
@@ -28,9 +28,9 @@
 
 - `-a, --actor`: Filter by the actor who triggered the workflow runs.
 
-- `-e, --event`: Filter workflows trigger event, default is all events, could be one of the following values `['push', 'pull_request', 'issue']`.
+- `-e, --event`: Filter workflows trigger event, could be one of the following values `['push', 'pull_request', 'issue']`.
 
-- `-j, --jobs`: Filter workflows jobs, default is the last job and does not include all jobs (does not include rerun jobs), could be one of the following values `['latest', 'all']`.
+- `-j, --jobs`: Filter workflows jobs, default is `all` (including rerun jobs), could be one of the following values `['latest', 'all']`.
 
 - `-f, --folder`: Workflow and job information will be stored here, default value is the current datetime.
 
 

From 35420a97dc5de648bc1d483c43758d665fae9272 Mon Sep 17 00:00:00 2001
From: Mou
Date: Fri, 3 Mar 2023 10:41:07 -0800
Subject: [PATCH 04/11] update Readme

---
 ci/fireci/workflow_summary/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/fireci/workflow_summary/README.md b/ci/fireci/workflow_summary/README.md
index d0e2e0c9335..adc540403af 100644
--- a/ci/fireci/workflow_summary/README.md
+++ b/ci/fireci/workflow_summary/README.md
@@ -146,7 +146,7 @@
 
 - `-t, --token`: **[Required]** GitHub access token. See [Creating a personal access token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token).
 
-- `-f, --folder`: Folder that stores intermediate files generated by `workflow_information.py`. `ci_workflow.yml` job failure logs will also be stored here.
+- `-f, --folder`: **[Required]** Folder that stores intermediate files generated by `workflow_information.py`. `ci_workflow.yml` job failure logs will also be stored here.

From e2b8343aa7ba8682023699061150eb910fdb2094 Mon Sep 17 00:00:00 2001
From: Mou
Date: Fri, 3 Mar 2023 11:10:54 -0800
Subject: [PATCH 05/11] update Readme

---
 ci/fireci/workflow_summary/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/fireci/workflow_summary/README.md b/ci/fireci/workflow_summary/README.md
index adc540403af..1c28e4d536a 100644
--- a/ci/fireci/workflow_summary/README.md
+++ b/ci/fireci/workflow_summary/README.md
@@ -137,7 +137,7 @@
 # `collect_ci_test_logs.py` Script
 
 ## Usage
-- Collect `ci_workflow.yml` job failure logs from `workflow_information.py` script's intermediate file:
+- Collect `ci_test.yml` job failure logs from `workflow_information.py` script's intermediate file:
   ```
   python collect_ci_test_logs.py --token ${github_token} --folder ${folder}
   ```

From 94f02497d53532ca6b945f92938c3978d86e51a9 Mon Sep 17 00:00:00 2001
From: Mou
Date: Fri, 3 Mar 2023 19:24:40 -0800
Subject: [PATCH 06/11] update format string based on comment

---
 .../workflow_summary/collect_ci_test_logs.py  | 12 ++++++------
 .../workflow_summary/workflow_information.py  | 15 +++++++++------
 2 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/ci/fireci/workflow_summary/collect_ci_test_logs.py b/ci/fireci/workflow_summary/collect_ci_test_logs.py
index 1c1ca2293ef..a62ac81dbb7 100644
--- a/ci/fireci/workflow_summary/collect_ci_test_logs.py
+++ b/ci/fireci/workflow_summary/collect_ci_test_logs.py
@@ -39,7 +39,7 @@ def main():
 
   file_folder = args.folder
   if not os.path.exists(file_folder):
-    logging.error('%s doesn\'t exist' % file_folder)
+    logging.error(f'{file_folder} doesn\'t exist')
     exit(1)
 
   job_summary = json.load(open(os.path.join(file_folder, 'job_summary.json')))
@@ -56,11 +56,11 @@ def main():
       success_count = job['success_count']
       failure_count = job['failure_count']
 
-      log_file_path = os.path.join(file_folder, '%s.log'%job_name)
+      log_file_path = os.path.join(file_folder, f'{job_name}.log')
       file_log = open(log_file_path, 'w')
-      file_log.write('\n{}:\nFailure rate:{:.2%} \nTotal count: {} (success: {}, failure: {})\nFailed jobs:'.format(job_name, failure_rate, total_count, success_count, failure_count))
-      logging.info('\n\n{}:\nFailure rate:{:.2%} \nTotal count: {} (success: {}, failure: {})\nFailed jobs:'.format(job_name, failure_rate, total_count, success_count, failure_count))
-
+      file_log.write(f'\n{job_name}:\nFailure rate:{failure_rate:.2%} \nTotal count: {total_count} (success: {success_count}, failure: {failure_count})\nFailed jobs:')
+      logging.info(f'\n\n{job_name}:\nFailure rate:{failure_rate:.2%} \nTotal count: {total_count} (success: {success_count}, failure: {failure_count})\nFailed jobs:')
+
       for failure_job in job['failure_jobs']:
         file_log.write('\n\n'+failure_job['html_url'])
         logging.info(failure_job['html_url'])
         job_id = failure_job['job_id']
@@ -77,7 +77,7 @@ def main():
 
       file_log.close()
 
-  logging.info('\n\nFinished collecting failure logs, log files are located under: %s' % file_folder)
+  logging.info(f'\n\nFinished collecting failure logs, log files are located under: {file_folder}')
 
 
 def parse_cmdline_args():
diff --git a/ci/fireci/workflow_summary/workflow_information.py b/ci/fireci/workflow_summary/workflow_information.py
index a5203dac7f5..dc64f7a441c 100644
--- a/ci/fireci/workflow_summary/workflow_information.py
+++ b/ci/fireci/workflow_summary/workflow_information.py
@@ -46,17 +46,20 @@ def main():
     os.makedirs(file_folder)
 
   workflow_summary = get_workflow_summary(args)
-  json.dump(workflow_summary, open(os.path.join(file_folder, 'workflow_summary.json'),'w'))
-  logging.info('Workflow summary has been written to %s\n' % os.path.join(file_folder, 'workflow_summary.json'))
+  workflow_summary_file_path = os.path.join(file_folder, 'workflow_summary.json')
+  json.dump(workflow_summary, open(workflow_summary_file_path,'w'))
+  logging.info(f'Workflow summary has been written to {workflow_summary_file_path}\n')
 
   job_summary = get_job_summary(workflow_summary)
-  json.dump(job_summary, open(os.path.join(file_folder, 'job_summary.json'),'w'))
-  logging.info('Job summary has been written to %s\n' % os.path.join(file_folder, 'job_summary.json'))
+  job_summary_file_path = os.path.join(file_folder, 'job_summary.json')
+  json.dump(job_summary, open(job_summary_file_path,'w'))
+  logging.info(f'Job summary has been written to {job_summary_file_path}\n')
 
   workflow_summary_report = f"{datetime.datetime.utcnow()}\n{args}\n\n"
   workflow_summary_report += generate_summary_report(workflow_summary, job_summary)
-  open(os.path.join(file_folder, 'workflow_summary_report.txt'), 'w').write(workflow_summary_report)
-  logging.info('Workflow summary report has been written to %s\n' % os.path.join(file_folder, 'workflow_summary_report.txt'))
+  report_file_path = os.path.join(file_folder, 'workflow_summary_report.txt')
+  open(report_file_path, 'w').write(workflow_summary_report)
+  logging.info(f'Workflow summary report has been written to {report_file_path}\n')

From 046852ea85266bca96ba935a0e4d493dc5d5a90c Mon Sep 17 00:00:00 2001
From: Mou
Date: Mon, 6 Mar 2023 10:34:36 -0800
Subject: [PATCH 07/11] change the file path

---
 ci/{fireci => }/workflow_summary/README.md                | 6 ++++++
 ci/{fireci => }/workflow_summary/collect_ci_test_logs.py  | 6 +++---
 ci/{fireci => }/workflow_summary/github.py                | 3 ---
 ci/{fireci => }/workflow_summary/workflow_information.py  | 4 ++--
 4 files changed, 11 insertions(+), 8 deletions(-)
 rename ci/{fireci => }/workflow_summary/README.md (98%)
 rename ci/{fireci => }/workflow_summary/collect_ci_test_logs.py (99%)
 rename ci/{fireci => }/workflow_summary/github.py (98%)
 rename ci/{fireci => }/workflow_summary/workflow_information.py (99%)

diff --git a/ci/fireci/workflow_summary/README.md b/ci/workflow_summary/README.md
similarity index 98%
rename from ci/fireci/workflow_summary/README.md
rename to ci/workflow_summary/README.md
index 1c28e4d536a..d5452925476 100644
--- a/ci/fireci/workflow_summary/README.md
+++ b/ci/workflow_summary/README.md
@@ -1,5 +1,11 @@
 # `workflow_information.py` Script
 
+## Prerequisites
+- [Python](https://www.python.org/) and required packages.
+  ```
+  pip install requests
+  ```
+
 ## Usage
 - Collect last `90` days' `Postsubmit` `ci_tests.yml` workflow runs:
   ```
diff --git a/ci/fireci/workflow_summary/collect_ci_test_logs.py b/ci/workflow_summary/collect_ci_test_logs.py
similarity index 99%
rename from ci/fireci/workflow_summary/collect_ci_test_logs.py
rename to ci/workflow_summary/collect_ci_test_logs.py
index a62ac81dbb7..927b6662e8c 100644
--- a/ci/fireci/workflow_summary/collect_ci_test_logs.py
+++ b/ci/workflow_summary/collect_ci_test_logs.py
@@ -21,13 +21,13 @@
 
 '''
 
-import argparse
 import github
+import argparse
 import json
 import re
 import logging
 import os
-import datetime
+
 
 def main():
   logging.getLogger().setLevel(logging.INFO)
@@ -88,4 +88,4 @@ def parse_cmdline_args():
   return args
 
 if __name__ == '__main__':
-  main()
\ No newline at end of file
+  main()
diff --git a/ci/fireci/workflow_summary/github.py b/ci/workflow_summary/github.py
similarity index 98%
rename from ci/fireci/workflow_summary/github.py
rename to ci/workflow_summary/github.py
index 9e7179ef381..18f0efc260f 100644
--- a/ci/fireci/workflow_summary/github.py
+++ b/ci/workflow_summary/github.py
@@ -15,9 +15,6 @@
 """A utility for GitHub REST API."""
 
 import requests
-import json
-import shutil
-import re
 import logging
 
 RETRIES = 3
diff --git a/ci/fireci/workflow_summary/workflow_information.py b/ci/workflow_summary/workflow_information.py
similarity index 99%
rename from ci/fireci/workflow_summary/workflow_information.py
rename to ci/workflow_summary/workflow_information.py
index dc64f7a441c..585302e6196 100644
--- a/ci/fireci/workflow_summary/workflow_information.py
+++ b/ci/workflow_summary/workflow_information.py
@@ -18,7 +18,6 @@
 import argparse
 import logging
 import os
-import threading
 
 
 '''A utility collecting workflow run data and calculating workflow/job failure rates.
@@ -247,4 +246,5 @@ def parse_cmdline_args():
 
 
 if __name__ == '__main__':
-  main()
\ No newline at end of file
+  main()
+ 
\ No newline at end of file

From 2becbe932ac0cc80f560f92640fd85a7dc17d9ba Mon Sep 17 00:00:00 2001
From: Mou
Date: Mon, 6 Mar 2023 11:39:45 -0800
Subject: [PATCH 08/11] remove space

---
 ci/workflow_summary/README.md               | 1 -
 ci/workflow_summary/workflow_information.py | 1 -
 2 files changed, 2 deletions(-)

diff --git a/ci/workflow_summary/README.md b/ci/workflow_summary/README.md
index d5452925476..c60558db9d2 100644
--- a/ci/workflow_summary/README.md
+++ b/ci/workflow_summary/README.md
@@ -174,4 +174,3 @@
    2023-02-28T18:57:20.5329189Z
    2023-02-28T18:57:20.5330505Z > Task :firebase-storage:testDebugUnitTest FAILED
    ```
-
diff --git a/ci/workflow_summary/workflow_information.py b/ci/workflow_summary/workflow_information.py
index 585302e6196..e4899303b9c 100644
--- a/ci/workflow_summary/workflow_information.py
+++ b/ci/workflow_summary/workflow_information.py
@@ -247,4 +247,3 @@ def parse_cmdline_args():
 
 if __name__ == '__main__':
   main()
- 
\ No newline at end of file

From 1c3080a685edce27fc1b19f9a885fd077c1d000e Mon Sep 17 00:00:00 2001
From: Mou
Date: Tue, 7 Mar 2023 15:54:25 -0800
Subject: [PATCH 09/11] update based on comments

---
 ci/workflow_summary/github.py               |  6 +++---
 ci/workflow_summary/workflow_information.py | 19 ++++++++++---------
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/ci/workflow_summary/github.py b/ci/workflow_summary/github.py
index 18f0efc260f..6f20a97df32 100644
--- a/ci/workflow_summary/github.py
+++ b/ci/workflow_summary/github.py
@@ -47,7 +47,7 @@ def list_workflows(token, workflow_id, params):
   headers = {'Accept': 'application/vnd.github+json', 'Authorization': f'token {token}'}
   with requests.get(url, headers=headers, params=params,
                     stream=True, timeout=TIMEOUT_LONG) as response:
-    logging.info("list_workflows: %s, params: %s, response: %s", url, params, response)
+    logging.info('list_workflows: %s, params: %s, response: %s', url, params, response)
     return response.json()
 
 def list_jobs(token, run_id, params):
@@ -56,7 +56,7 @@ def list_jobs(token, run_id, params):
   headers = {'Accept': 'application/vnd.github+json', 'Authorization': f'token {token}'}
   with requests.get(url, headers=headers, params=params,
                     stream=True, timeout=TIMEOUT_LONG) as response:
-    logging.info("list_jobs: %s, params: %s, response: %s", url, params, response)
+    logging.info('list_jobs: %s, params: %s, response: %s', url, params, response)
     return response.json()
 
 def job_logs(token, job_id):
@@ -65,7 +65,7 @@ def job_logs(token, job_id):
   headers = {'Accept': 'application/vnd.github+json', 'Authorization': f'token {token}'}
   with requests.get(url, headers=headers, allow_redirects=False,
                     stream=True, timeout=TIMEOUT_LONG) as response:
-    logging.info("job_logs: %s response: %s", url, response)
+    logging.info('job_logs: %s response: %s', url, response)
     if response.status_code == 302:
       with requests.get(response.headers['Location'], headers=headers, allow_redirects=False,
                         stream=True, timeout=TIMEOUT_LONG) as get_log_response:
diff --git a/ci/workflow_summary/workflow_information.py b/ci/workflow_summary/workflow_information.py
index e4899303b9c..445547906c5 100644
--- a/ci/workflow_summary/workflow_information.py
+++ b/ci/workflow_summary/workflow_information.py
@@ -46,18 +46,21 @@ def main():
 
   workflow_summary = get_workflow_summary(args)
   workflow_summary_file_path = os.path.join(file_folder, 'workflow_summary.json')
-  json.dump(workflow_summary, open(workflow_summary_file_path,'w'))
+  with open(workflow_summary_file_path, 'w') as f:
+    json.dump(workflow_summary, f)
   logging.info(f'Workflow summary has been written to {workflow_summary_file_path}\n')
 
   job_summary = get_job_summary(workflow_summary)
   job_summary_file_path = os.path.join(file_folder, 'job_summary.json')
-  json.dump(job_summary, open(job_summary_file_path,'w'))
+  with open(job_summary_file_path, 'w') as f:
+    json.dump(job_summary, f)
   logging.info(f'Job summary has been written to {job_summary_file_path}\n')
 
   workflow_summary_report = f"{datetime.datetime.utcnow()}\n{args}\n\n"
   workflow_summary_report += generate_summary_report(workflow_summary, job_summary)
   report_file_path = os.path.join(file_folder, 'workflow_summary_report.txt')
-  open(report_file_path, 'w').write(workflow_summary_report)
+  with open(report_file_path, 'w') as f:
+    f.write(workflow_summary_report)
   logging.info(f'Workflow summary report has been written to {report_file_path}\n')
 
 
@@ -88,8 +91,7 @@ def get_workflow_summary(args):
   if args.branch:
     list_workflows_params['branch'] = args.branch
 
-  request_workflow_list = True
-  while request_workflow_list:
+  while True:
     workflow_page += 1
     list_workflows_params['page'] = workflow_page
     workflows = github.list_workflows(token, workflow_name, list_workflows_params)
@@ -109,7 +111,7 @@ def get_workflow_summary(args):
           else:
             workflow_summary['failure_count'] += 1
     else:
-      request_workflow_list = False
+      break
 
   logging.info('END collecting workflow run data\n')
 
@@ -123,8 +125,7 @@ def get_workflow_summary(args):
 def get_workflow_jobs(args, workflow_run):
   workflow_jobs = workflow_run['jobs']
   job_page = 0
-  request_job_list = True
-  while request_job_list:
+  while True:
     job_page += 1
     list_jobs_params = {'filter': args.jobs, 'per_page': 100, 'page': job_page} # per_page: max 100
     jobs = github.list_jobs(args.token, workflow_run['workflow_id'], list_jobs_params)
@@ -141,7 +142,7 @@ def get_workflow_jobs(args, workflow_run):
           workflow_jobs['failure_count'] += 1
 
     if 'jobs' not in jobs or jobs['total_count'] < job_page * 100:
-      request_job_list = False
+      break

From d9352555a8def56972f9c2732be8c943072d574f Mon Sep 17 00:00:00 2001
From: Mou
Date: Tue, 7 Mar 2023 16:10:17 -0800
Subject: [PATCH 10/11] collecting logs

---
 ci/workflow_summary/collect_ci_test_logs.py |  4 +-
 ci/workflow_summary/github.py               | 87 +++++++++------------
 ci/workflow_summary/workflow_information.py | 14 ++--
 3 files changed, 46 insertions(+), 59 deletions(-)

diff --git a/ci/workflow_summary/collect_ci_test_logs.py b/ci/workflow_summary/collect_ci_test_logs.py
index 927b6662e8c..142c6c7be6b 100644
--- a/ci/workflow_summary/collect_ci_test_logs.py
+++ b/ci/workflow_summary/collect_ci_test_logs.py
@@ -33,7 +33,7 @@ def main():
   logging.getLogger().setLevel(logging.INFO)
 
   args = parse_cmdline_args()
-  github.set_api_url('firebase', 'firebase-android-sdk')
+  gh = github.GitHub('firebase', 'firebase-android-sdk')
 
   token = args.token
 
@@ -65,7 +65,7 @@ def main():
         file_log.write('\n\n'+failure_job['html_url'])
         logging.info(failure_job['html_url'])
         job_id = failure_job['job_id']
-        logs = github.job_logs(token, job_id)
+        logs = gh.job_logs(token, job_id)
         if logs:
           failed_tasks = re.findall(r"Execution failed for task ':(.*?)'.", logs)
           for failed_task in failed_tasks:
diff --git a/ci/workflow_summary/github.py b/ci/workflow_summary/github.py
index 18f0efc260f..b1c93b25f24 100644
--- a/ci/workflow_summary/github.py
+++ b/ci/workflow_summary/github.py
@@ -23,53 +23,40 @@
 RETRY_STATUS = (403, 500, 502, 504)
 TIMEOUT = 5
 TIMEOUT_LONG = 20
 
-OWNER = ''
-REPO = ''
-BASE_URL = 'https://api.github.com'
-GITHUB_API_URL = '%s/repos/%s/%s' % (BASE_URL, OWNER, REPO)
-
-
-def set_api_url(owner, repo):
-  if owner and repo:
-    global OWNER, REPO, GITHUB_API_URL
-    OWNER = owner
-    REPO = repo
-    GITHUB_API_URL = '%s/repos/%s/%s' % (BASE_URL, OWNER, REPO)
-    logging.info('GITHUB_API_URL has been set: %s' % GITHUB_API_URL)
-    return True
-  else:
-    return False
-
-
-def list_workflows(token, workflow_id, params):
-  """https://docs.github.com/en/rest/actions/workflow-runs#list-workflow-runs-for-a-workflow"""
-  url = f'{GITHUB_API_URL}/actions/workflows/{workflow_id}/runs'
-  headers = {'Accept': 'application/vnd.github+json', 'Authorization': f'token {token}'}
-  with requests.get(url, headers=headers, params=params,
-                    stream=True, timeout=TIMEOUT_LONG) as response:
-    logging.info('list_workflows: %s, params: %s, response: %s', url, params, response)
-    return response.json()
-
-def list_jobs(token, run_id, params):
-  """https://docs.github.com/en/rest/actions/workflow-jobs#list-jobs-for-a-workflow-run"""
-  url = f'{GITHUB_API_URL}/actions/runs/{run_id}/jobs'
-  headers = {'Accept': 'application/vnd.github+json', 'Authorization': f'token {token}'}
-  with requests.get(url, headers=headers, params=params,
-                    stream=True, timeout=TIMEOUT_LONG) as response:
-    logging.info('list_jobs: %s, params: %s, response: %s', url, params, response)
-    return response.json()
-
-def job_logs(token, job_id):
-  """https://docs.github.com/rest/reference/actions#download-job-logs-for-a-workflow-run"""
-  url = f'{GITHUB_API_URL}/actions/jobs/{job_id}/logs'
-  headers = {'Accept': 'application/vnd.github+json', 'Authorization': f'token {token}'}
-  with requests.get(url, headers=headers, allow_redirects=False,
-                    stream=True, timeout=TIMEOUT_LONG) as response:
-    logging.info('job_logs: %s response: %s', url, response)
-    if response.status_code == 302:
-      with requests.get(response.headers['Location'], headers=headers, allow_redirects=False,
-                        stream=True, timeout=TIMEOUT_LONG) as get_log_response:
-        return get_log_response.content.decode('utf-8')
-    else:
-      print('no log available')
-      return ''
+class GitHub:
+
+  def __init__(self, owner, repo):
+    self.github_api_url = f'https://api.github.com/repos/{owner}/{repo}'
+
+  def list_workflows(self, token, workflow_id, params):
+    """https://docs.github.com/en/rest/actions/workflow-runs#list-workflow-runs-for-a-workflow"""
+    url = f'{self.github_api_url}/actions/workflows/{workflow_id}/runs'
+    headers = {'Accept': 'application/vnd.github+json', 'Authorization': f'token {token}'}
+    with requests.get(url, headers=headers, params=params,
+                      stream=True, timeout=TIMEOUT_LONG) as response:
+      logging.info('list_workflows: %s, params: %s, response: %s', url, params, response)
+      return response.json()
+
+  def list_jobs(self, token, run_id, params):
+    """https://docs.github.com/en/rest/actions/workflow-jobs#list-jobs-for-a-workflow-run"""
+    url = f'{self.github_api_url}/actions/runs/{run_id}/jobs'
+    headers = {'Accept': 'application/vnd.github+json', 'Authorization': f'token {token}'}
+    with requests.get(url, headers=headers, params=params,
+                      stream=True, timeout=TIMEOUT_LONG) as response:
+      logging.info('list_jobs: %s, params: %s, response: %s', url, params, response)
+      return response.json()
+
+  def job_logs(self, token, job_id):
+    """https://docs.github.com/rest/reference/actions#download-job-logs-for-a-workflow-run"""
+    url = f'{self.github_api_url}/actions/jobs/{job_id}/logs'
+    headers = {'Accept': 'application/vnd.github+json', 'Authorization': f'token {token}'}
+    with requests.get(url, headers=headers, allow_redirects=False,
+                      stream=True, timeout=TIMEOUT_LONG) as response:
+      logging.info('job_logs: %s response: %s', url, response)
+      if response.status_code == 302:
+        with requests.get(response.headers['Location'], headers=headers, allow_redirects=False,
+                          stream=True, timeout=TIMEOUT_LONG) as get_log_response:
+          return get_log_response.content.decode('utf-8')
+      else:
+        print('no log available')
+        return ''
diff --git a/ci/workflow_summary/workflow_information.py b/ci/workflow_summary/workflow_information.py
index 445547906c5..597a664d617 100644
--- a/ci/workflow_summary/workflow_information.py
+++ b/ci/workflow_summary/workflow_information.py
@@ -34,7 +34,7 @@ def main():
   args = parse_cmdline_args()
   logging.info(args)
 
-  github.set_api_url(args.repo_owner, args.repo_name)
+  gh = github.GitHub(args.repo_owner, args.repo_name)
 
   # location for all artifacts
   if args.folder:
@@ -44,7 +44,7 @@ def main():
   if not os.path.exists(file_folder):
     os.makedirs(file_folder)
 
-  workflow_summary = get_workflow_summary(args)
+  workflow_summary = get_workflow_summary(gh, args)
   workflow_summary_file_path = os.path.join(file_folder, 'workflow_summary.json')
   with open(workflow_summary_file_path, 'w') as f:
     json.dump(workflow_summary, f)
@@ -64,7 +64,7 @@ def main():
   logging.info(f'Workflow summary report has been written to {report_file_path}\n')
 
 
-def get_workflow_summary(args):
+def get_workflow_summary(gh, args):
   token = args.token
   workflow_name = args.workflow_name
   # https://docs.github.com/en/search-github/getting-started-with-searching-on-github/understanding-the-search-syntax#query-for-dates
@@ -94,7 +94,7 @@ def get_workflow_summary(gh, args):
   while True:
     workflow_page += 1
     list_workflows_params['page'] = workflow_page
-    workflows = github.list_workflows(token, workflow_name, list_workflows_params)
+    workflows = gh.list_workflows(token, workflow_name, list_workflows_params)
 
     if 'workflow_runs' in workflows and workflows['workflow_runs']:
       for workflow in workflows['workflow_runs']:
@@ -117,18 +117,18 @@ def get_workflow_summary(gh, args):
 
   logging.info('START collecting job data by workflow run\n')
   for workflow_run in workflow_summary['workflow_runs']:
-    get_workflow_jobs(args, workflow_run)
+    get_workflow_jobs(gh, args, workflow_run)
   logging.info('END collecting job data by workflow run\n')
 
   return workflow_summary
 
-def get_workflow_jobs(args, workflow_run):
+def get_workflow_jobs(gh, args, workflow_run):
   workflow_jobs = workflow_run['jobs']
   job_page = 0
   while True:
     job_page += 1
     list_jobs_params = {'filter': args.jobs, 'per_page': 100, 'page': job_page} # per_page: max 100
-    jobs = github.list_jobs(args.token, workflow_run['workflow_id'], list_jobs_params)
+    jobs = gh.list_jobs(args.token, workflow_run['workflow_id'], list_jobs_params)
     if 'jobs' in jobs and jobs['jobs']:
       for job in jobs['jobs']:
         workflow_jobs['job_runs'].append({'job_id': job['id'], 'job_name': job['name'], 'conclusion': job['conclusion'],
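One loose end worth noting: `RETRIES`, `BACKOFF`, and `RETRY_STATUS` are declared in `github.py` but never used. The sketch below is one hypothetical way to wire them into a retrying GET helper; `get_with_retry` is illustrative only and not part of this series:

```python
# Sketch: a retrying GET built on the constants declared in github.py.
import logging
import time

import requests

RETRIES = 3
BACKOFF = 5
RETRY_STATUS = (403, 500, 502, 504)
TIMEOUT_LONG = 20

def get_with_retry(url, **kwargs):
  # Retry up to RETRIES times on the status codes listed in RETRY_STATUS,
  # sleeping BACKOFF seconds (scaled by attempt number) between tries.
  for attempt in range(RETRIES):
    response = requests.get(url, timeout=TIMEOUT_LONG, **kwargs)
    if response.status_code not in RETRY_STATUS:
      return response
    logging.warning('retrying %s (HTTP %s)', url, response.status_code)
    time.sleep(BACKOFF * (attempt + 1))
  return response
```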
From 5062c33906014c2ee1d7cacfa807e469114c7e6d Mon Sep 17 00:00:00 2001
From: Mou
Date: Wed, 8 Mar 2023 10:13:02 -0800
Subject: [PATCH 11/11] fix based on comments

---
 ci/workflow_summary/collect_ci_test_logs.py |  9 +++-
 ci/workflow_summary/github.py               |  2 +-
 ci/workflow_summary/workflow_information.py | 52 ++++++++++-----------
 3 files changed, 34 insertions(+), 29 deletions(-)

diff --git a/ci/workflow_summary/collect_ci_test_logs.py b/ci/workflow_summary/collect_ci_test_logs.py
index 142c6c7be6b..7409c991871 100644
--- a/ci/workflow_summary/collect_ci_test_logs.py
+++ b/ci/workflow_summary/collect_ci_test_logs.py
@@ -29,11 +29,15 @@
 import os
 
 
+REPO_OWNER = 'firebase'
+REPO_NAME = 'firebase-android-sdk'
+EXCLUDE_JOB_LIST = ['Determine changed modules','Unit Tests (matrix)','Publish Tests Results','Unit Test Results','Instrumentation Tests','Unit Tests']
+
 def main():
   logging.getLogger().setLevel(logging.INFO)
 
   args = parse_cmdline_args()
-  gh = github.GitHub('firebase', 'firebase-android-sdk')
+  gh = github.GitHub(REPO_OWNER, REPO_NAME)
 
   token = args.token
 
@@ -45,7 +49,7 @@ def main():
   job_summary = json.load(open(os.path.join(file_folder, 'job_summary.json')))
 
   for job_name in job_summary:
-    if job_name in ['Determine changed modules','Unit Tests (matrix)','Publish Tests Results','Unit Test Results','Instrumentation Tests','Unit Tests']:
+    if job_name in EXCLUDE_JOB_LIST:
       continue
 
     job = job_summary[job_name]
@@ -67,6 +71,7 @@ def main():
         job_id = failure_job['job_id']
         logs = gh.job_logs(token, job_id)
         if logs:
+          # using regex to extract failure information
          failed_tasks = re.findall(r"Execution failed for task ':(.*?)'.", logs)
           for failed_task in failed_tasks:
             file_log.write('\n'+failed_task)
diff --git a/ci/workflow_summary/github.py b/ci/workflow_summary/github.py
index b1c93b25f24..24f62d55661 100644
--- a/ci/workflow_summary/github.py
+++ b/ci/workflow_summary/github.py
@@ -58,5 +58,5 @@ def job_logs(self, token, job_id):
                           stream=True, timeout=TIMEOUT_LONG) as get_log_response:
           return get_log_response.content.decode('utf-8')
       else:
-        print('no log available')
+        logging.info('no log available')
         return ''
diff --git a/ci/workflow_summary/workflow_information.py b/ci/workflow_summary/workflow_information.py
index 597a664d617..06082cafd07 100644
--- a/ci/workflow_summary/workflow_information.py
+++ b/ci/workflow_summary/workflow_information.py
@@ -96,23 +96,23 @@ def get_workflow_summary(gh, args):
     list_workflows_params['page'] = workflow_page
     workflows = gh.list_workflows(token, workflow_name, list_workflows_params)
 
-    if 'workflow_runs' in workflows and workflows['workflow_runs']:
-      for workflow in workflows['workflow_runs']:
-        if workflow['conclusion'] in ['success', 'failure']:
-          workflow_summary['workflow_runs'].append({'workflow_id': workflow['id'], 'conclusion': workflow['conclusion'],
-                                                    'head_branch': workflow['head_branch'], 'actor': workflow['actor']['login'],
-                                                    'created_at': workflow['created_at'], 'updated_at': workflow['updated_at'],
-                                                    'run_started_at': workflow['run_started_at'], 'run_attempt': workflow['run_attempt'],
-                                                    'html_url': workflow['html_url'], 'jobs_url': workflow['jobs_url'],
-                                                    'jobs': {'total_count': 0, 'success_count': 0, 'failure_count': 0, 'job_runs': []}})
-          workflow_summary['total_count'] += 1
-          if workflow['conclusion'] == 'success':
-            workflow_summary['success_count'] += 1
-          else:
-            workflow_summary['failure_count'] += 1
-    else:
+    if 'workflow_runs' not in workflows or not workflows['workflow_runs']:
       break
 
+    for workflow in workflows['workflow_runs']:
+      if workflow['conclusion'] in ['success', 'failure']:
+        workflow_summary['workflow_runs'].append({'workflow_id': workflow['id'], 'conclusion': workflow['conclusion'],
+                                                  'head_branch': workflow['head_branch'], 'actor': workflow['actor']['login'],
+                                                  'created_at': workflow['created_at'], 'updated_at': workflow['updated_at'],
+                                                  'run_started_at': workflow['run_started_at'], 'run_attempt': workflow['run_attempt'],
+                                                  'html_url': workflow['html_url'], 'jobs_url': workflow['jobs_url'],
+                                                  'jobs': {'total_count': 0, 'success_count': 0, 'failure_count': 0, 'job_runs': []}})
+        workflow_summary['total_count'] += 1
+        if workflow['conclusion'] == 'success':
+          workflow_summary['success_count'] += 1
+        else:
+          workflow_summary['failure_count'] += 1
+
   logging.info('END collecting workflow run data\n')
 
   logging.info('START collecting job data by workflow run\n')
@@ -129,21 +129,21 @@ def get_workflow_jobs(gh, args, workflow_run):
     job_page += 1
     list_jobs_params = {'filter': args.jobs, 'per_page': 100, 'page': job_page} # per_page: max 100
     jobs = gh.list_jobs(args.token, workflow_run['workflow_id'], list_jobs_params)
-    if 'jobs' in jobs and jobs['jobs']:
-      for job in jobs['jobs']:
-        workflow_jobs['job_runs'].append({'job_id': job['id'], 'job_name': job['name'], 'conclusion': job['conclusion'],
-                                          'created_at': job['created_at'], 'started_at': job['started_at'], 'completed_at': job['completed_at'],
-                                          'run_attempt': job['run_attempt'], 'html_url': job['html_url']})
-        if job['conclusion'] in ['success', 'failure']:
-          workflow_jobs['total_count'] += 1
-          if job['conclusion'] == 'success':
-            workflow_jobs['success_count'] += 1
-          else:
-            workflow_jobs['failure_count'] += 1
 
-    if 'jobs' not in jobs or jobs['total_count'] < job_page * 100:
+    if 'jobs' not in jobs or not jobs['jobs']:
       break
 
+    for job in jobs['jobs']:
+      workflow_jobs['job_runs'].append({'job_id': job['id'], 'job_name': job['name'], 'conclusion': job['conclusion'],
+                                        'created_at': job['created_at'], 'started_at': job['started_at'], 'completed_at': job['completed_at'],
+                                        'run_attempt': job['run_attempt'], 'html_url': job['html_url']})
+      if job['conclusion'] in ['success', 'failure']:
+        workflow_jobs['total_count'] += 1
+        if job['conclusion'] == 'success':
+          workflow_jobs['success_count'] += 1
+        else:
+          workflow_jobs['failure_count'] += 1
+
+    if jobs['total_count'] <= job_page * 100:
+      break
+
 
 def get_job_summary(workflow_summary):
   logging.info('START gathering job information by job name\n')