From 3b50ad539f9a1f109b1921769272a27fe8386d97 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 27 Feb 2024 17:11:47 +0530 Subject: [PATCH 1/9] add: support for notifying maintainers about the nightly test status --- .github/workflows/nightly_tests.yml | 22 ++++- scripts/log_reports.py | 141 ++++++++++++++++++++++++++++ 2 files changed, 161 insertions(+), 2 deletions(-) create mode 100644 scripts/log_reports.py diff --git a/.github/workflows/nightly_tests.yml b/.github/workflows/nightly_tests.yml index fb0ce92cb61c..289e628f185d 100644 --- a/.github/workflows/nightly_tests.yml +++ b/.github/workflows/nightly_tests.yml @@ -63,6 +63,7 @@ jobs: python -m pip install -e .[quality,test] python -m pip install -U git+https://github.com/huggingface/transformers python -m pip install git+https://github.com/huggingface/accelerate + pip install pytest-reportlog - name: Environment run: | @@ -76,7 +77,8 @@ jobs: python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \ -s -v -k "not Flax and not Onnx" \ --make-reports=tests_${{ matrix.config.report }} \ - tests/ + --report-log=${{ matrix.config.report }}.log \ + tests/ - name: Run nightly Flax TPU tests if: ${{ matrix.config.framework == 'flax' }} @@ -86,6 +88,7 @@ jobs: python -m pytest -n 0 \ -s -v -k "Flax" \ --make-reports=tests_${{ matrix.config.report }} \ + --report-log=${{ matrix.config.report }}.log \ tests/ - name: Run nightly ONNXRuntime CUDA tests @@ -96,6 +99,7 @@ jobs: python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \ -s -v -k "Onnx" \ --make-reports=tests_${{ matrix.config.report }} \ + --report-log=${{ matrix.config.report }}.log \ tests/ - name: Failure short reports @@ -108,6 +112,12 @@ jobs: with: name: ${{ matrix.config.report }}_test_reports path: reports + + - name: Generate Report and Notify Channel + if: always() + run: | + pip install slack_sdk tabulate + python scripts/log_reports.py >> $GITHUB_STEP_SUMMARY run_nightly_tests_apple_m1: name: Nightly PyTorch MPS tests on MacOS @@ 
-148,7 +158,9 @@ jobs: HF_HOME: /System/Volumes/Data/mnt/cache HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }} run: | - ${CONDA_RUN} python -m pytest -n 1 -s -v --make-reports=tests_torch_mps tests/ + ${CONDA_RUN} python -m pytest -n 1 -s -v --make-reports=tests_torch_mps \ + --report-log=tests_torch_mps.log \ + tests/ - name: Failure short reports if: ${{ failure() }} @@ -160,3 +172,9 @@ jobs: with: name: torch_mps_test_reports path: reports + + - name: Generate Report and Notify Channel + if: always() + run: | + pip install slack_sdk tabulate + python scripts/log_reports.py >> $GITHUB_STEP_SUMMARY diff --git a/scripts/log_reports.py b/scripts/log_reports.py new file mode 100644 index 000000000000..949b74947c17 --- /dev/null +++ b/scripts/log_reports.py @@ -0,0 +1,141 @@ +import argparse +import json +import os +from datetime import date +from pathlib import Path + +from tabulate import tabulate + + +MAX_LEN_MESSAGE = 2900 # slack endpoint has a limit of 3001 characters + +parser = argparse.ArgumentParser() +parser.add_argument("--slack_channel_name", default="diffusers-ci-nightly") + + +def main(slack_channel_name=None): + failed = [] + passed = [] + + group_info = [] + + total_num_failed = 0 + empty_file = False or len(list(Path().glob("*.log"))) == 0 + + total_empty_files = [] + + for log in Path().glob("*.log"): + section_num_failed = 0 + i = 0 + with open(log) as f: + for line in f: + line = json.loads(line) + i += 1 + if line.get("nodeid", "") != "": + test = line["nodeid"] + if line.get("duration", None) is not None: + duration = f'{line["duration"]:.4f}' + if line.get("outcome", "") == "failed": + section_num_failed += 1 + failed.append([test, duration, log.name.split("_")[0]]) + total_num_failed += 1 + else: + passed.append([test, duration, log.name.split("_")[0]]) + empty_file = i == 0 + group_info.append([str(log), section_num_failed, failed]) + total_empty_files.append(empty_file) + os.remove(log) + failed = [] + text = ( + "🌞 There were 
no failures!" + if not any(total_empty_files) + else "Something went wrong there is at least one empty file - please check GH action results." + ) + no_error_payload = { + "type": "section", + "text": { + "type": "plain_text", + "text": text, + "emoji": True, + }, + } + + message = "" + payload = [ + { + "type": "header", + "text": { + "type": "plain_text", + "text": "🤗 Results of the {} PEFT scheduled tests.".format(os.environ.get("TEST_TYPE", "")), + }, + }, + ] + if total_num_failed > 0: + for i, (name, num_failed, failed_tests) in enumerate(group_info): + if num_failed > 0: + if num_failed == 1: + message += f"*{name}: {num_failed} failed test*\n" + else: + message += f"*{name}: {num_failed} failed tests*\n" + failed_table = [] + for test in failed_tests: + failed_table.append(test[0].split("::")) + failed_table = tabulate( + failed_table, + headers=["Test Location", "Test Case", "Test Name"], + showindex="always", + tablefmt="grid", + maxcolwidths=[12, 12, 12], + ) + message += "\n```\n" + failed_table + "\n```" + + if total_empty_files[i]: + message += f"\n*{name}: Warning! Empty file - please check the GitHub action job *\n" + print(f"### {message}") + else: + payload.append(no_error_payload) + + if os.environ.get("TEST_TYPE", "") != "": + from slack_sdk import WebClient + + if len(message) > MAX_LEN_MESSAGE: + print(f"Truncating long message from {len(message)} to {MAX_LEN_MESSAGE}") + message = message[:MAX_LEN_MESSAGE] + "..." 
+ + if len(message) != 0: + md_report = { + "type": "section", + "text": {"type": "mrkdwn", "text": message}, + } + payload.append(md_report) + action_button = { + "type": "section", + "text": {"type": "mrkdwn", "text": "*For more details:*"}, + "accessory": { + "type": "button", + "text": {"type": "plain_text", "text": "Check Action results", "emoji": True}, + "url": f"https://github.com/huggingface/peft/actions/runs/{os.environ['GITHUB_RUN_ID']}", + }, + } + payload.append(action_button) + + date_report = { + "type": "context", + "elements": [ + { + "type": "plain_text", + "text": f"Nightly {os.environ.get('TEST_TYPE')} test results for {date.today()}", + }, + ], + } + payload.append(date_report) + + print(payload) + + client = WebClient(token=os.environ.get("SLACK_API_TOKEN")) + client.chat_postMessage(channel=f"#{slack_channel_name}", text=message, blocks=payload) + + +if __name__ == "__main__": + args = parser.parse_args() + main(args.slack_channel_name) From 8f5739795cab2a70064b92bc4334b30839409148 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 27 Feb 2024 17:22:56 +0530 Subject: [PATCH 2/9] add: a temporary workflow for validation. 
--- .github/workflows/temp_workflow.yml | 49 +++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 .github/workflows/temp_workflow.yml diff --git a/.github/workflows/temp_workflow.yml b/.github/workflows/temp_workflow.yml new file mode 100644 index 000000000000..8643abc6aa10 --- /dev/null +++ b/.github/workflows/temp_workflow.yml @@ -0,0 +1,49 @@ +name: Temporary workflow to check if the notifications are working as expected + +on: + pull_request: + branches: + - main + push: + branches: + - ci-* + +env: + DIFFUSERS_IS_CI: yes + HF_HOME: /mnt/cache + OMP_NUM_THREADS: 8 + MKL_NUM_THREADS: 8 + PYTEST_TIMEOUT: 600 + RUN_SLOW: yes + RUN_NIGHTLY: yes + +jobs: + run_nightly_tests: + name: Run a nightly test + strategy: + fail-fast: false + runner: docker-gpu + container: + image: diffusers/diffusers-pytorch-cuda + options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --gpus 0 + steps: + - name: Checkout diffusers + uses: actions/checkout@v3 + with: + fetch-depth: 2 + - name: Install dependencies + run: | + python -m pip install -e .[quality,test] + python -m pip install -U git+https://github.com/huggingface/transformers + python -m pip install git+https://github.com/huggingface/accelerate + pip install pytest-reportlog + - name: Run a nightly PyTorch CUDA test + run: | + python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \ + --report-log=single_nightly_test.log \ + tests/lora/test_lora_layers_peft.py::StableDiffusionLoRATests::test_integration_logits_no_scale + - name: Generate Report and Notify Channel + if: always() + run: | + pip install slack_sdk tabulate + python scripts/log_reports.py >> $GITHUB_STEP_SUMMARY \ No newline at end of file From 8dbe5df6dc0bc22e4f46653bb7976210944723c5 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 27 Feb 2024 17:26:54 +0530 Subject: [PATCH 3/9] cancel in progress. 
--- .github/workflows/temp_workflow.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/temp_workflow.yml b/.github/workflows/temp_workflow.yml index 8643abc6aa10..234c08be6a62 100644 --- a/.github/workflows/temp_workflow.yml +++ b/.github/workflows/temp_workflow.yml @@ -8,6 +8,10 @@ on: branches: - ci-* +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + env: DIFFUSERS_IS_CI: yes HF_HOME: /mnt/cache From 87d96eb578de98fffe301a7f3ee6f2d9e04e883e Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 27 Feb 2024 17:43:27 +0530 Subject: [PATCH 4/9] runs-on --- .github/workflows/temp_workflow.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/temp_workflow.yml b/.github/workflows/temp_workflow.yml index 234c08be6a62..ae5c345e133b 100644 --- a/.github/workflows/temp_workflow.yml +++ b/.github/workflows/temp_workflow.yml @@ -26,7 +26,7 @@ jobs: name: Run a nightly test strategy: fail-fast: false - runner: docker-gpu + runs-on: docker-gpu container: image: diffusers/diffusers-pytorch-cuda options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --gpus 0 From c8b8833be0fcdadfb79ebc34f95e28f8ccfd041f Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 27 Feb 2024 17:45:12 +0530 Subject: [PATCH 5/9] clean up --- .github/workflows/temp_workflow.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/temp_workflow.yml b/.github/workflows/temp_workflow.yml index ae5c345e133b..a4f98d666586 100644 --- a/.github/workflows/temp_workflow.yml +++ b/.github/workflows/temp_workflow.yml @@ -30,6 +30,10 @@ jobs: container: image: diffusers/diffusers-pytorch-cuda options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --gpus 0 + defaults: + run: + shell: bash + steps: - name: Checkout diffusers uses: actions/checkout@v3 @@ -43,7 +47,7 @@ jobs: pip install pytest-reportlog - name: Run a nightly PyTorch CUDA test 
run: | - python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \ + python -m pytest \ --report-log=single_nightly_test.log \ tests/lora/test_lora_layers_peft.py::StableDiffusionLoRATests::test_integration_logits_no_scale - name: Generate Report and Notify Channel From 5eeb22fc2e33118725e7b558bd670d653e2498bb Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 27 Feb 2024 18:00:07 +0530 Subject: [PATCH 6/9] add: peft dep --- .github/workflows/temp_workflow.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/temp_workflow.yml b/.github/workflows/temp_workflow.yml index a4f98d666586..6d53210ba52a 100644 --- a/.github/workflows/temp_workflow.yml +++ b/.github/workflows/temp_workflow.yml @@ -44,6 +44,7 @@ jobs: python -m pip install -e .[quality,test] python -m pip install -U git+https://github.com/huggingface/transformers python -m pip install git+https://github.com/huggingface/accelerate + python -m pip install git+https://github.com/huggingface/peft pip install pytest-reportlog - name: Run a nightly PyTorch CUDA test run: | From be6737182ee1d713e52c7ca6fe923e5da551982f Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 27 Feb 2024 18:26:32 +0530 Subject: [PATCH 7/9] change device. --- .github/workflows/temp_workflow.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/temp_workflow.yml b/.github/workflows/temp_workflow.yml index 6d53210ba52a..7ae8b28af2fe 100644 --- a/.github/workflows/temp_workflow.yml +++ b/.github/workflows/temp_workflow.yml @@ -26,7 +26,7 @@ jobs: name: Run a nightly test strategy: fail-fast: false - runs-on: docker-gpu + runs-on: [single-gpu, nvidia-gpu, a10, ci] container: image: diffusers/diffusers-pytorch-cuda options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --gpus 0 From 710e4421622e311cc7da7460f7f132d84d1c5fe0 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Tue, 27 Feb 2024 19:51:01 +0530 Subject: [PATCH 8/9] multiple edits. 
--- .github/workflows/nightly_tests.yml | 1 + .github/workflows/temp_workflow.yml | 1 + scripts/log_reports.py | 72 ++++++++++++++--------------- 3 files changed, 37 insertions(+), 37 deletions(-) diff --git a/.github/workflows/nightly_tests.yml b/.github/workflows/nightly_tests.yml index 289e628f185d..89cbabfb67b2 100644 --- a/.github/workflows/nightly_tests.yml +++ b/.github/workflows/nightly_tests.yml @@ -12,6 +12,7 @@ env: PYTEST_TIMEOUT: 600 RUN_SLOW: yes RUN_NIGHTLY: yes + SLACK_API_TOKEN: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} jobs: run_nightly_tests: diff --git a/.github/workflows/temp_workflow.yml b/.github/workflows/temp_workflow.yml index 7ae8b28af2fe..5cfcdbc91970 100644 --- a/.github/workflows/temp_workflow.yml +++ b/.github/workflows/temp_workflow.yml @@ -20,6 +20,7 @@ env: PYTEST_TIMEOUT: 600 RUN_SLOW: yes RUN_NIGHTLY: yes + SLACK_API_TOKEN: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} jobs: run_nightly_tests: diff --git a/scripts/log_reports.py b/scripts/log_reports.py index 949b74947c17..dd1b258519d7 100644 --- a/scripts/log_reports.py +++ b/scripts/log_reports.py @@ -4,6 +4,7 @@ from datetime import date from pathlib import Path +from slack_sdk import WebClient from tabulate import tabulate @@ -66,7 +67,7 @@ def main(slack_channel_name=None): "type": "header", "text": { "type": "plain_text", - "text": "🤗 Results of the {} PEFT scheduled tests.".format(os.environ.get("TEST_TYPE", "")), + "text": "🤗 Results of the Diffusers scheduled nightly tests.", }, }, ] @@ -95,45 +96,42 @@ def main(slack_channel_name=None): else: payload.append(no_error_payload) - if os.environ.get("TEST_TYPE", "") != "": - from slack_sdk import WebClient - - if len(message) > MAX_LEN_MESSAGE: - print(f"Truncating long message from {len(message)} to {MAX_LEN_MESSAGE}") - message = message[:MAX_LEN_MESSAGE] + "..." 
- - if len(message) != 0: - md_report = { - "type": "section", - "text": {"type": "mrkdwn", "text": message}, - } - payload.append(md_report) - action_button = { - "type": "section", - "text": {"type": "mrkdwn", "text": "*For more details:*"}, - "accessory": { - "type": "button", - "text": {"type": "plain_text", "text": "Check Action results", "emoji": True}, - "url": f"https://github.com/huggingface/peft/actions/runs/{os.environ['GITHUB_RUN_ID']}", - }, - } - payload.append(action_button) - - date_report = { - "type": "context", - "elements": [ - { - "type": "plain_text", - "text": f"Nightly {os.environ.get('TEST_TYPE')} test results for {date.today()}", - }, - ], + if len(message) > MAX_LEN_MESSAGE: + print(f"Truncating long message from {len(message)} to {MAX_LEN_MESSAGE}") + message = message[:MAX_LEN_MESSAGE] + "..." + + if len(message) != 0: + md_report = { + "type": "section", + "text": {"type": "mrkdwn", "text": message}, + } + payload.append(md_report) + action_button = { + "type": "section", + "text": {"type": "mrkdwn", "text": "*For more details:*"}, + "accessory": { + "type": "button", + "text": {"type": "plain_text", "text": "Check Action results", "emoji": True}, + "url": f"https://github.com/huggingface/diffusers/actions/runs/{os.environ['GITHUB_RUN_ID']}", + }, } - payload.append(date_report) + payload.append(action_button) + + date_report = { + "type": "context", + "elements": [ + { + "type": "plain_text", + "text": f"Nightly test results for {date.today()}", + }, + ], + } + payload.append(date_report) - print(payload) + print(payload) - client = WebClient(token=os.environ.get("SLACK_API_TOKEN")) - client.chat_postMessage(channel=f"#{slack_channel_name}", text=message, blocks=payload) + client = WebClient(token=os.environ.get("SLACK_API_TOKEN")) + client.chat_postMessage(channel=f"#{slack_channel_name}", text=message, blocks=payload) if __name__ == "__main__": From 2a426f9799a20d79b62aeeae3b71a20ef1c3f6ec Mon Sep 17 00:00:00 2001 From: sayakpaul 
Date: Wed, 13 Mar 2024 16:14:16 +0530 Subject: [PATCH 9/9] remove temp workflow. --- .github/workflows/temp_workflow.yml | 59 ----------------------------- 1 file changed, 59 deletions(-) delete mode 100644 .github/workflows/temp_workflow.yml diff --git a/.github/workflows/temp_workflow.yml b/.github/workflows/temp_workflow.yml deleted file mode 100644 index 5cfcdbc91970..000000000000 --- a/.github/workflows/temp_workflow.yml +++ /dev/null @@ -1,59 +0,0 @@ -name: Temporary workflow to check if the notifications are working as expected - -on: - pull_request: - branches: - - main - push: - branches: - - ci-* - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -env: - DIFFUSERS_IS_CI: yes - HF_HOME: /mnt/cache - OMP_NUM_THREADS: 8 - MKL_NUM_THREADS: 8 - PYTEST_TIMEOUT: 600 - RUN_SLOW: yes - RUN_NIGHTLY: yes - SLACK_API_TOKEN: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }} - -jobs: - run_nightly_tests: - name: Run a nightly test - strategy: - fail-fast: false - runs-on: [single-gpu, nvidia-gpu, a10, ci] - container: - image: diffusers/diffusers-pytorch-cuda - options: --shm-size "16gb" --ipc host -v /mnt/hf_cache:/mnt/cache/ --gpus 0 - defaults: - run: - shell: bash - - steps: - - name: Checkout diffusers - uses: actions/checkout@v3 - with: - fetch-depth: 2 - - name: Install dependencies - run: | - python -m pip install -e .[quality,test] - python -m pip install -U git+https://github.com/huggingface/transformers - python -m pip install git+https://github.com/huggingface/accelerate - python -m pip install git+https://github.com/huggingface/peft - pip install pytest-reportlog - - name: Run a nightly PyTorch CUDA test - run: | - python -m pytest \ - --report-log=single_nightly_test.log \ - tests/lora/test_lora_layers_peft.py::StableDiffusionLoRATests::test_integration_logits_no_scale - - name: Generate Report and Notify Channel - if: always() - run: | - pip install slack_sdk tabulate - python 
scripts/log_reports.py >> $GITHUB_STEP_SUMMARY \ No newline at end of file