# Patch summary. These lines precede the first "diff --git" header and belong to no hunk.
#   .github/workflow_scripts/run_benchmark.sh
#     - For non-master runs, copies s3://autogluon-ci-benchmark/cleaned/master/latest/ into ./results
#       and renames the fetched file with a "master_" prefix.
#     - Passes --branch_name to CI/bench/evaluate.py.
#     - Uploads ./results one file at a time; the agdash dashboard step runs only for non-master runs.
#   .github/workflows/benchmark-command.yml
#     - Renames the workflow, adds a generate_amlb_user_dir job that the benchmark job now depends on,
#       and passes the extracted branch name and github.sha to run_benchmark.sh.
#   .github/workflows/benchmark_master.yml (new file)
#     - Runs the benchmark on pushes to master and on a weekly (Sunday) cron schedule.
#   .github/workflows/continuous_integration.yml
#     - Removes the generate_bench_configs job.
#   .github/workflows/slash_command_dispatch.yml
#     - Adds "permission: write" to the slash-command-dispatch step.
#   CI/bench/evaluate.py
#     - Adds a required --branch_name argument; "agbench evaluate-amlb-results" is invoked only when
#       the branch is not "master", with one --paths / --frameworks-run flag emitted per value.
#
# NOTE(review): in run_benchmark.sh, "aws s3 rm --recursive .../latest/" sits inside the per-file loop,
#   so when ./results holds more than one file for the same destination, each iteration removes what
#   the previous iteration uploaded to latest/ - confirm a single file per destination is expected.
# NOTE(review): files whose basename starts with "master" take the else branch and are written back to
#   cleaned/master/$SHA/ and cleaned/master/latest/ under the prefixed name, using the current run's
#   $SHA - confirm this is intended for PR runs.
# NOTE(review): "[ $BRANCH_OR_PR_NUMBER != "master" ]" leaves the variable unquoted; an empty value
#   makes "[" report an error instead of comparing - consider quoting.
# NOTE(review): master_cleaned_file takes the last field of every "aws s3 ls" output line; the later
#   "mv" presumably assumes exactly one object under master/latest/ - verify.
# NOTE(review): evaluate.py calls subprocess.run without check=True, so the exit status of agbench is
#   not inspected at that call site.
diff --git a/.github/workflow_scripts/run_benchmark.sh b/.github/workflow_scripts/run_benchmark.sh index f09ce92411a..73f46bb89cd 100644 --- a/.github/workflow_scripts/run_benchmark.sh +++ b/.github/workflow_scripts/run_benchmark.sh @@ -16,14 +16,40 @@ setup_benchmark_env /bin/bash CI/bench/generate_bench_config.sh $MODULE $PRESET $BENCHMARK $TIME_LIMIT $BRANCH_OR_PR_NUMBER agbench run $MODULE"_cloud_configs.yaml" --wait -python CI/bench/evaluate.py --config_path ./ag_bench_runs/tabular/ --time_limit $TIME_LIMIT -aws s3 cp --recursive ./results s3://autogluon-ci-benchmark/cleaned/$BRANCH_OR_PR_NUMBER/$SHA/ -aws s3 rm --recursive s3://autogluon-ci-benchmark/cleaned/$BRANCH_OR_PR_NUMBER/latest/ -aws s3 cp --recursive ./results s3://autogluon-ci-benchmark/cleaned/$BRANCH_OR_PR_NUMBER/latest/ - -cwd=`pwd` -ls data/results/output/openml/ag_eval/pairwise/* | grep .csv > $cwd/agg_csv.txt -filename=`head -1 $cwd/agg_csv.txt` -prefix=$BRANCH_OR_PR_NUMBER/$SHA -agdash --per_dataset_csv 'data/results/output/openml/ag_eval/results_ranked_by_dataset_all.csv' --agg_dataset_csv $filename --s3_prefix benchmark-dashboard/$prefix --s3_bucket autogluon-staging --s3_region us-west-2 > $cwd/out.txt -tail -1 $cwd/out.txt > $cwd/website.txt +# If it is a PR, fetch the cleaned file of master-evaluation +if [ $BRANCH_OR_PR_NUMBER != "master" ] +then + # Capture the name of the file, rename it and store it in ./results + master_cleaned_file=$(aws s3 ls s3://autogluon-ci-benchmark/cleaned/master/latest/ | awk '{print $NF}') + new_master_cleaned_file="master_${master_cleaned_file}" + aws s3 cp --recursive s3://autogluon-ci-benchmark/cleaned/master/latest/ ./results + mv "./results/$master_cleaned_file" "./results/$new_master_cleaned_file" +fi + +python CI/bench/evaluate.py --config_path ./ag_bench_runs/tabular/ --time_limit $TIME_LIMIT --branch_name $BRANCH_OR_PR_NUMBER + +for file in ./results/*; do + # Check if the file does not start with "master" + if [[ "$(basename "$file")" != "master"* 
]] + then + aws s3 cp "$file" "s3://autogluon-ci-benchmark/cleaned/$BRANCH_OR_PR_NUMBER/$SHA/$(basename "$file")" + aws s3 rm --recursive s3://autogluon-ci-benchmark/cleaned/$BRANCH_OR_PR_NUMBER/latest/ + aws s3 cp "$file" s3://autogluon-ci-benchmark/cleaned/$BRANCH_OR_PR_NUMBER/latest/$(basename "$file") + else + aws s3 cp "$file" "s3://autogluon-ci-benchmark/cleaned/master/$SHA/$(basename "$file")" + aws s3 rm --recursive s3://autogluon-ci-benchmark/cleaned/master/latest/ + aws s3 cp "$file" s3://autogluon-ci-benchmark/cleaned/master/latest/$(basename "$file") + fi +done + +# Run dashboard if the branch is not master +if [ $BRANCH_OR_PR_NUMBER != "master" ] +then + cwd=`pwd` + ls data/results/output/openml/ag_eval/pairwise/* | grep .csv > $cwd/agg_csv.txt + cat agg_csv.txt + filename=`head -1 $cwd/agg_csv.txt` + prefix=$BRANCH_OR_PR_NUMBER/$SHA + agdash --per_dataset_csv 'data/results/output/openml/ag_eval/results_ranked_by_dataset_all.csv' --agg_dataset_csv $filename --s3_prefix benchmark-dashboard/$prefix --s3_bucket autogluon-staging --s3_region us-west-2 > $cwd/out.txt + tail -1 $cwd/out.txt > $cwd/website.txt +fi diff --git a/.github/workflows/benchmark-command.yml b/.github/workflows/benchmark-command.yml index 13ff2028026..431479b19b9 100644 --- a/.github/workflows/benchmark-command.yml +++ b/.github/workflows/benchmark-command.yml @@ -1,4 +1,5 @@ -name: Benchmark +# Workflow to trigger benchmarking, cleaning, aggregation of the PR and evaluating w.r.t master branch, results on dashboard +name: Benchmark Pull Request on: workflow_dispatch: inputs: @@ -67,9 +68,31 @@ jobs: [Benchmark Output][1] [1]: ${{ steps.vars.outputs.run-url }} + + generate_amlb_user_dir: + needs: setup + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v2 + - name: Setup Env Vars + uses: ./.github/actions/setup-env-vars + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v1 + with: + role-to-assume: 
arn:aws:iam::369469875935:role/AutoGluonCIBenchmarkConfig + role-duration-seconds: 3600 + aws-region: us-east-1 + - name: Extract branch name + shell: bash + run: echo "branch=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}" >> $GITHUB_OUTPUT + id: extract_branch + - name: Generate AMLB User Dir + run: | + /bin/bash CI/bench/generate_amlb_user_dir.sh ${{ github.repository }} ${{ steps.extract_branch.outputs.branch }} ${{ github.sha }} benchmark: - needs: setup + needs: generate_amlb_user_dir runs-on: ubuntu-latest defaults: run: @@ -108,12 +131,16 @@ jobs: role-to-assume: arn:aws:iam::369469875935:role/AutoGluonCIBenchmark role-duration-seconds: 14400 aws-region: us-east-1 + - name: Extract branch name + shell: bash + run: echo "branch=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}" >> $GITHUB_OUTPUT + id: extract_branch - name: Run benchmark shell: bash -l {0} run: | nvm install 20 npm install -g aws-cdk - /bin/bash ./.github/workflow_scripts/run_benchmark.sh ${{ github.event.inputs.module }} ${{ github.event.inputs.preset }} ${{ github.event.inputs.benchmark }} ${{ github.event.inputs.time_limit }} ${{ github.event.inputs.branch_or_pr_number }} ${{ github.event.inputs.pr-sha }} + /bin/bash ./.github/workflow_scripts/run_benchmark.sh ${{ github.event.inputs.module }} ${{ github.event.inputs.preset }} ${{ github.event.inputs.benchmark }} ${{ github.event.inputs.time_limit }} ${{ steps.extract_branch.outputs.branch }} ${{ github.sha }} - name: Upload website.txt uses: actions/upload-artifact@v3 with: @@ -143,4 +170,4 @@ jobs: repository: ${{ github.event.inputs.repository }} comment-id: ${{ github.event.inputs.comment-id }} body: ${{ steps.website.outputs.body }} - \ No newline at end of file + diff --git a/.github/workflows/benchmark_master.yml b/.github/workflows/benchmark_master.yml new file mode 100644 index 00000000000..8ec4e4697dd --- /dev/null +++ b/.github/workflows/benchmark_master.yml @@ -0,0 +1,77 @@ +# Workflow to trigger/schedule benchmarking, cleaning, 
aggregating on master branch only and storing results in S3 +name: Benchmark Master Branch +on: + push: + branches: ["master"] + schedule: + - cron: '00 09 * * SUN' # 09:00 UTC (01:00 PST / 02:00 PDT) every Sunday + +env: + AG_MODULE: tabular + AG_PRESET: medium + AG_BENCHMARK: test + AG_TIME_LIMIT: 1h + AG_BRANCH_NAME: master + +permissions: + id-token: write + contents: read + +jobs: + generate_amlb_user_dir: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v2 + - name: Setup Env Vars + uses: ./.github/actions/setup-env-vars + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v1 + with: + role-to-assume: arn:aws:iam::369469875935:role/AutoGluonCIBenchmarkConfig + role-duration-seconds: 3600 + aws-region: us-east-1 + - name: Generate AMLB User Dir + run: | + /bin/bash CI/bench/generate_amlb_user_dir.sh ${{ github.repository }} ${{ github.ref }} ${{ github.sha }} + + benchmark: + needs: generate_amlb_user_dir + runs-on: ubuntu-latest + defaults: + run: + shell: bash + steps: + - name: Free Disk Space (Ubuntu) + # uses: jlumbroso/free-disk-space@v1.2.0 + uses: hirnidrin/free-disk-space@main # revert back once fix in https://github.com/jlumbroso/free-disk-space/pull/11 + with: + tool-cache: false + android: true + dotnet: true + haskell: true + large-packages: true + docker-images: true + swap-storage: true + - name: Checkout repository for PR + uses: actions/checkout@v2 + - name: Setup Python + uses: actions/setup-python@v1 + with: + python-version: '3.9' + - name: Setup npm + uses: actions/setup-node@v3 + with: + node-version: 'latest' + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v1 + with: + role-to-assume: arn:aws:iam::369469875935:role/AutoGluonCIBenchmark + role-duration-seconds: 14400 + aws-region: us-east-1 + - name: Run benchmark + shell: bash -l {0} + run: | + nvm install 20 + npm install -g aws-cdk + /bin/bash ./.github/workflow_scripts/run_benchmark.sh ${{ 
env.AG_MODULE }} ${{ env.AG_PRESET }} ${{ env.AG_BENCHMARK }} ${{ env.AG_TIME_LIMIT }} ${{ env.AG_BRANCH_NAME }} ${{ github.sha }} diff --git a/.github/workflows/continuous_integration.yml b/.github/workflows/continuous_integration.yml index 9943c8292fb..430198c3b3c 100644 --- a/.github/workflows/continuous_integration.yml +++ b/.github/workflows/continuous_integration.yml @@ -22,28 +22,6 @@ jobs: run: | echo This is a restricted branch reserved for certain modules. Please use another branch instead exit 1 - generate_bench_configs: - needs: branch_check - runs-on: ubuntu-latest - steps: - - name: Checkout repository - uses: actions/checkout@v2 - - name: Setup Env Vars - uses: ./.github/actions/setup-env-vars - - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@v1 - with: - role-to-assume: arn:aws:iam::369469875935:role/AutoGluonCIBenchmarkConfig - role-duration-seconds: 3600 - aws-region: us-east-1 - - name: Generate bench configs (For Push) - if: ${{ github.event_name == 'push' }} - run: | - /bin/bash CI/bench/generate_amlb_user_dir.sh ${{ env.GIT_REPO }} ${{ env.BRANCH }} ${{ env.SHORT_SHA }} - - name: Generate bench configs (For Pull Request) - if: ${{ github.event_name == 'pull_request_target' }} - run: | - /bin/bash CI/bench/generate_amlb_user_dir.sh ${{ env.GIT_REPO }} ${{ env.BRANCH }} ${{ env.SHORT_SHA }} ${{ env.PR_NUMBER }} lint_check: needs: branch_check runs-on: ubuntu-latest diff --git a/.github/workflows/slash_command_dispatch.yml b/.github/workflows/slash_command_dispatch.yml index 2c8c3666e64..647bf319ab5 100644 --- a/.github/workflows/slash_command_dispatch.yml +++ b/.github/workflows/slash_command_dispatch.yml @@ -33,6 +33,7 @@ jobs: uses: peter-evans/slash-command-dispatch@v2.3.0 with: token: ${{ secrets.PAT }} + permission: write commands: | benchmark platform_tests diff --git a/CI/bench/evaluate.py b/CI/bench/evaluate.py index 1795d526cf4..f330896b758 100644 --- a/CI/bench/evaluate.py +++ b/CI/bench/evaluate.py @@ 
-11,11 +11,13 @@ "--config_path", help="path to generated config path to fetch benchmark name", type=str, required=True ) parser.add_argument("--time_limit", help="time limit of the benchmark run", type=str, required=True) +parser.add_argument("--branch_name", help="if it happens to be master then just push the cleaned result, do not evaluate", type=str, required=True) args = parser.parse_args() config_path = args.config_path time_limit = args.time_limit +branch_name = args.branch_name for root, dirs, files in os.walk(config_path): for file in files: @@ -54,25 +56,38 @@ ] ) -paths = [] -frameworks = [] -for file in os.listdir("./results"): - if file.endswith(".csv"): - file = os.path.join("./results", file) - df = pd.read_csv(file) - paths.append(os.path.basename(file)) - frameworks += list(df["framework"].unique()) +# If it is a PR then perform the evaluation w.r.t cleaned master bench results +if branch_name != "master": + paths = [] + frameworks = [] + for file in os.listdir("./results"): + if file.endswith(".csv"): + file = os.path.join("./results", file) + df = pd.read_csv(file) + paths.append(os.path.basename(file)) + frameworks += list(df["framework"].unique()) -subprocess.run( - [ - "agbench", - "evaluate-amlb-results", - "--frameworks-run", - f"{','.join(frameworks)}", - "--results-dir-input", - "./results", - "--paths", - f"{','.join(paths)}", - "--no-clean-data", - ] -) + modified_list_paths = [] + modified_list_frameworks = [] + + for path in paths: + modified_list_paths.append('--paths') + modified_list_paths.append(path) + + for framework in frameworks: + modified_list_frameworks.append('--frameworks-run') + modified_list_frameworks.append(framework) + + paths = modified_list_paths + frameworks = modified_list_frameworks + subprocess.run( + [ + "agbench", + "evaluate-amlb-results", + *frameworks, + "--results-dir-input", + "./results", + *paths, + "--no-clean-data", + ] + )