diff --git a/.github/workflows/long-tests.yml b/.github/workflows/long-tests.yml index 86df7acd3..250eb142b 100644 --- a/.github/workflows/long-tests.yml +++ b/.github/workflows/long-tests.yml @@ -1,5 +1,7 @@ # This is a basic workflow to help you get started with Actions +# Turing commands are commented out; we regress QV100, RTX3070, and A100 + name: Long Tests # Controls when the workflow will run @@ -40,10 +42,17 @@ jobs: run: | source ./env-setup/12.4_env_setup.sh source ./gpu-simulator/setup_environment.sh - ./util/job_launching/run_simulations.py -B rodinia_2.0-ft,GPU_Microbenchmark -C QV100-SASS -T ~/../common/accel-sim/traces/volta-tesla-v100/latest/ -N sass-short-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT - ./util/job_launching/run_simulations.py -B rodinia_2.0-ft,GPU_Microbenchmark -C RTX2060-SASS -T ~/../common/accel-sim/traces/turing-rtx2060/latest/ -N sass-short-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT - ./util/job_launching/run_simulations.py -B rodinia_2.0-ft,GPU_Microbenchmark -C RTX3070-SASS -T ~/../common/accel-sim/traces/ampere-rtx3070/latest/ -N sass-short-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT - ./util/job_launching/run_simulations.py -B rodinia_2.0-ft,GPU_Microbenchmark -C A100-SASS -T /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/A100/traces/device-0/12.8/ -N sass-short-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT + ./util/job_launching/run_simulations.py -B GPU_Microbenchmark -C QV100-SASS -T /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/QV100//hw_run/traces/device-0/12.8/ -N sass-short-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT + ./util/job_launching/run_simulations.py -B rodinia_2.0-ft -C QV100-SASS -T /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/QV100/hw_run/traces/device-0/12.8/ -N sass-short-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT + + #./util/job_launching/run_simulations.py -B rodinia_2.0-ft -C RTX2060-SASS -T ~/../common/accel-sim/traces/turing-rtx2060/latest/ -N sass-short-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT 
+ + ./util/job_launching/run_simulations.py -B GPU_Microbenchmark -C RTX3070-SASS -T /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/RTX3070/hw_run/traces/device-0/12.8/ -N sass-short-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT + ./util/job_launching/run_simulations.py -B rodinia_2.0-ft -C RTX3070-SASS -T /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/RTX3070/hw_run/traces/device-0/12.8/ -N sass-short-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT + + ./util/job_launching/run_simulations.py -B GPU_Microbenchmark -C A100-SASS -T /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/A100/hw_run/traces/device-0/12.8/ -N sass-short-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT + ./util/job_launching/run_simulations.py -B rodinia_2.0-ft -C A100-SASS -T /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/A100/hw_run/traces/device-0/12.8/ -N sass-short-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT + ./util/job_launching/run_simulations.py -B mlperf_inference -C RTX3070-SASS -T /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/mlperf_rtx3070/traces/device-0/12.8/ -N sass-short-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT ./util/job_launching/monitor_func_test.py -v -s --sleep_time 300 stats-per-app-sass.csv -N sass-short-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT @@ -56,7 +65,7 @@ jobs: # either create a new branch or check it out if it already exists git -C ./statistics-archive checkout $BRANCH_NAME 2>/dev/null || git -C ./statistics-archive checkout -b $BRANCH_NAME ./util/job_launching/get_stats.py -k -K -R -B GPU_Microbenchmark -C QV100-SASS -A | tee v100-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv - ./util/job_launching/get_stats.py -k -K -R -B GPU_Microbenchmark -C RTX2060-SASS -A | tee turing-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv + #./util/job_launching/get_stats.py -k -K -R -B GPU_Microbenchmark -C RTX2060-SASS -A | tee turing-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv ./util/job_launching/get_stats.py -k -K -R -B 
GPU_Microbenchmark -C RTX3070-SASS -A | tee ampere-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv ./util/job_launching/get_stats.py -k -K -R -B GPU_Microbenchmark -C A100-SASS -A | tee ampere-a100-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv @@ -64,8 +73,8 @@ jobs: # First we merge and archive this run to the main csv that contains all previous runs ./util/plotting/merge-stats.py -R -c ./statistics-archive/ubench/v100-ubench-sass.csv,v100-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv \ | tee v100-ubench-sass.csv && mv v100-ubench-sass.csv ./statistics-archive/ubench/ - ./util/plotting/merge-stats.py -R -c ./statistics-archive/ubench/turing-ubench-sass.csv,turing-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv \ - | tee turing-ubench-sass.csv && mv turing-ubench-sass.csv ./statistics-archive/ubench/ + # ./util/plotting/merge-stats.py -R -c ./statistics-archive/ubench/turing-ubench-sass.csv,turing-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv \ + # | tee turing-ubench-sass.csv && mv turing-ubench-sass.csv ./statistics-archive/ubench/ ./util/plotting/merge-stats.py -R -c ./statistics-archive/ubench/ampere-ubench-sass.csv,ampere-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv \ | tee ampere-ubench-sass.csv && mv ampere-ubench-sass.csv ./statistics-archive/ubench/ ./util/plotting/merge-stats.py -R -c ./statistics-archive/ubench/ampere-a100-ubench-sass.csv,ampere-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv \ @@ -73,8 +82,8 @@ jobs: # Next we merge the latest run with the current run (used for correlation plots) then archive the current run as the new latest for the next time this action occurs ./util/plotting/merge-stats.py -R -c ./statistics-archive/ubench/v100-ubench-sass-latest.csv,v100-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv \ | tee v100-ubench-sass-latest2.csv && mv v100-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv 
./statistics-archive/ubench/v100-ubench-sass-latest.csv - ./util/plotting/merge-stats.py -R -c ./statistics-archive/ubench/turing-ubench-sass-latest.csv,turing-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv \ - | tee turing-ubench-sass-latest2.csv && mv turing-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv ./statistics-archive/ubench/turing-ubench-sass-latest.csv + # ./util/plotting/merge-stats.py -R -c ./statistics-archive/ubench/turing-ubench-sass-latest.csv,turing-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv \ + # | tee turing-ubench-sass-latest2.csv && mv turing-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv ./statistics-archive/ubench/turing-ubench-sass-latest.csv ./util/plotting/merge-stats.py -R -c ./statistics-archive/ubench/ampere-ubench-sass-latest.csv,ampere-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv \ | tee ampere-ubench-sass-latest2.csv && mv ampere-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv ./statistics-archive/ubench/ampere-ubench-sass-latest.csv ./util/plotting/merge-stats.py -R -c ./statistics-archive/ubench/ampere-a100-ubench-sass-latest.csv,ampere-a100-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv \ @@ -90,10 +99,10 @@ jobs: source ./env-setup/12.4_env_setup.sh ./util/hw_stats/get_hw_data.sh > /dev/null 2>&1 rm -rf ./util/plotting/correl-html/ - ./util/plotting/plot-correlation.py -c ./v100-ubench-sass-latest2.csv -H ./hw_run/volta-tesla-v100/11.2/ | tee v100-ubench-correl.txt - ./util/plotting/plot-correlation.py -c ./turing-ubench-sass-latest2.csv -H ./hw_run/TURING-RTX2060/10.2/ | tee turing-ubench-correl.txt - ./util/plotting/plot-correlation.py -c ./ampere-ubench-sass-latest2.csv -H ./hw_run/AMPERE-RTX3070/11.2/ | tee ampere-ubench-correl.txt - ./util/plotting/plot-correlation.py -c ./ampere-a100-ubench-sass-latest2.csv -H /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/A100/device-0/12.8/ | tee ampere-a100-ubench-correl.txt + 
./util/plotting/plot-correlation.py -c ./v100-ubench-sass-latest2.csv -H /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/QV100/hw_run/device-0/12.8/ | tee v100-ubench-correl.txt + #./util/plotting/plot-correlation.py -c ./turing-ubench-sass-latest2.csv -H ./hw_run/TURING-RTX2060/10.2/ | tee turing-ubench-correl.txt + ./util/plotting/plot-correlation.py -c ./ampere-ubench-sass-latest2.csv -H /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/RTX3070/hw_run/device-0/12.8/ | tee ampere-ubench-correl.txt + ./util/plotting/plot-correlation.py -c ./ampere-a100-ubench-sass-latest2.csv -H /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/A100/hw_run/device-0/12.8/ | tee ampere-a100-ubench-correl.txt ssh ghci@tgrogers-pc01 mkdir -p /home/ghci/accel-sim/correl/git_${GITHUB_REF}"_"$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT/ rsync --delete -r ./util/plotting/correl-html/ ghci@tgrogers-pc01:/home/ghci/accel-sim/correl/git_${GITHUB_REF}"_"$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT/ if [[ $GITHUB_EVENT_NAME == 'push' ]]; then @@ -142,7 +151,9 @@ jobs: srun --time=8:00:00 -c20 make rodinia_2.0-ft GPU_Microbenchmark -j20 -C ./gpu-app-collection/src ./gpu-app-collection/get_regression_data.sh - ./util/job_launching/run_simulations.py -B rodinia_2.0-ft,GPU_Microbenchmark -C QV100-PTX,RTX2060-PTX,RTX3070-PTX,A100-PTX -N short-ptx-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT + #./util/job_launching/run_simulations.py -B rodinia_2.0-ft,GPU_Microbenchmark -C QV100-PTX,RTX2060-PTX,RTX3070-PTX,A100-PTX -N short-ptx-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT + ./util/job_launching/run_simulations.py -B rodinia_2.0-ft,GPU_Microbenchmark -C QV100-PTX,RTX3070-PTX,A100-PTX -N short-ptx-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT + ./util/job_launching/monitor_func_test.py -v -s stats-per-app-ptx.csv -N short-ptx-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT Tracer-Tool: if: github.repository == 'accel-sim/accel-sim-framework' diff --git a/util/job_launching/apps/define-all-apps.yml 
b/util/job_launching/apps/define-all-apps.yml index 3063b2857..c3ca005e1 100644 --- a/util/job_launching/apps/define-all-apps.yml +++ b/util/job_launching/apps/define-all-apps.yml @@ -54,43 +54,40 @@ GPU_Microbenchmark: data_dirs: "$GPUAPPS_ROOT/data_dirs/" execs: - l1_bw_32f: - - args: + - args: --tpb 1024 --tpsm 1024 --blocks 1 --ws 32 accel-sim-mem: 1G - l1_bw_64f: - - args: + - args: --tpb 1024 --tpsm 1024 --blocks 1 --ws 32 accel-sim-mem: 1G - l1_bw_128: - - args: + - args: --tpb 1024 --tpsm 1024 --blocks 1 --ws 32 accel-sim-mem: 2G - l1_lat: - - args: - accel-sim-mem: 1G - - l1_lat: - - args: + - args: --blocks 1 --ws 32 accel-sim-mem: 1G - l2_bw_32f: - - args: + - args: --tpb 1024 --tpsm 1024 --blocks 160 --ws 32 accel-sim-mem: 6G - l2_bw_64f: - - args: + - args: --tpb 1024 --tpsm 1024 --blocks 160 --l2 786432 --ws 32 accel-sim-mem: 6G # - l2_bw_128: # - args: # accel-sim-mem: 1G - l2_lat: - - args: + - args: --tpb 1 --tpsm 1 --blocks 1 --l2 786432 --ws 32 accel-sim-mem: 1G - mem_bw: - - args: + - args: --tpb 1024 --tpsm 1024 --blocks 80 --l2 1572864 --ws 32 --memclk 1132 --membw 64 accel-sim-mem: 2G - mem_lat: - - args: + - args: --tpb 1024 --tpsm 1024 --blocks 80 --l2 1572864 --ws 32 --memclk 1132 --membw 64 accel-sim-mem: 1G - shared_bw: - - args: + - args: --tpb 1024 --tpsm 1024 --blocks 1 --ws 32 accel-sim-mem: 2G - shared_lat: - - args: + - args: --blocks 1 --ws 32 accel-sim-mem: 1G - shared_bank_conflicts: ## argument 1 kernel has conflicts @@ -100,13 +97,13 @@ GPU_Microbenchmark: - args: 2 accel-sim-mem: 1G - MaxFlops: - - args: + - args: --tpb 1024 --blocks 1 --ws 32 accel-sim-mem: 1G - l1_shared_bw: - - args: + - args: --tpb 1024 --blocks 1 --ws 32 accel-sim-mem: 1G - l1_bw_32f_unroll: - - args: + - args: --tpb 1024 --blocks 1 --ws 32 accel-sim-mem: 1G - l1_bw_32f_unroll_large: - args: @@ -117,10 +114,10 @@ GPU_Atomic: data_dirs: "$GPUAPPS_ROOT/data_dirs/" execs: - atomic_add_bw: - - args: + - args: --tpb 1 --tpsm 1 --blocks 1 --ws 32 
accel-sim-mem: 1G - atomic_add_bw_conflict: - - args: + - args: --tpb 1024 --tpsm 2048 --blocks 160 --ws 32 accel-sim-mem: 1G - atomic_add_bw_profile: - args: 16 @@ -1060,4 +1057,4 @@ huggingface: execs: - helloworld: - args: - accel-sim-mem: 10G \ No newline at end of file + accel-sim-mem: 10G