Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 25 additions & 14 deletions .github/workflows/long-tests.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# This is a basic workflow to help you get started with Actions

#Turing commands are commented out , we regress QV100,RTX3070,A100

name: Long Tests

# Controls when the workflow will run
Expand Down Expand Up @@ -40,10 +42,17 @@ jobs:
run: |
source ./env-setup/12.4_env_setup.sh
source ./gpu-simulator/setup_environment.sh
./util/job_launching/run_simulations.py -B rodinia_2.0-ft,GPU_Microbenchmark -C QV100-SASS -T ~/../common/accel-sim/traces/volta-tesla-v100/latest/ -N sass-short-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT
./util/job_launching/run_simulations.py -B rodinia_2.0-ft,GPU_Microbenchmark -C RTX2060-SASS -T ~/../common/accel-sim/traces/turing-rtx2060/latest/ -N sass-short-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT
./util/job_launching/run_simulations.py -B rodinia_2.0-ft,GPU_Microbenchmark -C RTX3070-SASS -T ~/../common/accel-sim/traces/ampere-rtx3070/latest/ -N sass-short-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT
./util/job_launching/run_simulations.py -B rodinia_2.0-ft,GPU_Microbenchmark -C A100-SASS -T /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/A100/traces/device-0/12.8/ -N sass-short-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT
./util/job_launching/run_simulations.py -B GPU_Microbenchmark -C QV100-SASS -T /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/QV100//hw_run/traces/device-0/12.8/ -N sass-short-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT
./util/job_launching/run_simulations.py -B rodinia_2.0-ft -C QV100-SASS -T /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/QV100/hw_run/traces/device-0/12.8/ -N sass-short-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT

#./util/job_launching/run_simulations.py -B rodinia_2.0-ft -C RTX2060-SASS -T ~/../common/accel-sim/traces/turing-rtx2060/latest/ -N sass-short-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT

./util/job_launching/run_simulations.py -B GPU_Microbenchmark -C RTX3070-SASS -T /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/RTX3070/hw_run/traces/device-0/12.8/ -N sass-short-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT
./util/job_launching/run_simulations.py -B rodinia_2.0-ft -C RTX3070-SASS -T /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/RTX3070/hw_run/traces/device-0/12.8/ -N sass-short-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT

./util/job_launching/run_simulations.py -B GPU_Microbenchmark -C A100-SASS -T /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/A100/hw_run/traces/device-0/12.8/ -N sass-short-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT
./util/job_launching/run_simulations.py -B rodinia_2.0-ft -C A100-SASS -T /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/A100/hw_run/traces/device-0/12.8/ -N sass-short-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT

./util/job_launching/run_simulations.py -B mlperf_inference -C RTX3070-SASS -T /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/mlperf_rtx3070/traces/device-0/12.8/ -N sass-short-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT

./util/job_launching/monitor_func_test.py -v -s --sleep_time 300 stats-per-app-sass.csv -N sass-short-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT
Expand All @@ -56,25 +65,25 @@ jobs:
# either create a new branch or check it out if it already exists
git -C ./statistics-archive checkout $BRANCH_NAME 2>/dev/null || git -C ./statistics-archive checkout -b $BRANCH_NAME
./util/job_launching/get_stats.py -k -K -R -B GPU_Microbenchmark -C QV100-SASS -A | tee v100-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv
./util/job_launching/get_stats.py -k -K -R -B GPU_Microbenchmark -C RTX2060-SASS -A | tee turing-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv
#./util/job_launching/get_stats.py -k -K -R -B GPU_Microbenchmark -C RTX2060-SASS -A | tee turing-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv
./util/job_launching/get_stats.py -k -K -R -B GPU_Microbenchmark -C RTX3070-SASS -A | tee ampere-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv
./util/job_launching/get_stats.py -k -K -R -B GPU_Microbenchmark -C A100-SASS -A | tee ampere-a100-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv

mkdir -p statistics-archive/ubench/
# First we merge and archive this run to the main csv that contains all previous runs
./util/plotting/merge-stats.py -R -c ./statistics-archive/ubench/v100-ubench-sass.csv,v100-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv \
| tee v100-ubench-sass.csv && mv v100-ubench-sass.csv ./statistics-archive/ubench/
./util/plotting/merge-stats.py -R -c ./statistics-archive/ubench/turing-ubench-sass.csv,turing-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv \
| tee turing-ubench-sass.csv && mv turing-ubench-sass.csv ./statistics-archive/ubench/
# ./util/plotting/merge-stats.py -R -c ./statistics-archive/ubench/turing-ubench-sass.csv,turing-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv \
# | tee turing-ubench-sass.csv && mv turing-ubench-sass.csv ./statistics-archive/ubench/
./util/plotting/merge-stats.py -R -c ./statistics-archive/ubench/ampere-ubench-sass.csv,ampere-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv \
| tee ampere-ubench-sass.csv && mv ampere-ubench-sass.csv ./statistics-archive/ubench/
./util/plotting/merge-stats.py -R -c ./statistics-archive/ubench/ampere-a100-ubench-sass.csv,ampere-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv \
| tee ampere-a100-ubench-sass.csv && mv ampere-a100-ubench-sass.csv ./statistics-archive/ubench/
# Next we merge the latest run with the current run (used for correlation plots) then archive the current run as the new latest for the next time this action occurs
./util/plotting/merge-stats.py -R -c ./statistics-archive/ubench/v100-ubench-sass-latest.csv,v100-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv \
| tee v100-ubench-sass-latest2.csv && mv v100-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv ./statistics-archive/ubench/v100-ubench-sass-latest.csv
./util/plotting/merge-stats.py -R -c ./statistics-archive/ubench/turing-ubench-sass-latest.csv,turing-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv \
| tee turing-ubench-sass-latest2.csv && mv turing-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv ./statistics-archive/ubench/turing-ubench-sass-latest.csv
# ./util/plotting/merge-stats.py -R -c ./statistics-archive/ubench/turing-ubench-sass-latest.csv,turing-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv \
# | tee turing-ubench-sass-latest2.csv && mv turing-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv ./statistics-archive/ubench/turing-ubench-sass-latest.csv
./util/plotting/merge-stats.py -R -c ./statistics-archive/ubench/ampere-ubench-sass-latest.csv,ampere-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv \
| tee ampere-ubench-sass-latest2.csv && mv ampere-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv ./statistics-archive/ubench/ampere-ubench-sass-latest.csv
./util/plotting/merge-stats.py -R -c ./statistics-archive/ubench/ampere-a100-ubench-sass-latest.csv,ampere-a100-ubench-sass-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT.csv \
Expand All @@ -90,10 +99,10 @@ jobs:
source ./env-setup/12.4_env_setup.sh
./util/hw_stats/get_hw_data.sh > /dev/null 2>&1
rm -rf ./util/plotting/correl-html/
./util/plotting/plot-correlation.py -c ./v100-ubench-sass-latest2.csv -H ./hw_run/volta-tesla-v100/11.2/ | tee v100-ubench-correl.txt
./util/plotting/plot-correlation.py -c ./turing-ubench-sass-latest2.csv -H ./hw_run/TURING-RTX2060/10.2/ | tee turing-ubench-correl.txt
./util/plotting/plot-correlation.py -c ./ampere-ubench-sass-latest2.csv -H ./hw_run/AMPERE-RTX3070/11.2/ | tee ampere-ubench-correl.txt
./util/plotting/plot-correlation.py -c ./ampere-a100-ubench-sass-latest2.csv -H /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/A100/device-0/12.8/ | tee ampere-a100-ubench-correl.txt
./util/plotting/plot-correlation.py -c ./v100-ubench-sass-latest2.csv -H /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/QV100/hw_run/device-0/12.8/ | tee v100-ubench-correl.txt
#./util/plotting/plot-correlation.py -c ./turing-ubench-sass-latest2.csv -H ./hw_run/TURING-RTX2060/10.2/ | tee turing-ubench-correl.txt
./util/plotting/plot-correlation.py -c ./ampere-ubench-sass-latest2.csv -H /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/RTX3070/hw_run/device-0/12.8/ | tee ampere-ubench-correl.txt
./util/plotting/plot-correlation.py -c ./ampere-a100-ubench-sass-latest2.csv -H /scratch/tgrogers-disk01/a/common/for-sharing/accel-sim/A100/hw_run/device-0/12.8/ | tee ampere-a100-ubench-correl.txt
ssh ghci@tgrogers-pc01 mkdir -p /home/ghci/accel-sim/correl/git_${GITHUB_REF}"_"$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT/
rsync --delete -r ./util/plotting/correl-html/ ghci@tgrogers-pc01:/home/ghci/accel-sim/correl/git_${GITHUB_REF}"_"$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT/
if [[ $GITHUB_EVENT_NAME == 'push' ]]; then
Expand Down Expand Up @@ -142,7 +151,9 @@ jobs:
srun --time=8:00:00 -c20 make rodinia_2.0-ft GPU_Microbenchmark -j20 -C ./gpu-app-collection/src
./gpu-app-collection/get_regression_data.sh

./util/job_launching/run_simulations.py -B rodinia_2.0-ft,GPU_Microbenchmark -C QV100-PTX,RTX2060-PTX,RTX3070-PTX,A100-PTX -N short-ptx-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT
#./util/job_launching/run_simulations.py -B rodinia_2.0-ft,GPU_Microbenchmark -C QV100-PTX,RTX2060-PTX,RTX3070-PTX,A100-PTX -N short-ptx-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT
./util/job_launching/run_simulations.py -B rodinia_2.0-ft,GPU_Microbenchmark -C QV100-PTX,RTX3070-PTX,A100-PTX -N short-ptx-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT

./util/job_launching/monitor_func_test.py -v -s stats-per-app-ptx.csv -N short-ptx-$GITHUB_RUN_NUMBER"_"$GITHUB_RUN_ATTEMPT
Tracer-Tool:
if: github.repository == 'accel-sim/accel-sim-framework'
Expand Down
37 changes: 17 additions & 20 deletions util/job_launching/apps/define-all-apps.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,43 +54,40 @@ GPU_Microbenchmark:
data_dirs: "$GPUAPPS_ROOT/data_dirs/"
execs:
- l1_bw_32f:
- args:
- args: --tpb 1024 --tpsm 1024 --blocks 1 --ws 32
accel-sim-mem: 1G
- l1_bw_64f:
- args:
- args: --tpb 1024 --tpsm 1024 --blocks 1 --ws 32
accel-sim-mem: 1G
- l1_bw_128:
- args:
- args: --tpb 1024 --tpsm 1024 --blocks 1 --ws 32
accel-sim-mem: 2G
- l1_lat:
- args:
accel-sim-mem: 1G
- l1_lat:
- args:
- args: --blocks 1 --ws 32
accel-sim-mem: 1G
- l2_bw_32f:
- args:
- args: --tpb 1024 --tpsm 1024 --blocks 160 --ws 32
accel-sim-mem: 6G
- l2_bw_64f:
- args:
- args: --tpb 1024 --tpsm 1024 --blocks 160 --l2 786432 --ws 32
accel-sim-mem: 6G
# - l2_bw_128:
# - args:
# accel-sim-mem: 1G
- l2_lat:
- args:
- args: --tpb 1 --tpsm 1 --blocks 1 --l2 786432 --ws 32
accel-sim-mem: 1G
- mem_bw:
- args:
- args: --tpb 1024 --tpsm 1024 --blocks 80 --l2 1572864 --ws 32 --memclk 1132 --membw 64
accel-sim-mem: 2G
- mem_lat:
- args:
- args: --tpb 1024 --tpsm 1024 --blocks 80 --l2 1572864 --ws 32 --memclk 1132 --membw 64
accel-sim-mem: 1G
- shared_bw:
- args:
- args: --tpb 1024 --tpsm 1024 --blocks 1 --ws 32
accel-sim-mem: 2G
- shared_lat:
- args:
- args: --blocks 1 --ws 32
accel-sim-mem: 1G
- shared_bank_conflicts:
## argument 1 kernel has conflicts
Expand All @@ -100,13 +97,13 @@ GPU_Microbenchmark:
- args: 2
accel-sim-mem: 1G
- MaxFlops:
- args:
- args: --tpb 1024 --blocks 1 --ws 32
accel-sim-mem: 1G
- l1_shared_bw:
- args:
- args: --tpb 1024 --blocks 1 --ws 32
accel-sim-mem: 1G
- l1_bw_32f_unroll:
- args:
- args: --tpb 1024 --blocks 1 --ws 32
accel-sim-mem: 1G
- l1_bw_32f_unroll_large:
- args:
Expand All @@ -117,10 +114,10 @@ GPU_Atomic:
data_dirs: "$GPUAPPS_ROOT/data_dirs/"
execs:
- atomic_add_bw:
- args:
- args: --tpb 1 --tpsm 1 --blocks 1 --ws 32
accel-sim-mem: 1G
- atomic_add_bw_conflict:
- args:
- args: --tpb 1024 --tpsm 2048 --blocks 160 --ws 32
accel-sim-mem: 1G
- atomic_add_bw_profile:
- args: 16
Expand Down Expand Up @@ -1060,4 +1057,4 @@ huggingface:
execs:
- helloworld:
- args:
accel-sim-mem: 10G
accel-sim-mem: 10G