diff --git a/.azure-pipelines/model-test.yml b/.azure-pipelines/model-test.yml index f604b176e13..c63d03cc6a4 100644 --- a/.azure-pipelines/model-test.yml +++ b/.azure-pipelines/model-test.yml @@ -47,13 +47,12 @@ parameters: type: object default: - resnet50-v1-12 - - bert_base_MRPC_dynamic + # - bert_base_MRPC_dynamic - name: MXNetModelList type: object default: - resnet50v1 - stages: - stage: TensorFlowModels displayName: Run TensorFlow Model @@ -111,7 +110,6 @@ stages: modelName: ${{ model }} framework: "mxnet" - - stage: GenerateLogs displayName: Generate Report dependsOn: [TensorFlowModels, PyTorchModels, MXNetModels, ONNXModels] @@ -138,9 +136,9 @@ stages: patterns: "**/*_tuning_info.log" path: $(OUT_SCRIPT_PATH) - task: UsePythonVersion@0 - displayName: "Use Python 3." + displayName: "Use Python 3.8" inputs: - versionSpec: "3" + versionSpec: "3.8" - script: | cd ${OUT_SCRIPT_PATH} mkdir generated @@ -154,15 +152,15 @@ stages: patterns: "**.log" path: $(OUT_SCRIPT_PATH)/last_generated project: $(System.TeamProject) - pipeline: "Baseline" + pipeline: "Model-Test" runVersion: "specific" - runId: $(model_runID) + runId: $(refer_buildId) retryDownloadCount: 3 displayName: "Download last logs" - script: | echo "------ Generating final report.html ------" cd ${OUT_SCRIPT_PATH} - /usr/bin/bash generate_report.sh --WORKSPACE generated --output_dir generated --last_logt_dir last_generated --ghprbPullId $(System.PullRequest.PullRequestNumber) --MR_source_branch $(System.PullRequest.SourceBranch) --MR_source_repo $(System.PullRequest.SourceRepositoryURI) --MR_target_branch $(System.PullRequest.TargetBranch) --repo_url $(Build.Repository.Uri) --source_commit_id $(Build.SourceVersion) --build_id $(Build.BuildId) + /usr/bin/bash generate_report.sh --WORKSPACE generated --output_dir generated --last_logt_dir last_generated displayName: "Generate report" - task: PublishPipelineArtifact@1 inputs: diff --git a/.azure-pipelines/scripts/install_nc.sh b/.azure-pipelines/scripts/install_nc.sh index 38fd439404b..fa2daca5d19 100644 --- a/.azure-pipelines/scripts/install_nc.sh +++ b/.azure-pipelines/scripts/install_nc.sh @@ -4,4 +4,4 @@ cd /neural-compressor python -m pip install --no-cache-dir -r requirements.txt python setup.py sdist bdist_wheel pip install dist/neural_compressor*.whl -pip list \ No newline at end of file +pip list diff --git a/.azure-pipelines/scripts/install_nc_full.sh b/.azure-pipelines/scripts/install_nc_full.sh index b3f59626dfd..7513baeb254 100644 --- a/.azure-pipelines/scripts/install_nc_full.sh +++ b/.azure-pipelines/scripts/install_nc_full.sh @@ -4,4 +4,4 @@ cd /neural-compressor python -m pip install --no-cache-dir -r requirements.txt python setup.py --full sdist bdist_wheel pip install dist/neural_compressor*.whl -pip list \ No newline at end of file +pip list diff --git a/.azure-pipelines/scripts/models/collect_log_all.py b/.azure-pipelines/scripts/models/collect_log_all.py index 61fe9454c2e..fb9db0d6721 100644 --- a/.azure-pipelines/scripts/models/collect_log_all.py +++ b/.azure-pipelines/scripts/models/collect_log_all.py @@ -1,7 +1,5 @@ -import re -import os -import platform import argparse +import os parser = argparse.ArgumentParser(allow_abbrev=False) parser.add_argument("--logs_dir", type=str, default=".") diff --git a/.azure-pipelines/scripts/models/collect_log_model.py b/.azure-pipelines/scripts/models/collect_log_model.py index 37d305423bf..7fbfb55dfac 100644 --- a/.azure-pipelines/scripts/models/collect_log_model.py +++ 
b/.azure-pipelines/scripts/models/collect_log_model.py @@ -1,8 +1,6 @@ -import re -import os -import platform import argparse - +import os +import re parser = argparse.ArgumentParser(allow_abbrev=False) parser.add_argument("--framework", type=str, required=True) @@ -11,6 +9,7 @@ parser.add_argument("--logs_dir", type=str, default=".") parser.add_argument("--output_dir", type=str, default=".") parser.add_argument("--build_id", type=str, default="3117") +parser.add_argument("--stage", type=str, default="collect_log") args = parser.parse_args() print('===== collecting log model =======') print('build_id: '+args.build_id) @@ -18,13 +17,119 @@ PLATFORM='icx' URL ='https://dev.azure.com/lpot-inc/neural-compressor/_build/results?buildId='+args.build_id+'&view=artifacts&pathAsName=false&type=publishedArtifacts' -print(args) + +def get_model_tuning_dict_results(): + tuning_result_dict = {} + + if os.path.exists(tuning_log): + print('tuning log found') + tmp = {'fp32_acc': 0, 'int8_acc': 0, 'tuning_trials': 0} + with open(tuning_log, "r") as f: + for line in f: + parse_tuning_line(line, tmp) + print(tmp) + # set model status failed + if tmp['fp32_acc'] == 0 or tmp['int8_acc'] == 0: + os.system('echo "##vso[task.setvariable variable=' + args.framework + '_' + args.model + '_failed]true"') + + tuning_result_dict = { + "OS": OS, + "Platform": PLATFORM, + "Framework": args.framework, + "Version": args.fwk_ver, + "Model": args.model, + "Strategy": tmp['strategy'], + "Tune_time": tmp['tune_time'], + } + benchmark_accuracy_result_dict = { + 'int8': { + "OS": OS, + "Platform": PLATFORM, + "Framework": args.framework, + "Version": args.fwk_ver, + "Model": args.model, + "Mode": "Inference", + "Type": "Accuracy", + "BS": 1, + "Value": tmp['int8_acc'], + "Url": URL, + }, + 'fp32': { + "OS": OS, + "Platform": PLATFORM, + "Framework": args.framework, + "Version": args.fwk_ver, + "Model": args.model, + "Mode": "Inference", + "Type": "Accuracy", + "BS": 1, + "Value": tmp['fp32_acc'], + "Url": URL, + } + } + + return tuning_result_dict, benchmark_accuracy_result_dict + else: + return {}, {} + + +def get_model_benchmark_dict_results(): + benchmark_performance_result_dict = {"int8": {}, "fp32": {}} + for precision in ["int8", "fp32"]: + throughput = 0.0 + bs = 1 + for root, dirs, files in os.walk(args.logs_dir): + for name in files: + file_name = os.path.join(root, name) + print(file_name) + if "performance-" + precision in name: + for line in open(file_name, "r"): + result = parse_perf_line(line) + if result.get("throughput"): + throughput += result.get("throughput") + if result.get("batch_size"): + bs = result.get("batch_size") + + # set model status failed + if throughput == 0.0: + os.system('echo "##vso[task.setvariable variable=' + args.framework + '_' + args.model + '_failed]true"') + benchmark_performance_result_dict[precision] = { + "OS": OS, + "Platform": PLATFORM, + "Framework": args.framework, + "Version": args.fwk_ver, + "Model": args.model, + "Mode": "Inference", + "Type": "Performance", + "BS": bs, + "Value": throughput, + "Url": URL, + } + + return benchmark_performance_result_dict + + +def get_refer_data(): + refer_log = os.path.join(f"{args.logs_dir}_refer_log", f"{args.framework}_{args.model}_summary.log") + result = {} + if os.path.exists(refer_log): + with open(refer_log, "r") as f: + lines = f.readlines() + keys = lines[0].split(";") + values = [lines[i].split(";") for i in range(1, len(lines))] + for value in values: + precision = value[keys.index("Precision")] + Type = value[keys.index("Type")] +
result[f"{precision}_{Type}"] = float(value[keys.index("Value")]) + return result + else: + print(f"refer log file: {refer_log} not found") + return 0 -def main(): +def collect_log(): results = [] tuning_infos = [] - tuning_log = os.path.join(args.logs_dir, f"{args.framework}-{args.model}-tune.log") print("tuning log dir is {}".format(tuning_log)) # get model tuning results if os.path.exists(tuning_log): @@ -123,5 +228,29 @@ def parse_perf_line(line) -> float: return perf_data +def check_status(precision, precision_upper, check_accuracy = False): + performance_result = get_model_benchmark_dict_results() + current_performance = performance_result.get(precision).get("Value") + refer_performance = refer.get(f"{precision_upper}_Performance") + print(f"current_performance_data = {current_performance}, refer_performance_data = {refer_performance}") + assert abs(current_performance - refer_performance) / refer_performance <= 0.05 + + if check_accuracy: + _, accuracy_result = get_model_tuning_dict_results() + current_accuracy = accuracy_result.get(precision).get("Value") + refer_accuracy = refer.get(f"{precision_upper}_Accuracy") + print(f"current_accuracy_data = {current_accuracy}, refer_accuarcy_data = {refer_accuracy}") + assert abs(current_accuracy - refer_accuracy) / refer_accuracy <= 0.05 + + if __name__ == '__main__': - main() + tuning_log = os.path.join(args.logs_dir, f"{args.framework}-{args.model}-tune.log") + refer = get_refer_data() + if args.stage == "collect_log": + collect_log() + elif args.stage == "int8_benchmark": + check_status("int8", "INT8") + elif args.stage == "fp32_benchmark": + check_status("fp32", "FP32") + else: + raise ValueError(f"{args.stage} does not exist") diff --git a/.azure-pipelines/scripts/models/env_setup.sh b/.azure-pipelines/scripts/models/env_setup.sh new file mode 100644 index 00000000000..7443e3e9d25 --- /dev/null +++ b/.azure-pipelines/scripts/models/env_setup.sh @@ -0,0 +1,124 @@ +#!/bin/bash +set -eo pipefail +source /neural-compressor/.azure-pipelines/scripts/change_color.sh + +# get parameters +PATTERN='[-a-zA-Z0-9_]*=' + +for i in "$@"; do + case $i in + --yaml=*) + yaml=$(echo $i | sed "s/${PATTERN}//") + ;; + --framework=*) + framework=$(echo $i | sed "s/${PATTERN}//") + ;; + --fwk_ver=*) + fwk_ver=$(echo $i | sed "s/${PATTERN}//") + ;; + --torch_vision_ver=*) + torch_vision_ver=$(echo $i | sed "s/${PATTERN}//") + ;; + --model=*) + model=$(echo $i | sed "s/${PATTERN}//") + ;; + --model_src_dir=*) + model_src_dir=$(echo $i | sed "s/${PATTERN}//") + ;; + --dataset_location=*) + dataset_location=$(echo $i | sed "s/${PATTERN}//") + ;; + --batch_size=*) + batch_size=$(echo $i | sed "s/${PATTERN}//") + ;; + --strategy=*) + strategy=$(echo $i | sed "s/${PATTERN}//") + ;; + --new_benchmark=*) + new_benchmark=$(echo $i | sed "s/${PATTERN}//") + ;; + *) + echo "Parameter $i not recognized." + exit 1 + ;; + esac +done + +SCRIPTS_PATH="/neural-compressor/.azure-pipelines/scripts/models" +log_dir="/neural-compressor/.azure-pipelines/scripts/models" +WORK_SOURCE_DIR="/neural-compressor/examples/${framework}" +$BOLD_YELLOW && echo "processing ${framework}-${fwk_ver}-${model}" && $RESET + +$BOLD_YELLOW && echo "======= creat log_dir =========" && $RESET +if [ -d "${log_dir}/${model}" ]; then + $BOLD_GREEN && echo "${log_dir}/${model} already exists, don't need to mkdir." && $RESET +else + $BOLD_GREEN && echo "no log dir ${log_dir}/${model}, create." 
&& $RESET + cd ${log_dir} + mkdir ${model} +fi + +$BOLD_YELLOW && echo "====== install requirements ======" && $RESET +/bin/bash /neural-compressor/.azure-pipelines/scripts/install_nc.sh + +cd ${WORK_SOURCE_DIR}/${model_src_dir} +pip install ruamel_yaml +pip install psutil +pip install protobuf==3.20.1 +if [[ "${framework}" == "tensorflow" ]]; then + pip install intel-tensorflow==${fwk_ver} +elif [[ "${framework}" == "pytorch" ]]; then + pip install torch==${fwk_ver} -f https://download.pytorch.org/whl/torch_stable.html + pip install torchvision==${torch_vision_ver} -f https://download.pytorch.org/whl/torch_stable.html +elif [[ "${framework}" == "onnxrt" ]]; then + pip install onnx==1.11.0 + pip install onnxruntime==${fwk_ver} +elif [[ "${framework}" == "mxnet" ]]; then + if [[ "${fwk_ver}" == "1.7.0" ]]; then + pip install mxnet==${fwk_ver}.post2 + elif [[ "${fwk_ver}" == "1.6.0" ]]; then + pip install mxnet-mkl==${mxnet_version} + else + pip install mxnet==${fwk_ver} + fi +fi + +if [ -f "requirements.txt" ]; then + sed -i '/neural-compressor/d' requirements.txt + if [ "${framework}" == "onnxrt" ]; then + sed -i '/^onnx>=/d;/^onnx==/d;/^onnxruntime>=/d;/^onnxruntime==/d' requirements.txt + fi + if [ "${framework}" == "tensorflow" ]; then + sed -i '/tensorflow==/d;/tensorflow$/d' requirements.txt + sed -i '/^intel-tensorflow/d' requirements.txt + fi + if [ "${framework}" == "mxnet" ]; then + sed -i '/mxnet==/d;/mxnet$/d;/mxnet-mkl==/d;/mxnet-mkl$/d' requirements.txt + fi + if [ "${framework}" == "pytorch" ]; then + sed -i '/torch==/d;/torch$/d;/torchvision==/d;/torchvision$/d' requirements.txt + fi + n=0 + until [ "$n" -ge 5 ]; do + python -m pip install -r requirements.txt && break + n=$((n + 1)) + sleep 5 + done + pip list +else + $BOLD_RED && echo "Not found requirements.txt file." && $RESET +fi + +$BOLD_YELLOW && echo "======== update yaml config ========" && $RESET +$BOLD_YELLOW && echo -e "\nPrint origin yaml..." && $RESET +cat ${yaml} +python ${SCRIPTS_PATH}/update_yaml_config.py \ + --yaml=${yaml} \ + --framework=${framework} \ + --dataset_location=${dataset_location} \ + --batch_size=${batch_size} \ + --strategy=${strategy} \ + --new_benchmark=${new_benchmark} \ + --multi_instance='true' +$BOLD_YELLOW && echo -e "\nPrint updated yaml... 
" && $RESET +cat ${yaml} diff --git a/.azure-pipelines/scripts/models/generate_report.sh b/.azure-pipelines/scripts/models/generate_report.sh index 51b0e3423c8..e76cba525f4 100644 --- a/.azure-pipelines/scripts/models/generate_report.sh +++ b/.azure-pipelines/scripts/models/generate_report.sh @@ -24,52 +24,24 @@ while [[ $# -gt 0 ]];do key=${1} case ${key} in -w|--WORKSPACE) - WORKSPACE=${2} - shift 2 - ;; + WORKSPACE=${2} + shift 2 + ;; --script_path) - script_path=${2} - shift 2 - ;; + script_path=${2} + shift 2 + ;; --output_dir) - output_dir=${2} - shift 2 - ;; + output_dir=${2} + shift 2 + ;; --last_logt_dir) - last_logt_dir=${2} - shift 2 - ;; - --ghprbPullId) - ghprbPullId=${2} - shift 2 - ;; - --MR_source_branch) - MR_source_branch=${2} - shift 2 - ;; - --MR_source_repo) - MR_source_repo=${2} - shift 2 - ;; - --MR_target_branch) - MR_target_branch=${2} - shift 2 - ;; - --repo_url) - repo_url=${2} - shift 2 - ;; - --source_commit_id) - source_commit_id=${2} - shift 2 - ;; - --build_id) - build_id=${2} - shift 2 - ;; + last_logt_dir=${2} + shift 2 + ;; *) - shift - ;; + shift + ;; esac done @@ -86,11 +58,19 @@ summaryLogLast="${last_logt_dir}/summary.log" tuneLogLast="${last_logt_dir}/tuning_info.log" echo "summaryLogLast: ${summaryLogLast}" echo "tuneLogLast: ${tuneLogLast}" +ghprbPullId=${SYSTEM_PULLREQUEST_PULLREQUESTNUMBER} +MR_source_branch=${SYSTEM_PULLREQUEST_SOURCEBRANCH} +MR_source_repo=${SYSTEM_PULLREQUEST_SOURCEREPOSITORYURI} +MR_target_branch=${SYSTEM_PULLREQUEST_TARGETBRANCH} +repo_url=${BUILD_REPOSITORY_URI} +source_commit_id=${BUILD_SOURCEVERSION} +build_id=${BUILD_BUILDID} echo "MR_source_branch: ${MR_source_branch}" echo "MR_source_repo: ${MR_source_repo}" echo "MR_target_branch: ${MR_target_branch}" echo "repo_url: ${repo_url}" echo "commit_id: ${source_commit_id}" +echo "ghprbPullId: ${ghprbPullId}" function main { @@ -98,7 +78,6 @@ function main { generate_html_body generate_results generate_html_footer - } function generate_inference { @@ -205,33 +184,33 @@ function generate_html_core { if(metric == "acc") { target = (int8_result - fp32_result) / fp32_result; if(target >= -0.01) { - printf("%.2f %", target*100); + printf("%.2f %", target*100); }else if(target < -0.05) { - printf("%.2f %", target*100); - job_status = "fail" + printf("%.2f %", target*100); + job_status = "fail" }else{ - printf("%.2f %", target*100); + printf("%.2f %", target*100); } }else if(metric == "perf") { target = int8_result / fp32_result; if(target >= 1.5) { - printf("%.2f", target); + printf("%.2f", target); }else if(target < 1) { - printf("%.2f", target); - job_status = "fail" + printf("%.2f", target); + job_status = "fail" }else{ - printf("%.2f", target); + printf("%.2f", target); } } else { target = int8_result / fp32_result; if(target >= 2) { - printf("%.2f", target); + printf("%.2f", target); }else if(target < 1) { - printf("%.2f", target); - job_status = "fail" + printf("%.2f", target); + job_status = "fail" }else{ - printf("%.2f", target); + printf("%.2f", target); } } }else { @@ -263,14 +242,14 @@ function generate_html_core { } } else { if(new_result == nan && previous_result == nan){ - printf(""); + printf(""); } else{ if(new_result == nan) { - job_status = "fail" - status_png = "background-color:#FFD2D2"; - printf("", status_png); + job_status = "fail" + status_png = "background-color:#FFD2D2"; + printf("", status_png); } else{ - printf(""); + printf(""); } } } @@ -367,7 +346,7 @@ function generate_html_core { printf("\n"); } END{ - printf("\n%s", job_status); + printf("\n%s", 
job_status); } ' >> ${output_dir}/report.html job_state=$(tail -1 ${WORKSPACE}/report.html) @@ -426,14 +405,14 @@ Test_Info='' if [ "${qtools_branch}" == "" ]; then - commit_id=$(echo ${ghprbActualCommit} |awk '{print substr($1,1,7)}') + commit_id=$(echo ${ghprbActualCommit} |awk '{print substr($1,1,7)}') - MR_TITLE="[ PR-${ghprbPullId} ]" - Test_Info_Title="Source Branch Target Branch Commit " - Test_Info="${MR_source_branch} ${MR_target_branch} ${source_commit_id:0:6}" + MR_TITLE="[ PR-${ghprbPullId} ]" + Test_Info_Title="Source Branch Target Branch Commit " + Test_Info="${MR_source_branch} ${MR_target_branch} ${source_commit_id:0:6}" else - Test_Info_Title="Test Branch Commit ID " - Test_Info="${qtools_branch} ${qtools_commit} " + Test_Info_Title="Test Branch Commit ID " + Test_Info="${qtools_branch} ${qtools_commit} " fi cat >> ${output_dir}/report.html << eof @@ -441,18 +420,20 @@ cat >> ${output_dir}/report.html << eof

     <h1 align="center">Neural Compressor Tuning Tests ${MR_TITLE}
-        [ <a href="https://dev.azure.com/lpot-inc/neural-compressor/_build/results?buildId=${build_id}">Job-${build_id}</a> ]
-    </h1>
-    <h1 align="center">Test Status: ${Jenkins_job_status}</h1>
+        [ <a
+            href="https://dev.azure.com/lpot-inc/neural-compressor/_build/results?buildId=${build_id}">Job-${build_id}</a>
+        ]
+    </h1>
+    <h1 align="center">Test Status: ${Jenkins_job_status}</h1>
     <h2>Summary</h2>
     <table class="features-table">
-    <tr>
-       <th>Repo</th>
-       ${Test_Info_Title}
-    </tr>
-    <tr>
-       <td><a href="https://github.com/intel/neural-compressor">neural-compressor</a></td>
-       ${Test_Info}
-    </tr>
+        <tr>
+            <th>Repo</th>
+            ${Test_Info_Title}
+        </tr>
+        <tr>
+            <td><a href="https://github.com/intel/neural-compressor">neural-compressor</a></td>
+            ${Test_Info}
+        </tr>
     </table>
eof @@ -537,68 +518,68 @@ cat > ${output_dir}/report.html << eof } .features-table { - width: 100%; - margin: 0 auto; - border-collapse: separate; - border-spacing: 0; - text-shadow: 0 1px 0 #fff; - color: #2a2a2a; - background: #fafafa; - background-image: -moz-linear-gradient(top, #fff, #eaeaea, #fff); /* Firefox 3.6 */ - background-image: -webkit-gradient(linear,center bottom,center top,from(#fff),color-stop(0.5, #eaeaea),to(#fff)); - font-family: Verdana,Arial,Helvetica + width: 100%; + margin: 0 auto; + border-collapse: separate; + border-spacing: 0; + text-shadow: 0 1px 0 #fff; + color: #2a2a2a; + background: #fafafa; + background-image: -moz-linear-gradient(top, #fff, #eaeaea, #fff); /* Firefox 3.6 */ + background-image: -webkit-gradient(linear,center bottom,center top,from(#fff),color-stop(0.5, #eaeaea),to(#fff)); + font-family: Verdana,Arial,Helvetica } .features-table th,td { - text-align: center; - height: 25px; - line-height: 25px; - padding: 0 8px; - border: 1px solid #cdcdcd; - box-shadow: 0 1px 0 white; - -moz-box-shadow: 0 1px 0 white; - -webkit-box-shadow: 0 1px 0 white; - white-space: nowrap; + text-align: center; + height: 25px; + line-height: 25px; + padding: 0 8px; + border: 1px solid #cdcdcd; + box-shadow: 0 1px 0 white; + -moz-box-shadow: 0 1px 0 white; + -webkit-box-shadow: 0 1px 0 white; + white-space: nowrap; } .no-border th { - box-shadow: none; - -moz-box-shadow: none; - -webkit-box-shadow: none; + box-shadow: none; + -moz-box-shadow: none; + -webkit-box-shadow: none; } .col-cell { - text-align: center; - width: 150px; - font: normal 1em Verdana, Arial, Helvetica; + text-align: center; + width: 150px; + font: normal 1em Verdana, Arial, Helvetica; } .col-cell3 { - background: #efefef; - background: rgba(144,144,144,0.15); + background: #efefef; + background: rgba(144,144,144,0.15); } .col-cell1, .col-cell2 { - background: #B0C4DE; - background: rgba(176,196,222,0.3); + background: #B0C4DE; + background: rgba(176,196,222,0.3); } .col-cellh { - font: bold 1.3em 'trebuchet MS', 'Lucida Sans', Arial; - -moz-border-radius-topright: 10px; - -moz-border-radius-topleft: 10px; - border-top-right-radius: 10px; - border-top-left-radius: 10px; - border-top: 1px solid #eaeaea !important; + font: bold 1.3em 'trebuchet MS', 'Lucida Sans', Arial; + -moz-border-radius-topright: 10px; + -moz-border-radius-topleft: 10px; + border-top-right-radius: 10px; + border-top-left-radius: 10px; + border-top: 1px solid #eaeaea !important; } .col-cellf { - font: bold 1.4em Georgia; - -moz-border-radius-bottomright: 10px; - -moz-border-radius-bottomleft: 10px; - border-bottom-right-radius: 10px; - border-bottom-left-radius: 10px; - border-bottom: 1px solid #dadada !important; + font: bold 1.4em Georgia; + -moz-border-radius-bottomright: 10px; + -moz-border-radius-bottomleft: 10px; + border-bottom-right-radius: 10px; + border-bottom-left-radius: 10px; + border-bottom: 1px solid #dadada !important; } diff --git a/.azure-pipelines/scripts/models/run_benchmark_common.sh b/.azure-pipelines/scripts/models/run_benchmark_common.sh index b342ac48992..ae5b8a1af36 100644 --- a/.azure-pipelines/scripts/models/run_benchmark_common.sh +++ b/.azure-pipelines/scripts/models/run_benchmark_common.sh @@ -1,11 +1,12 @@ #!/bin/bash -set -x +set -eo pipefail +source /neural-compressor/.azure-pipelines/scripts/change_color.sh # get parameters PATTERN='[-a-zA-Z0-9_]*=' +SCRIPTS_PATH="/neural-compressor/.azure-pipelines/scripts/models" -for i in "$@" -do +for i in "$@"; do case $i in --framework=*) framework=`echo $i | 
sed "s/${PATTERN}//"`;; @@ -15,76 +16,114 @@ do input_model=`echo $i | sed "s/${PATTERN}//"`;; --benchmark_cmd=*) benchmark_cmd=`echo $i | sed "s/${PATTERN}//"`;; - --tune_acc=*) - tune_acc=`echo $i | sed "s/${PATTERN}//"`;; --log_dir=*) log_dir=`echo $i | sed "s/${PATTERN}//"`;; --new_benchmark=*) new_benchmark=`echo $i | sed "s/${PATTERN}//"`;; --precision=*) precision=`echo $i | sed "s/${PATTERN}//"`;; + --stage=*) + stage=`echo $i | sed "s/${PATTERN}//"`;; *) echo "Parameter $i not recognized."; exit 1;; esac done -echo "-------- run_benchmark_common --------" +$BOLD_YELLOW && echo "-------- run_benchmark_common --------" && $RESET -# run accuracy -# tune_acc==true means using accuracy results from tuning log -if [ "${tune_acc}" == "false" ]; then - echo "run tuning accuracy in precision ${precision}" - eval "${benchmark_cmd} --input_model=${input_model} --mode=accuracy" 2>&1 | tee ${log_dir}/${framework}-${model}-accuracy-${precision}.log -fi +main() { + # run accuracy + # USE_TUNE_ACC==true means using accuracy results from tuning log + if [ "${USE_TUNE_ACC}" == "false" ]; then + run_accuracy + fi + + # run performance + if [ "${PERF_STABLE_CHECK}" == "false" ]; then + run_performance + else + max_loop=3 + for ((iter = 0; iter < ${max_loop}; iter++)); do + run_performance + { + check_perf_gap + exit_code=$? + } || true + if [ ${exit_code} -ne 0 ]; then + $BOLD_RED && echo "FAILED with performance gap!!" && $RESET + else + $BOLD_GREEN && echo "SUCCEED!!" && $RESET + break + fi + done + exit ${exit_code} + fi +} + +function check_perf_gap() { + python -u ${SCRIPTS_PATH}/collect_log_model.py \ + --framework=${framework} \ + --fwk_ver=${fwk_ver} \ + --model=${model} \ + --logs_dir="${log_dir}" \ + --output_dir="${log_dir}" \ + --build_id=${BUILD_BUILDID} \ + --stage=${stage} +} + +function run_performance() { + cmd="${benchmark_cmd} --input_model=${input_model}" + if [ "${new_benchmark}" == "true" ]; then + $BOLD_YELLOW && echo "run with internal benchmark..." && $RESET + eval ${cmd} 2>&1 | tee ${log_dir}/${framework}-${model}-performance-${precision}.log + else + $BOLD_YELLOW && echo "run with external multiInstance benchmark..." && $RESET + multiInstance + fi +} + +function run_accuracy() { + $BOLD_YELLOW && echo "run tuning accuracy in precision ${precision}" && $RESET + eval "${benchmark_cmd} --input_model=${input_model} --mode=accuracy" 2>&1 | tee ${log_dir}/${framework}-${model}-accuracy-${precision}.log +} function multiInstance() { - ncores_per_socket=${ncores_per_socket:=$( lscpu | grep 'Core(s) per socket' | cut -d: -f2 | xargs echo -n)} - echo "Executing multi instance benchmark" + ncores_per_socket=${ncores_per_socket:=$(lscpu | grep 'Core(s) per socket' | cut -d: -f2 | xargs echo -n)} + $BOLD_YELLOW && echo "Executing multi instance benchmark" && $RESET ncores_per_instance=4 - echo "ncores_per_socket=${ncores_per_socket}, ncores_per_instance=${ncores_per_instance}" + $BOLD_YELLOW && echo "ncores_per_socket=${ncores_per_socket}, ncores_per_instance=${ncores_per_instance}" && $RESET logFile="${log_dir}/${framework}-${model}-performance-${precision}" benchmark_pids=() - for((j=0;$j<${ncores_per_socket};j=$(($j + ${ncores_per_instance})))); - do - end_core_num=$((j + ncores_per_instance -1)) - if [ ${end_core_num} -ge ${ncores_per_socket} ]; then - end_core_num=$((ncores_per_socket-1)) - fi - numactl -m 0 -C "${j}-${end_core_num}" ${cmd} 2>&1 | tee ${logFile}-${ncores_per_socket}-${ncores_per_instance}-${j}.log & - benchmark_pids+=($!) 
+ for ((j = 0; $j < ${ncores_per_socket}; j = $(($j + ${ncores_per_instance})))); do + end_core_num=$((j + ncores_per_instance - 1)) + if [ ${end_core_num} -ge ${ncores_per_socket} ]; then + end_core_num=$((ncores_per_socket - 1)) + fi + numactl -m 0 -C "${j}-${end_core_num}" ${cmd} 2>&1 | tee ${logFile}-${ncores_per_socket}-${ncores_per_instance}-${j}.log & + benchmark_pids+=($!) done status="SUCCESS" for pid in "${benchmark_pids[@]}"; do wait $pid exit_code=$? - echo "Detected exit code: ${exit_code}" + $BOLD_YELLOW && echo "Detected exit code: ${exit_code}" && $RESET if [ ${exit_code} == 0 ]; then - echo "Process ${pid} succeeded" + $BOLD_GREEN && echo "Process ${pid} succeeded" && $RESET else - echo "Process ${pid} failed" + $BOLD_RED && echo "Process ${pid} failed" && $RESET status="FAILURE" fi done - echo "Benchmark process status: ${status}" + $BOLD_YELLOW && echo "Benchmark process status: ${status}" && $RESET if [ ${status} == "FAILURE" ]; then - echo "Benchmark process returned non-zero exit code." + $BOLD_RED && echo "Benchmark process returned non-zero exit code." && $RESET exit 1 fi } - -# run performance -cmd="${benchmark_cmd} --input_model=${input_model}" - -if [ "${new_benchmark}" == "true" ]; then - echo "run with internal benchmark..." - eval ${cmd} 2>&1 | tee ${log_dir}/${framework}-${model}-performance-${precision}.log -else - echo "run with external multiInstance benchmark..." - multiInstance -fi +main diff --git a/.azure-pipelines/scripts/models/run_model_trigger_common.sh b/.azure-pipelines/scripts/models/run_model_trigger_common.sh index 397ccade727..d0c89560416 100644 --- a/.azure-pipelines/scripts/models/run_model_trigger_common.sh +++ b/.azure-pipelines/scripts/models/run_model_trigger_common.sh @@ -1,5 +1,6 @@ #!/bin/bash - +set -eo pipefail +source /neural-compressor/.azure-pipelines/scripts/change_color.sh # get parameters PATTERN='[-a-zA-Z0-9_]*=' @@ -32,10 +33,8 @@ do tuning_cmd=`echo $i | sed "s/${PATTERN}//"`;; --benchmark_cmd=*) benchmark_cmd=`echo $i | sed "s/${PATTERN}//"`;; - --tune_acc=*) - tune_acc=`echo $i | sed "s/${PATTERN}//"`;; - --build_id=*) - build_id=`echo $i | sed "s/${PATTERN}//"`;; + --mode=*) + mode=`echo $i | sed "s/${PATTERN}//"`;; *) echo "Parameter $i not recognized."; exit 1;; esac @@ -44,113 +43,87 @@ done log_dir="/neural-compressor/.azure-pipelines/scripts/models" WORK_SOURCE_DIR="/neural-compressor/examples/${framework}" SCRIPTS_PATH="/neural-compressor/.azure-pipelines/scripts/models" -echo "processing ${framework}-${fwk_ver}-${model}" -echo "tuning_cmd is ${tuning_cmd}" -echo "benchmark_cmd is ${benchmark_cmd}" - -echo "======= creat log_dir =========" -if [ -d "${log_dir}/${model}" ]; then - echo "${log_dir}/${model} already exists, don't need to mkdir." -else - echo "no log dir ${log_dir}/${model}, create." 
- cd ${log_dir} - mkdir ${model} -fi - -echo "====== install requirements ======" -/bin/bash /neural-compressor/.azure-pipelines/scripts/install_nc.sh - -cd ${WORK_SOURCE_DIR}/${model_src_dir} -pip install ruamel_yaml -pip install psutil -pip install protobuf==3.20.1 -if [[ "${framework}" == "tensorflow" ]]; then - pip install intel-tensorflow==${fwk_ver} -elif [[ "${framework}" == "pytorch" ]]; then - pip install torch==${fwk_ver} -f https://download.pytorch.org/whl/torch_stable.html - pip install torchvision==${torch_vision_ver} -f https://download.pytorch.org/whl/torch_stable.html -elif [[ "${framework}" == "onnxrt" ]]; then - pip install onnx==1.11.0 - pip install onnxruntime==${fwk_ver} -elif [[ "${framework}" == "mxnet" ]]; then - if [[ "${fwk_ver}" == "1.7.0" ]]; then - pip install mxnet==${fwk_ver}.post2 - elif [[ "${fwk_ver}" == "1.6.0" ]]; then - pip install mxnet-mkl==${mxnet_version} - else - pip install mxnet==${fwk_ver} - fi -fi - -if [ -f "requirements.txt" ]; then - sed -i '/neural-compressor/d' requirements.txt - if [ "${framework}" == "onnxrt" ]; then - sed -i '/^onnx>=/d;/^onnx==/d;/^onnxruntime>=/d;/^onnxruntime==/d' requirements.txt - fi - if [ "${framework}" == "tensorflow" ]; then - sed -i '/tensorflow==/d;/tensorflow$/d' requirements.txt - sed -i '/^intel-tensorflow/d' requirements.txt +$BOLD_YELLOW && echo "processing ${framework}-${fwk_ver}-${model}" && $RESET +$BOLD_YELLOW && echo "benchmark_cmd is ${benchmark_cmd}" && $RESET + +if [ "${mode}" == "env_setup" ]; then + /bin/bash env_setup.sh \ + --yaml=${yaml} \ + --framework=${framework} \ + --fwk_ver=${fwk_ver} \ + --torch_vision_ver=${torch_vision_ver} \ + --model=${model} \ + --model_src_dir=${model_src_dir} \ + --dataset_location=${dataset_location} \ + --batch_size=${batch_size} \ + --strategy=${strategy} \ + --new_benchmark=${new_benchmark} +elif [ "${mode}" == "tuning" ]; then + cd ${WORK_SOURCE_DIR}/${model_src_dir} + $BOLD_YELLOW && echo "tuning_cmd is ${tuning_cmd}" && $RESET + $BOLD_YELLOW && echo "======== run tuning ========" && $RESET + /bin/bash ${SCRIPTS_PATH}/run_tuning_common.sh \ + --framework=${framework} \ + --model=${model} \ + --tuning_cmd="${tuning_cmd}" \ + --log_dir="${log_dir}/${model}" \ + --input_model=${input_model} \ + --strategy=${strategy} \ + 2>&1 | tee -a ${log_dir}/${model}/${framework}-${model}-tune.log + $BOLD_YELLOW && echo "====== check tuning status. ======" && $RESET + control_phrase="model which meet accuracy goal." + if [ $(grep "${control_phrase}" ${log_dir}/${model}/${framework}-${model}-tune.log | wc -l) == 0 ];then + $BOLD_RED && echo "====== tuning FAILED!! ======" && $RESET; exit 1 fi - if [ "${framework}" == "mxnet" ]; then - sed -i '/mxnet==/d;/mxnet$/d;/mxnet-mkl==/d;/mxnet-mkl$/d' requirements.txt + if [ $(grep "${control_phrase}" ${log_dir}/${model}/${framework}-${model}-tune.log | grep "Not found" | wc -l) == 1 ];then + $BOLD_RED && echo "====== tuning FAILED!! ======" && $RESET; exit 1 fi - if [ "${framework}" == "pytorch" ]; then - sed -i '/torch==/d;/torch$/d;/torchvision==/d;/torchvision$/d' requirements.txt + $BOLD_GREEN && echo "====== tuning SUCCEED!! 
======" && $RESET +elif [ "${mode}" == "fp32_benchmark" ]; then + cd ${WORK_SOURCE_DIR}/${model_src_dir} + $BOLD_YELLOW && echo "benchmark_cmd is ${benchmark_cmd}" && $RESET + $BOLD_YELLOW && echo "====== run benchmark fp32 =======" && $RESET + /bin/bash ${SCRIPTS_PATH}/run_benchmark_common.sh \ + --framework=${framework} \ + --model=${model} \ + --input_model=${input_model} \ + --benchmark_cmd="${benchmark_cmd}" \ + --log_dir="${log_dir}/${model}" \ + --new_benchmark=${new_benchmark} \ + --precision="fp32" \ + --stage=${mode} +elif [ "${mode}" == "int8_benchmark" ]; then + cd ${WORK_SOURCE_DIR}/${model_src_dir} + $BOLD_YELLOW && echo "benchmark_cmd is ${benchmark_cmd}" && $RESET + $BOLD_YELLOW && echo "====== run benchmark int8 =======" && $RESET + if [[ "${framework}" == "onnxrt" ]]; then + model_name="${log_dir}/${model}/${framework}-${model}-tune.onnx" + elif [[ "${framework}" == "mxnet" ]]; then + model_name="${log_dir}/${model}" + elif [[ "${framework}" == "tensorflow" ]]; then + model_name="${log_dir}/${model}/${framework}-${model}-tune.pb" + elif [[ "${framework}" == "pytorch" ]]; then + model_name=${input_model} + benchmark_cmd="${benchmark_cmd} --int8=true" fi - n=0 - until [ "$n" -ge 5 ] - do - python -m pip install -r requirements.txt && break - n=$((n+1)) - sleep 5 - done - pip list -else - echo "Not found requirements.txt file." + /bin/bash ${SCRIPTS_PATH}/run_benchmark_common.sh \ + --framework=${framework} \ + --model=${model} \ + --input_model="${model_name}" \ + --benchmark_cmd="${benchmark_cmd}" \ + --log_dir="${log_dir}/${model}" \ + --new_benchmark=${new_benchmark} \ + --precision="int8" \ + --stage=${mode} +elif [ "${mode}" == "collect_log" ]; then + cd ${WORK_SOURCE_DIR}/${model_src_dir} + $BOLD_YELLOW && echo "====== collect logs of model ${model} =======" && $RESET + python -u ${SCRIPTS_PATH}/collect_log_model.py \ + --framework=${framework} \ + --fwk_ver=${fwk_ver} \ + --model=${model} \ + --logs_dir="${log_dir}/${model}" \ + --output_dir="${log_dir}/${model}" \ + --build_id=${BUILD_BUILDID} + $BOLD_YELLOW && echo "====== Finish collect logs =======" && $RESET fi - - -echo "======== update yaml config ========" -echo -e "\nPrint origin yaml..." -cat ${yaml} -python ${SCRIPTS_PATH}/update_yaml_config.py --yaml=${yaml} --framework=${framework} \ ---dataset_location=${dataset_location} --batch_size=${batch_size} --strategy=${strategy} \ ---new_benchmark=${new_benchmark} --multi_instance='true' -echo -e "\nPrint updated yaml... 
" -cat ${yaml} - - -echo "======== run tuning ========" -/bin/bash ${SCRIPTS_PATH}/run_tuning_common.sh --framework=${framework} --model=${model} \ ---tuning_cmd="${tuning_cmd}" --log_dir="${log_dir}/${model}" --input_model=${input_model} --strategy=${strategy} \ -2>&1 | tee -a ${log_dir}/${model}/${framework}-${model}-tune.log - - -echo "====== run benchmark fp32 =======" -/bin/bash ${SCRIPTS_PATH}/run_benchmark_common.sh --framework=${framework} --model=${model} \ - --input_model=${input_model} --benchmark_cmd="${benchmark_cmd}" --tune_acc=${tune_acc} \ - --log_dir="${log_dir}/${model}" --new_benchmark=${new_benchmark} --precision="fp32" - - -echo "====== run benchmark int8 =======" -if [[ "${framework}" == "onnxrt" ]]; then - model_name="${log_dir}/${model}/${framework}-${model}-tune.onnx" -elif [[ "${framework}" == "mxnet" ]]; then - model_name="${log_dir}/${model}" -elif [[ "${framework}" == "tensorflow" ]]; then - model_name="${log_dir}/${model}/${framework}-${model}-tune.pb" -elif [[ "${framework}" == "pytorch" ]]; then - model_name=${input_model} - benchmark_cmd="${benchmark_cmd} --int8=true" -fi -/bin/bash ${SCRIPTS_PATH}/run_benchmark_common.sh --framework=${framework} --model=${model} \ - --input_model="${model_name}" --benchmark_cmd="${benchmark_cmd}" \ - --tune_acc=${tune_acc} --log_dir="${log_dir}/${model}" --new_benchmark=${new_benchmark} --precision="int8" - - -echo "====== collect logs of model ${model} =======" -python -u ${SCRIPTS_PATH}/collect_log_model.py --framework=${framework} --fwk_ver=${fwk_ver} --model=${model} \ ---logs_dir="${log_dir}/${model}" --output_dir="${log_dir}/${model}" --build_id=${build_id} - - -echo "====== Finish model test =======" \ No newline at end of file diff --git a/.azure-pipelines/scripts/models/run_mxnet_models_trigger.sh b/.azure-pipelines/scripts/models/run_mxnet_models_trigger.sh index 455ee809c61..8bf3b293fc2 100644 --- a/.azure-pipelines/scripts/models/run_mxnet_models_trigger.sh +++ b/.azure-pipelines/scripts/models/run_mxnet_models_trigger.sh @@ -1,5 +1,5 @@ #!/bin/bash - +set -eo pipefail # get parameters PATTERN='[-a-zA-Z0-9_]*=' @@ -8,10 +8,8 @@ do case $i in --model=*) model=`echo $i | sed "s/${PATTERN}//"`;; - --tune_acc=*) - tune_acc=`echo $i | sed "s/${PATTERN}//"`;; - --build_id=*) - build_id=`echo $i | sed "s/${PATTERN}//"`;; + --mode=*) + mode=`echo $i | sed "s/${PATTERN}//"`;; *) echo "Parameter $i not recognized."; exit 1;; esac @@ -35,7 +33,17 @@ if [ "${model}" == "resnet50v1" ]; then fi -/bin/bash run_model_trigger_common.sh --yaml=${yaml} --framework=${FRAMEWORK} --fwk_ver=${FRAMEWORK_VERSION} \ ---model=${model} --model_src_dir=${model_src_dir} --dataset_location=${dataset_location} \ ---input_model=${input_model} --batch_size=${batch_size} --strategy=${strategy} --new_benchmark=${new_benchmark} \ ---tuning_cmd="${tuning_cmd}" --benchmark_cmd="${benchmark_cmd}" --tune_acc=${tune_acc} --build_id=${build_id} +/bin/bash run_model_trigger_common.sh \ + --yaml=${yaml} \ + --framework=${FRAMEWORK} \ + --fwk_ver=${FRAMEWORK_VERSION} \ + --model=${model} \ + --model_src_dir=${model_src_dir} \ + --dataset_location=${dataset_location} \ + --input_model=${input_model} \ + --batch_size=${batch_size} \ + --strategy=${strategy} \ + --new_benchmark=${new_benchmark} \ + --tuning_cmd="${tuning_cmd}" \ + --benchmark_cmd="${benchmark_cmd}" \ + --mode=${mode} diff --git a/.azure-pipelines/scripts/models/run_onnxrt_models_trigger.sh b/.azure-pipelines/scripts/models/run_onnxrt_models_trigger.sh index d71081f61b3..a69852f01a5 100644 --- 
a/.azure-pipelines/scripts/models/run_onnxrt_models_trigger.sh +++ b/.azure-pipelines/scripts/models/run_onnxrt_models_trigger.sh @@ -1,5 +1,5 @@ #!/bin/bash - +set -eo pipefail # get parameters PATTERN='[-a-zA-Z0-9_]*=' @@ -8,10 +8,8 @@ do case $i in --model=*) model=`echo $i | sed "s/${PATTERN}//"`;; - --tune_acc=*) - tune_acc=`echo $i | sed "s/${PATTERN}//"`;; - --build_id=*) - build_id=`echo $i | sed "s/${PATTERN}//"`;; + --mode=*) + mode=`echo $i | sed "s/${PATTERN}//"`;; *) echo "Parameter $i not recognized."; exit 1;; esac @@ -31,7 +29,7 @@ if [ "${model}" == "resnet50-v1-12" ]; then batch_size=1 new_benchmark=true tuning_cmd="bash run_tuning.sh --input_model=${input_model} --config=${yaml}" - benchmark_cmd="bash run_benchmark.sh --config=${yaml}" + benchmark_cmd="bash run_benchmark.sh --config=${yaml} --mode=performance" elif [ "${model}" == "bert_base_MRPC_static" ]; then model_src_dir="language_translation/bert/quantization/ptq" dataset_location="/tf_dataset/pytorch/glue_data/MRPC" @@ -41,7 +39,7 @@ elif [ "${model}" == "bert_base_MRPC_static" ]; then batch_size=1 new_benchmark=true tuning_cmd="bash run_tuning.sh --input_model=${input_model} --config=${yaml}" - benchmark_cmd="bash run_benchmark.sh --config=${yaml}" + benchmark_cmd="bash run_benchmark.sh --config=${yaml} --mode=performance" elif [ "${model}" == "bert_base_MRPC_dynamic" ]; then model_src_dir="language_translation/bert/quantization/ptq" dataset_location="/tf_dataset/pytorch/glue_data/MRPC" @@ -51,7 +49,7 @@ elif [ "${model}" == "bert_base_MRPC_dynamic" ]; then batch_size=1 new_benchmark=true tuning_cmd="bash run_tuning.sh --input_model=${input_model} --config=${yaml}" - benchmark_cmd="bash run_benchmark.sh --config=${yaml}" + benchmark_cmd="bash run_benchmark.sh --config=${yaml} --mode=performance" elif [ "${model}" == "distilbert_base_MRPC_qdq" ]; then model_src_dir="language_translation/distilbert/quantization/ptq" dataset_location="/tf_dataset/pytorch/glue_data/MRPC" @@ -61,11 +59,21 @@ elif [ "${model}" == "distilbert_base_MRPC_qdq" ]; then batch_size=1 new_benchmark=true tuning_cmd="bash run_tuning.sh --input_model=${input_model} --config=${yaml}" - benchmark_cmd="bash run_benchmark.sh --config=${yaml}" + benchmark_cmd="bash run_benchmark.sh --config=${yaml} --mode=performance" fi -/bin/bash run_model_trigger_common.sh --yaml=${yaml} --framework=${FRAMEWORK} --fwk_ver=${FRAMEWORK_VERSION} \ ---model=${model} --model_src_dir=${model_src_dir} --dataset_location=${dataset_location} \ ---input_model=${input_model} --batch_size=${batch_size} --strategy=${strategy} --new_benchmark=${new_benchmark} \ ---tuning_cmd="${tuning_cmd}" --benchmark_cmd="${benchmark_cmd} --mode=performance" --tune_acc=${tune_acc} --build_id=${build_id} +/bin/bash run_model_trigger_common.sh \ + --yaml=${yaml} \ + --framework=${FRAMEWORK} \ + --fwk_ver=${FRAMEWORK_VERSION} \ + --model=${model} \ + --model_src_dir=${model_src_dir} \ + --dataset_location=${dataset_location} \ + --input_model=${input_model} \ + --batch_size=${batch_size} \ + --strategy=${strategy} \ + --new_benchmark=${new_benchmark} \ + --tuning_cmd="${tuning_cmd}" \ + --benchmark_cmd="${benchmark_cmd}" \ + --mode=${mode} diff --git a/.azure-pipelines/scripts/models/run_pytorch_models_trigger.sh b/.azure-pipelines/scripts/models/run_pytorch_models_trigger.sh index 9aef437666d..5cd776816f4 100644 --- a/.azure-pipelines/scripts/models/run_pytorch_models_trigger.sh +++ b/.azure-pipelines/scripts/models/run_pytorch_models_trigger.sh @@ -1,5 +1,5 @@ #!/bin/bash - +set -eo pipefail # 
get parameters PATTERN='[-a-zA-Z0-9_]*=' @@ -8,10 +8,8 @@ do case $i in --model=*) model=`echo $i | sed "s/${PATTERN}//"`;; - --tune_acc=*) - tune_acc=`echo $i | sed "s/${PATTERN}//"`;; - --build_id=*) - build_id=`echo $i | sed "s/${PATTERN}//"`;; + --mode=*) + mode=`echo $i | sed "s/${PATTERN}//"`;; *) echo "Parameter $i not recognized."; exit 1;; esac @@ -46,8 +44,18 @@ elif [ "${model}" == "resnet18_fx" ]; then fi -/bin/bash run_model_trigger_common.sh --yaml=${yaml} --framework=${FRAMEWORK} --fwk_ver=${FRAMEWORK_VERSION} \ ---torch_vision_ver=${TORCH_VISION_VERSION} --model=${model} --model_src_dir=${model_src_dir} \ ---dataset_location=${dataset_location} --input_model=${input_model} --batch_size=${batch_size} --strategy=${strategy} \ ---new_benchmark=${new_benchmark} --tuning_cmd="${tuning_cmd}" --benchmark_cmd="${benchmark_cmd}" \ ---tune_acc=${tune_acc} --build_id=${build_id} +/bin/bash run_model_trigger_common.sh \ + --yaml=${yaml} \ + --framework=${FRAMEWORK} \ + --fwk_ver=${FRAMEWORK_VERSION} \ + --torch_vision_ver=${TORCH_VISION_VERSION} \ + --model=${model} \ + --model_src_dir=${model_src_dir} \ + --dataset_location=${dataset_location} \ + --input_model=${input_model} \ + --batch_size=${batch_size} \ + --strategy=${strategy} \ + --new_benchmark=${new_benchmark} \ + --tuning_cmd="${tuning_cmd}" \ + --benchmark_cmd="${benchmark_cmd}" \ + --mode=${mode} diff --git a/.azure-pipelines/scripts/models/run_tensorflow_models_trigger.sh b/.azure-pipelines/scripts/models/run_tensorflow_models_trigger.sh index 1b3a6c6815d..b3eee910900 100644 --- a/.azure-pipelines/scripts/models/run_tensorflow_models_trigger.sh +++ b/.azure-pipelines/scripts/models/run_tensorflow_models_trigger.sh @@ -1,5 +1,5 @@ #!/bin/bash - +set -eo pipefail # get parameters PATTERN='[-a-zA-Z0-9_]*=' @@ -8,10 +8,8 @@ do case $i in --model=*) model=`echo $i | sed "s/${PATTERN}//"`;; - --tune_acc=*) - tune_acc=`echo $i | sed "s/${PATTERN}//"`;; - --build_id=*) - build_id=`echo $i | sed "s/${PATTERN}//"`;; + --mode=*) + mode=`echo $i | sed "s/${PATTERN}//"`;; *) echo "Parameter $i not recognized."; exit 1;; esac @@ -104,7 +102,17 @@ elif [ "${model}" == "resnet50_fashion" ]; then fi -/bin/bash run_model_trigger_common.sh --yaml=${yaml} --framework=${FRAMEWORK} --fwk_ver=${FRAMEWORK_VERSION} \ ---model=${model} --model_src_dir=${model_src_dir} --dataset_location=${dataset_location} \ ---input_model=${input_model} --batch_size=${batch_size} --strategy=${strategy} --new_benchmark=${new_benchmark} \ ---tuning_cmd="${tuning_cmd}" --benchmark_cmd="${benchmark_cmd}" --tune_acc=${tune_acc} --build_id=${build_id} +/bin/bash run_model_trigger_common.sh \ + --yaml=${yaml} \ + --framework=${FRAMEWORK} \ + --fwk_ver=${FRAMEWORK_VERSION} \ + --model=${model} \ + --model_src_dir=${model_src_dir} \ + --dataset_location=${dataset_location} \ + --input_model=${input_model} \ + --batch_size=${batch_size} \ + --strategy=${strategy} \ + --new_benchmark=${new_benchmark} \ + --tuning_cmd="${tuning_cmd}" \ + --benchmark_cmd="${benchmark_cmd}" \ + --mode=${mode} diff --git a/.azure-pipelines/scripts/models/run_tuning_common.sh b/.azure-pipelines/scripts/models/run_tuning_common.sh index 26b223bf994..fbb68d65605 100644 --- a/.azure-pipelines/scripts/models/run_tuning_common.sh +++ b/.azure-pipelines/scripts/models/run_tuning_common.sh @@ -1,5 +1,6 @@ #!/bin/bash -set -x +set -eo pipefail +source /neural-compressor/.azure-pipelines/scripts/change_color.sh # get parameters PATTERN='[-a-zA-Z0-9_]*=' @@ -35,16 +36,15 @@ else 
output_model=${log_dir}/${framework}-${model}-tune.pb fi -echo -e "-------- run_tuning_common --------" -echo ${tuning_cmd} +$BOLD_YELLOW && echo -e "-------- run_tuning_common --------" && $RESET +$BOLD_YELLOW && echo ${tuning_cmd} && $RESET + eval "/usr/bin/time -v ${tuning_cmd} --output_model=${output_model}" -echo "====== finish tuning. echo information. ======" +$BOLD_YELLOW && echo "====== finish tuning. echo information. ======" && $RESET endtime=`date +'%Y-%m-%d %H:%M:%S'` start_seconds=$(date --date="$starttime" +%s); end_seconds=$(date --date="$endtime" +%s); -echo "Tuning time spend: "$((end_seconds-start_seconds))"s " - -echo "Tuning strategy: ${strategy}" - -echo "Total resident size (kbytes): $(cat /proc/meminfo |grep 'MemTotal' |sed 's/[^0-9]//g')" +$BOLD_GREEN && echo "Tuning time spent: "$((end_seconds-start_seconds))"s " && $RESET +$BOLD_GREEN && echo "Tuning strategy: ${strategy}" && $RESET +$BOLD_GREEN && echo "Total memory (kbytes): $(cat /proc/meminfo | grep 'MemTotal' | sed 's/[^0-9]//g')" && $RESET diff --git a/.azure-pipelines/scripts/models/update_yaml_config.py b/.azure-pipelines/scripts/models/update_yaml_config.py index 9d92a457f6d..c305134e18d 100644 --- a/.azure-pipelines/scripts/models/update_yaml_config.py +++ b/.azure-pipelines/scripts/models/update_yaml_config.py @@ -64,9 +64,9 @@ def update_yaml_dataset(yaml, framework, dataset_location): line = re.sub(key_patterns["pattern"], key_patterns["replacement"], line) config.write(line) - if framework == "pytorch": - val_dataset = dataset_location + f"\{os.path.sep}" + "val" - train_dataset = dataset_location + f"\{os.path.sep}" + "train" + else: + val_dataset = dataset_location + f"{os.path.sep}" + "val" + train_dataset = dataset_location + f"{os.path.sep}" + "train" patterns = { "calibration_dataset": { "pattern": r'root:.*/path/to/calibration/dataset/?', @@ -78,14 +78,6 @@ }, } - if topology == "distilbert_base_MRPC": - patterns.update({ - "bert_name": { - "pattern": r'name:/s+bert', - "replacement": "name: distilbert", - } - }) - print("======= update_yaml_dataset =======") with open(yaml, "w") as config: for line in lines: diff --git a/.azure-pipelines/template/model-template.yml b/.azure-pipelines/template/model-template.yml index 1bc9dd4a7bf..f145025faf2 100644 --- a/.azure-pipelines/template/model-template.yml +++ b/.azure-pipelines/template/model-template.yml @@ -13,18 +13,56 @@ parameters: steps: - template: docker-template.yml parameters: - dockerConfigName: 'commonDockerConfig' - repoName: 'neural-compressor' - repoTag: 'py38' - dockerFileName: 'Dockerfile' + dockerConfigName: "commonDockerConfig" + repoName: "neural-compressor" + repoTag: "py38" + dockerFileName: "Dockerfile" containerName: ${{ parameters.modelContainerName }} - script: | docker exec ${{ parameters.modelContainerName }} bash -c "cd /neural-compressor/.azure-pipelines/scripts/models \ - && bash run_${{ parameters.framework }}_models_trigger.sh --model=${{ parameters.modelName }} --tune_acc=true --build_id=$(Build.BuildId)" - displayName: Tune&Benchmark ${{ parameters.modelName }} + && bash run_${{ parameters.framework }}_models_trigger.sh --model=${{ parameters.modelName }} --mode='env_setup'" + displayName: Env setup + + - task: DownloadPipelineArtifact@2 + inputs: + source: "specific" + artifact: ${{ parameters.framework }}_${{ parameters.modelName }} + patterns: "**_summary.log" + path: $(Build.SourcesDirectory)/.azure-pipelines/scripts/models/${{
parameters.modelName }}_refer_log + project: $(System.TeamProject) + pipeline: "Model-Test" + runVersion: "specific" + runId: $(refer_buildId) + retryDownloadCount: 3 + displayName: "Download refer logs" + + - script: | + docker exec ${{ parameters.modelContainerName }} bash -c "cd /neural-compressor/.azure-pipelines/scripts/models \ + && bash run_${{ parameters.framework }}_models_trigger.sh --model=${{ parameters.modelName }} --mode='tuning'" + displayName: Tuning + + - script: | + docker exec ${{ parameters.modelContainerName }} bash -c "cd /neural-compressor/.azure-pipelines/scripts/models \ + && bash run_${{ parameters.framework }}_models_trigger.sh --model=${{ parameters.modelName }} --mode='int8_benchmark'" + displayName: INT8 Benchmark + + - script: | + docker exec ${{ parameters.modelContainerName }} bash -c "cd /neural-compressor/.azure-pipelines/scripts/models \ + && bash run_${{ parameters.framework }}_models_trigger.sh --model=${{ parameters.modelName }} --mode='fp32_benchmark'" + displayName: FP32 Benchmark + + - task: Bash@3 + condition: always() + inputs: + targetType: "inline" + script: | + docker exec ${{ parameters.modelContainerName }} bash -c "cd /neural-compressor/.azure-pipelines/scripts/models \ + && bash run_${{ parameters.framework }}_models_trigger.sh --model=${{ parameters.modelName }} --mode='collect_log'" + displayName: Collect log - task: PublishPipelineArtifact@1 + condition: always() inputs: targetPath: $(Build.SourcesDirectory)/.azure-pipelines/scripts/models/${{ parameters.modelName }}/ artifact: ${{ parameters.framework }}_${{ parameters.modelName }}