From c0b92423769460ac20074b3e9cc47d14888ce73d Mon Sep 17 00:00:00 2001 From: WenjiaoYue <108783334+WenjiaoYue@users.noreply.github.com> Date: Tue, 1 Nov 2022 14:26:07 +0800 Subject: [PATCH 01/34] optimize model code (#42) --- .azure-pipelines/model-test.yml | 353 ++++++------------- .azure-pipelines/template/model-template.yml | 27 +- 2 files changed, 120 insertions(+), 260 deletions(-) diff --git a/.azure-pipelines/model-test.yml b/.azure-pipelines/model-test.yml index 1f23a460563..f604b176e13 100644 --- a/.azure-pipelines/model-test.yml +++ b/.azure-pipelines/model-test.yml @@ -10,284 +10,137 @@ variables: SCRIPT_PATH: /neural-compressor/.azure-pipelines/scripts parameters: -- name: test_tf - displayName: Run tensorflow models? - type: boolean - default: true -- name: test_pt - displayName: Run pytorch models? - type: boolean - default: true -- name: test_onnx - displayName: Run onnxrt models? - type: boolean - default: true -- name: test_mxnet - displayName: Run mxnet models? - type: boolean - default: true + - name: TensorFlow_Model + displayName: Run TensorFlow models? + type: boolean + default: true + - name: PyTorch_Model + displayName: Run PyTorch models? + type: boolean + default: true + - name: ONNX_Model + displayName: Run ONNX models? + type: boolean + default: true + - name: MXNet_Model + displayName: Run MXNet models? + type: boolean + default: true + + - name: TensorFlowModelList + type: object + default: + - resnet50v1.5 + - ssd_resnet50_v1 + - ssd_mobilenet_v1_ckpt + - inception_v1 + - resnet50_fashion + - darknet19 + - densenet-121 + - resnet-101 + - name: PyTorchModelList + type: object + default: + - resnet18 + - resnet18_fx + - name: ONNXModelList + type: object + default: + - resnet50-v1-12 + - bert_base_MRPC_dynamic + - name: MXNetModelList + type: object + default: + - resnet50v1 + stages: -- ${{ if eq(parameters.test_tf, true) }}: - - stage: TensorflowModels - displayName: Tensorflow Model Test + - stage: TensorFlowModels + displayName: Run TensorFlow Model dependsOn: [] + condition: eq('${{ parameters.TensorFlow_Model }}', 'true') jobs: - - job: resnet50v1_5 - displayName: resnet50v1.5 - steps: - - template: template/model-template.yml - parameters: - modelName: 'resnet50v1.5' - framework: 'tensorflow' - - - script: | - if [ $(tensorflow_resnet50v1.5_failed) == 'true' ]; then - echo "[Failed] Model resnet50v1.5 failed, please check artifacts and logs." - exit 1 - fi - displayName: "Check Test Status" - - - job: ssd_resnet50_v1 - displayName: ssd_resnet50_v1 - steps: - - template: template/model-template.yml - parameters: - modelName: 'ssd_resnet50_v1' - framework: 'tensorflow' - - - script: | - if [ $(tensorflow_ssd_resnet50_v1_failed) == 'true' ]; then - echo "[Failed] Model ssd_resnet50_v1 failed, please check artifacts and logs." - exit 1 - fi - displayName: "Check Test Status" - - - job: ssd_mobilenet_v1_ckpt - displayName: ssd_mobilenet_v1_ckpt - steps: - - template: template/model-template.yml - parameters: - modelName: 'ssd_mobilenet_v1_ckpt' - framework: 'tensorflow' - - - script: | - if [ $(tensorflow_ssd_mobilenet_v1_ckpt_failed) == 'true' ]; then - echo "[Failed] Model ssd_mobilenet_v1_ckpt failed, please check artifacts and logs." 
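            # (this echo/exit-1 pair recurs verbatim for every model job in this file; the
            #  parameterized "Check Test Status" step added to template/model-template.yml at
            #  the end of this patch replaces all of them)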
- exit 1 - fi - displayName: "Check Test Status" - - - job: inception_v1 - displayName: inception_v1 - steps: - - template: template/model-template.yml - parameters: - modelName: 'inception_v1' - framework: 'tensorflow' - - - script: | - if [ $(tensorflow_inception_v1_failed) == 'true' ]; then - echo "[Failed] Model inception_v1 failed, please check artifacts and logs." - exit 1 - fi - displayName: "Check Test Status" - - - job: resnet50_fashion - displayName: resnet50_fashion - steps: - - template: template/model-template.yml - parameters: - modelName: 'resnet50_fashion' - framework: 'tensorflow' - - - script: | - if [ $(tensorflow_resnet50_fashion_failed) == 'true' ]; then - echo "[Failed] Model resnet50_fashion failed, please check artifacts and logs." - exit 1 - fi - displayName: "Check Test Status" - - - job: darknet19 - displayName: darknet19 - steps: - - template: template/model-template.yml - parameters: - modelName: 'darknet19' - framework: 'tensorflow' - - - script: | - if [ $(tensorflow_darknet19_failed) == 'true' ]; then - echo "[Failed] Model darknet19 failed, please check artifacts and logs." - exit 1 - fi - displayName: "Check Test Status" - - - job: densenet121 - displayName: densenet-121 - steps: - - template: template/model-template.yml - parameters: - modelName: 'densenet-121' - framework: 'tensorflow' - - - script: | - if [ $(tensorflow_densenet-121_failed) == 'true' ]; then - echo "[Failed] Model densenet-121 failed, please check artifacts and logs." - exit 1 - fi - displayName: "Check Test Status" + - ${{ each model in parameters.TensorFlowModelList }}: + - job: + displayName: ${{ model }} + steps: + - template: template/model-template.yml + parameters: + modelName: ${{ model }} + framework: "tensorflow" - - job: resnet101 - displayName: resnet-101 - steps: - - template: template/model-template.yml - parameters: - modelName: 'resnet-101' - framework: 'tensorflow' - - - script: | - if [ $(tensorflow_resnet-101_failed) == 'true' ]; then - echo "[Failed] Model resnet-101 failed, please check artifacts and logs." - exit 1 - fi - displayName: "Check Test Status" - -- ${{ if eq(parameters.test_pt, true) }}: - stage: PyTorchModels - displayName: PyTorch Model Test + displayName: Run PyTorch Model dependsOn: [] + condition: eq('${{ parameters.PyTorch_Model }}', 'true') jobs: - - job: resnet18 - displayName: resnet18 - steps: - - template: template/model-template.yml - parameters: - modelName: 'resnet18' - framework: 'pytorch' - - - script: | - if [ $(pytorch_resnet18_failed) == 'true' ]; then - echo "[Failed] Model resnet18 failed, please check artifacts and logs." - exit 1 - fi - displayName: "Check Test Status" - - - job: resnet18_fx - displayName: resnet18_fx - steps: - - template: template/model-template.yml - parameters: - modelName: 'resnet18_fx' - framework: 'pytorch' - - - script: | - if [ $(pytorch_resnet18_fx_failed) == 'true' ]; then - echo "[Failed] Model resnet18_fx failed, please check artifacts and logs." 
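            # (the rewrite in this patch also swaps the compile-time
            #  ${{ if eq(parameters.test_pt, true) }} guards for runtime conditions such as
            #  condition: eq('${{ parameters.PyTorch_Model }}', 'true'); a stage skipped by a
            #  runtime condition still exists in the pipeline, so the GenerateLogs dependsOn
            #  list stays valid whichever toggles are off, which is presumably the motivation)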
- exit 1 - fi - displayName: "Check Test Status" + - ${{ each model in parameters.PyTorchModelList }}: + - job: + displayName: ${{ model }} + steps: + - template: template/model-template.yml + parameters: + modelName: ${{ model }} + framework: "pytorch" -- ${{ if eq(parameters.test_mxnet, true) }}: - - stage: MXNetModels - displayName: Mxnet Model Test + - stage: ONNXModels + displayName: Run ONNX Model dependsOn: [] + condition: eq('${{ parameters.ONNX_Model }}', 'true') jobs: - - job: resnet50v1 - displayName: resnet50v1 - steps: - - template: template/model-template.yml - parameters: - modelName: 'resnet50v1' - framework: 'mxnet' - - - script: | - if [ $(mxnet_resnet50v1_failed) == 'true' ]; then - echo "[Failed] Model resnet50v1 failed, please check artifacts and logs." - exit 1 - fi - displayName: "Check Test Status" + - ${{ each model in parameters.ONNXModelList }}: + - job: + displayName: ${{ model }} + steps: + - template: template/model-template.yml + parameters: + modelName: ${{ model }} + framework: "onnxrt" -- ${{ if eq(parameters.test_onnx, true) }}: - - stage: ONNXModels - displayName: ONNXrt Model Test + - stage: MXNetModels + displayName: Run MXNet Model dependsOn: [] + condition: eq('${{ parameters.MXNet_Model }}', 'true') jobs: - - job: resnet50_v1_12 - displayName: resnet50-v1-12 - steps: - - template: template/model-template.yml - parameters: - modelName: 'resnet50-v1-12' - framework: 'onnxrt' - - - script: | - if [ $(onnxrt_resnet50-v1-12_failed) == 'true' ]; then - echo "[Failed] Model resnet50-v1-12 failed, please check artifacts and logs." - exit 1 - fi - displayName: "Check Test Status" - -# - job: distilbert_base_MRPC_qdq -# displayName: distilbert_base_MRPC_qdq -# steps: -# - template: template/model-template.yml -# parameters: -# modelName: 'distilbert_base_MRPC_qdq' -# framework: 'onnxrt' -# -# - script: | -# if [ $(onnxrt_distilbert_base_MRPC_qdq_failed) == 'true' ]; then -# echo "[Failed] Model distilbert_base_MRPC_qdq failed, please check artifacts and logs." -# exit 1 -# fi -# displayName: "Check Test Status" + - ${{ each model in parameters.MXNetModelList }}: + - job: + displayName: ${{ model }} + steps: + - template: template/model-template.yml + parameters: + modelName: ${{ model }} + framework: "mxnet" - - job: bert_base_MRPC_dynamic - displayName: bert_base_MRPC_dynamic - steps: - - template: template/model-template.yml - parameters: - modelName: 'bert_base_MRPC_dynamic' - framework: 'onnxrt' - - - script: | - if [ $(onnxrt_bert_base_MRPC_dynamic_failed) == 'true' ]; then - echo "[Failed] Model bert_base_MRPC_dynamic failed, please check artifacts and logs." 
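            # note the $(...) runtime macro syntax used by these per-model checks: the
            # consolidated "Check Test Status" step added to template/model-template.yml at
            # the end of this patch tests
            #   if [ ${{ parameters.framework }}_${{ parameters.modelName }}_failed == 'true' ]
            # which expands at template-compile time to a bare token such as
            # onnxrt_bert_base_MRPC_dynamic_failed and so can never equal 'true'; it likely
            # needs the macro form
            #   if [ $(${{ parameters.framework }}_${{ parameters.modelName }}_failed) == 'true' ]
            # to read the runtime variable these scripts rely on.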
- exit 1 - fi - displayName: "Check Test Status" - stage: GenerateLogs displayName: Generate Report - dependsOn: [ TensorflowModels, PyTorchModels, MXNetModels, ONNXModels ] - condition: succeededOrFailed() + dependsOn: [TensorFlowModels, PyTorchModels, MXNetModels, ONNXModels] + condition: always() jobs: - job: GenerateReport - displayName: generate report steps: - script: | echo ${BUILD_SOURCESDIRECTORY} sudo rm -fr ${BUILD_SOURCESDIRECTORY} || true echo y | docker system prune - displayName: 'Clean workspace' + displayName: "Clean workspace" - checkout: self clean: true - displayName: 'Checkout out Repo' + displayName: "Checkout out Repo" - task: DownloadPipelineArtifact@2 inputs: artifact: - patterns: '**/*_summary.log' + patterns: "**/*_summary.log" path: $(OUT_SCRIPT_PATH) - task: DownloadPipelineArtifact@2 inputs: artifact: - patterns: '**/*_tuning_info.log' + patterns: "**/*_tuning_info.log" path: $(OUT_SCRIPT_PATH) - task: UsePythonVersion@0 - displayName: 'Use Python 3.8.0' + displayName: "Use Python 3." inputs: - versionSpec: '3.8.0' + versionSpec: "3" - script: | cd ${OUT_SCRIPT_PATH} mkdir generated @@ -296,30 +149,30 @@ stages: displayName: "Collect all logs" - task: DownloadPipelineArtifact@2 inputs: - source: 'specific' - artifact: 'FinalReport' - patterns: '**.log' + source: "specific" + artifact: "FinalReport" + patterns: "**.log" path: $(OUT_SCRIPT_PATH)/last_generated project: $(System.TeamProject) - pipeline: 'Baseline' - runVersion: 'specific' + pipeline: "Baseline" + runVersion: "specific" runId: $(model_runID) retryDownloadCount: 3 - displayName: 'Download last logs' + displayName: "Download last logs" - script: | echo "------ Generating final report.html ------" cd ${OUT_SCRIPT_PATH} /usr/bin/bash generate_report.sh --WORKSPACE generated --output_dir generated --last_logt_dir last_generated --ghprbPullId $(System.PullRequest.PullRequestNumber) --MR_source_branch $(System.PullRequest.SourceBranch) --MR_source_repo $(System.PullRequest.SourceRepositoryURI) --MR_target_branch $(System.PullRequest.TargetBranch) --repo_url $(Build.Repository.Uri) --source_commit_id $(Build.SourceVersion) --build_id $(Build.BuildId) - displayName: 'Generate report' + displayName: "Generate report" - task: PublishPipelineArtifact@1 inputs: targetPath: $(OUT_SCRIPT_PATH)/generated artifact: FinalReport - publishLocation: 'pipeline' + publishLocation: "pipeline" displayName: "Publish report" - script: | if [ $(is_perf_reg) == 'true' ]; then echo "[Performance Regression] Some model performance regression occurred, please check artifacts and reports." 
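          # is_perf_reg is not declared anywhere in this YAML; it is expected to arrive as a
          # runtime variable set by the report-generation step through an Azure DevOps logging
          # command, e.g. (illustrative): echo "##vso[task.setvariable variable=is_perf_reg]true"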
exit 1 fi - displayName: "Specify performance regression" \ No newline at end of file + displayName: "Specify performance regression" diff --git a/.azure-pipelines/template/model-template.yml b/.azure-pipelines/template/model-template.yml index fdab45aaaea..1bc9dd4a7bf 100644 --- a/.azure-pipelines/template/model-template.yml +++ b/.azure-pipelines/template/model-template.yml @@ -1,14 +1,14 @@ parameters: -- name: modelName - type: string - default: 'resnet50v1.5' -- name: framework - type: string - default: 'tensorflow' + - name: modelName + type: string + default: "resnet50v1.5" + - name: framework + type: string + default: "tensorflow" -- name: modelContainerName - type: string - default: 'model' + - name: modelContainerName + type: string + default: "model" steps: - template: docker-template.yml @@ -28,4 +28,11 @@ steps: inputs: targetPath: $(Build.SourcesDirectory)/.azure-pipelines/scripts/models/${{ parameters.modelName }}/ artifact: ${{ parameters.framework }}_${{ parameters.modelName }} - publishLocation: 'pipeline' \ No newline at end of file + publishLocation: "pipeline" + + - script: | + if [ ${{ parameters.framework }}_${{ parameters.modelName }}_failed == 'true' ]; then + echo "[Failed] Model ${{ parameters.modelName }} failed, please check artifacts and logs." + exit 1 + fi + displayName: "Check Test Status" From 7b10876f57647ee60e6238eca003567f070549c3 Mon Sep 17 00:00:00 2001 From: "Sun, Xuehao" Date: Tue, 1 Nov 2022 16:00:21 +0800 Subject: [PATCH 02/34] perf check template --- .azure-pipelines/model-test.yml | 4 +- .../scripts/models/generate_report.sh | 215 ++++++++-------- .../scripts/models/run_benchmark_common.sh | 12 +- .../models/run_model_trigger_common.sh | 230 ++++++++++-------- .../models/run_mxnet_models_trigger.sh | 21 +- .../models/run_onnxrt_models_trigger.sh | 19 ++ .../models/run_pytorch_models_trigger.sh | 23 +- .../models/run_tensorflow_models_trigger.sh | 21 +- .azure-pipelines/template/model-template.yml | 45 +++- 9 files changed, 343 insertions(+), 247 deletions(-) diff --git a/.azure-pipelines/model-test.yml b/.azure-pipelines/model-test.yml index f604b176e13..93550fa65e6 100644 --- a/.azure-pipelines/model-test.yml +++ b/.azure-pipelines/model-test.yml @@ -53,7 +53,6 @@ parameters: default: - resnet50v1 - stages: - stage: TensorFlowModels displayName: Run TensorFlow Model @@ -111,7 +110,6 @@ stages: modelName: ${{ model }} framework: "mxnet" - - stage: GenerateLogs displayName: Generate Report dependsOn: [TensorFlowModels, PyTorchModels, MXNetModels, ONNXModels] @@ -162,7 +160,7 @@ stages: - script: | echo "------ Generating final report.html ------" cd ${OUT_SCRIPT_PATH} - /usr/bin/bash generate_report.sh --WORKSPACE generated --output_dir generated --last_logt_dir last_generated --ghprbPullId $(System.PullRequest.PullRequestNumber) --MR_source_branch $(System.PullRequest.SourceBranch) --MR_source_repo $(System.PullRequest.SourceRepositoryURI) --MR_target_branch $(System.PullRequest.TargetBranch) --repo_url $(Build.Repository.Uri) --source_commit_id $(Build.SourceVersion) --build_id $(Build.BuildId) + /usr/bin/bash generate_report.sh --WORKSPACE generated --output_dir generated --last_logt_dir last_generated displayName: "Generate report" - task: PublishPipelineArtifact@1 inputs: diff --git a/.azure-pipelines/scripts/models/generate_report.sh b/.azure-pipelines/scripts/models/generate_report.sh index 51b0e3423c8..e76cba525f4 100644 --- a/.azure-pipelines/scripts/models/generate_report.sh +++ b/.azure-pipelines/scripts/models/generate_report.sh @@ 
-24,52 +24,24 @@ while [[ $# -gt 0 ]];do key=${1} case ${key} in -w|--WORKSPACE) - WORKSPACE=${2} - shift 2 - ;; + WORKSPACE=${2} + shift 2 + ;; --script_path) - script_path=${2} - shift 2 - ;; + script_path=${2} + shift 2 + ;; --output_dir) - output_dir=${2} - shift 2 - ;; + output_dir=${2} + shift 2 + ;; --last_logt_dir) - last_logt_dir=${2} - shift 2 - ;; - --ghprbPullId) - ghprbPullId=${2} - shift 2 - ;; - --MR_source_branch) - MR_source_branch=${2} - shift 2 - ;; - --MR_source_repo) - MR_source_repo=${2} - shift 2 - ;; - --MR_target_branch) - MR_target_branch=${2} - shift 2 - ;; - --repo_url) - repo_url=${2} - shift 2 - ;; - --source_commit_id) - source_commit_id=${2} - shift 2 - ;; - --build_id) - build_id=${2} - shift 2 - ;; + last_logt_dir=${2} + shift 2 + ;; *) - shift - ;; + shift + ;; esac done @@ -86,11 +58,19 @@ summaryLogLast="${last_logt_dir}/summary.log" tuneLogLast="${last_logt_dir}/tuning_info.log" echo "summaryLogLast: ${summaryLogLast}" echo "tuneLogLast: ${tuneLogLast}" +ghprbPullId=${SYSTEM_PULLREQUEST_PULLREQUESTNUMBER} +MR_source_branch=${SYSTEM_PULLREQUEST_SOURCEBRANCH} +MR_source_repo=${SYSTEM_PULLREQUEST_SOURCEREPOSITORYURI} +MR_target_branch=${SYSTEM_PULLREQUEST_TARGETBRANCH} +repo_url=${BUILD_REPOSITORY_URI} +source_commit_id=${BUILD_SOURCEVERSION} +build_id=${BUILD_BUILDID} echo "MR_source_branch: ${MR_source_branch}" echo "MR_source_repo: ${MR_source_repo}" echo "MR_target_branch: ${MR_target_branch}" echo "repo_url: ${repo_url}" echo "commit_id: ${source_commit_id}" +echo "ghprbPullId: ${ghprbPullId}" function main { @@ -98,7 +78,6 @@ function main { generate_html_body generate_results generate_html_footer - } function generate_inference { @@ -205,33 +184,33 @@ function generate_html_core { if(metric == "acc") { target = (int8_result - fp32_result) / fp32_result; if(target >= -0.01) { - printf("%.2f %", target*100); + printf("%.2f %", target*100); }else if(target < -0.05) { - printf("%.2f %", target*100); - job_status = "fail" + printf("%.2f %", target*100); + job_status = "fail" }else{ - printf("%.2f %", target*100); + printf("%.2f %", target*100); } }else if(metric == "perf") { target = int8_result / fp32_result; if(target >= 1.5) { - printf("%.2f", target); + printf("%.2f", target); }else if(target < 1) { - printf("%.2f", target); - job_status = "fail" + printf("%.2f", target); + job_status = "fail" }else{ - printf("%.2f", target); + printf("%.2f", target); } } else { target = int8_result / fp32_result; if(target >= 2) { - printf("%.2f", target); + printf("%.2f", target); }else if(target < 1) { - printf("%.2f", target); - job_status = "fail" + printf("%.2f", target); + job_status = "fail" }else{ - printf("%.2f", target); + printf("%.2f", target); } } }else { @@ -263,14 +242,14 @@ function generate_html_core { } } else { if(new_result == nan && previous_result == nan){ - printf(""); + printf(""); } else{ if(new_result == nan) { - job_status = "fail" - status_png = "background-color:#FFD2D2"; - printf("", status_png); + job_status = "fail" + status_png = "background-color:#FFD2D2"; + printf("", status_png); } else{ - printf(""); + printf(""); } } } @@ -367,7 +346,7 @@ function generate_html_core { printf("\n"); } END{ - printf("\n%s", job_status); + printf("\n%s", job_status); } ' >> ${output_dir}/report.html job_state=$(tail -1 ${WORKSPACE}/report.html) @@ -426,14 +405,14 @@ Test_Info='' if [ "${qtools_branch}" == "" ]; then - commit_id=$(echo ${ghprbActualCommit} |awk '{print substr($1,1,7)}') + commit_id=$(echo ${ghprbActualCommit} |awk '{print 
substr($1,1,7)}') - MR_TITLE="[ PR-${ghprbPullId} ]" - Test_Info_Title="Source Branch Target Branch Commit " - Test_Info="${MR_source_branch} ${MR_target_branch} ${source_commit_id:0:6}" + MR_TITLE="[ PR-${ghprbPullId} ]" + Test_Info_Title="Source Branch Target Branch Commit " + Test_Info="${MR_source_branch} ${MR_target_branch} ${source_commit_id:0:6}" else - Test_Info_Title="Test Branch Commit ID " - Test_Info="${qtools_branch} ${qtools_commit} " + Test_Info_Title="Test Branch Commit ID " + Test_Info="${qtools_branch} ${qtools_commit} " fi cat >> ${output_dir}/report.html << eof @@ -441,18 +420,20 @@ cat >> ${output_dir}/report.html << eof

[report.html header block; the HTML tags did not survive extraction. Recoverable content: a heading "Neural Compressor Tuning Tests ${MR_TITLE} [ Job-${build_id} ]" (this hunk only re-wraps the Job-${build_id} link across lines), a "Test Status: ${Jenkins_job_status}" heading, a "Summary" heading, and the summary table whose re-indented rows pair a "Repo" / "neural-compressor" cell with ${Test_Info_Title} and ${Test_Info}.]
eof @@ -537,68 +518,68 @@ cat > ${output_dir}/report.html << eof } .features-table { - width: 100%; - margin: 0 auto; - border-collapse: separate; - border-spacing: 0; - text-shadow: 0 1px 0 #fff; - color: #2a2a2a; - background: #fafafa; - background-image: -moz-linear-gradient(top, #fff, #eaeaea, #fff); /* Firefox 3.6 */ - background-image: -webkit-gradient(linear,center bottom,center top,from(#fff),color-stop(0.5, #eaeaea),to(#fff)); - font-family: Verdana,Arial,Helvetica + width: 100%; + margin: 0 auto; + border-collapse: separate; + border-spacing: 0; + text-shadow: 0 1px 0 #fff; + color: #2a2a2a; + background: #fafafa; + background-image: -moz-linear-gradient(top, #fff, #eaeaea, #fff); /* Firefox 3.6 */ + background-image: -webkit-gradient(linear,center bottom,center top,from(#fff),color-stop(0.5, #eaeaea),to(#fff)); + font-family: Verdana,Arial,Helvetica } .features-table th,td { - text-align: center; - height: 25px; - line-height: 25px; - padding: 0 8px; - border: 1px solid #cdcdcd; - box-shadow: 0 1px 0 white; - -moz-box-shadow: 0 1px 0 white; - -webkit-box-shadow: 0 1px 0 white; - white-space: nowrap; + text-align: center; + height: 25px; + line-height: 25px; + padding: 0 8px; + border: 1px solid #cdcdcd; + box-shadow: 0 1px 0 white; + -moz-box-shadow: 0 1px 0 white; + -webkit-box-shadow: 0 1px 0 white; + white-space: nowrap; } .no-border th { - box-shadow: none; - -moz-box-shadow: none; - -webkit-box-shadow: none; + box-shadow: none; + -moz-box-shadow: none; + -webkit-box-shadow: none; } .col-cell { - text-align: center; - width: 150px; - font: normal 1em Verdana, Arial, Helvetica; + text-align: center; + width: 150px; + font: normal 1em Verdana, Arial, Helvetica; } .col-cell3 { - background: #efefef; - background: rgba(144,144,144,0.15); + background: #efefef; + background: rgba(144,144,144,0.15); } .col-cell1, .col-cell2 { - background: #B0C4DE; - background: rgba(176,196,222,0.3); + background: #B0C4DE; + background: rgba(176,196,222,0.3); } .col-cellh { - font: bold 1.3em 'trebuchet MS', 'Lucida Sans', Arial; - -moz-border-radius-topright: 10px; - -moz-border-radius-topleft: 10px; - border-top-right-radius: 10px; - border-top-left-radius: 10px; - border-top: 1px solid #eaeaea !important; + font: bold 1.3em 'trebuchet MS', 'Lucida Sans', Arial; + -moz-border-radius-topright: 10px; + -moz-border-radius-topleft: 10px; + border-top-right-radius: 10px; + border-top-left-radius: 10px; + border-top: 1px solid #eaeaea !important; } .col-cellf { - font: bold 1.4em Georgia; - -moz-border-radius-bottomright: 10px; - -moz-border-radius-bottomleft: 10px; - border-bottom-right-radius: 10px; - border-bottom-left-radius: 10px; - border-bottom: 1px solid #dadada !important; + font: bold 1.4em Georgia; + -moz-border-radius-bottomright: 10px; + -moz-border-radius-bottomleft: 10px; + border-bottom-right-radius: 10px; + border-bottom-left-radius: 10px; + border-bottom: 1px solid #dadada !important; } diff --git a/.azure-pipelines/scripts/models/run_benchmark_common.sh b/.azure-pipelines/scripts/models/run_benchmark_common.sh index b342ac48992..d0f4ab72818 100644 --- a/.azure-pipelines/scripts/models/run_benchmark_common.sh +++ b/.azure-pipelines/scripts/models/run_benchmark_common.sh @@ -49,12 +49,12 @@ function multiInstance() { for((j=0;$j<${ncores_per_socket};j=$(($j + ${ncores_per_instance})))); do - end_core_num=$((j + ncores_per_instance -1)) - if [ ${end_core_num} -ge ${ncores_per_socket} ]; then - end_core_num=$((ncores_per_socket-1)) - fi - numactl -m 0 -C "${j}-${end_core_num}" ${cmd} 
2>&1 | tee ${logFile}-${ncores_per_socket}-${ncores_per_instance}-${j}.log & - benchmark_pids+=($!) + end_core_num=$((j + ncores_per_instance -1)) + if [ ${end_core_num} -ge ${ncores_per_socket} ]; then + end_core_num=$((ncores_per_socket-1)) + fi + numactl -m 0 -C "${j}-${end_core_num}" ${cmd} 2>&1 | tee ${logFile}-${ncores_per_socket}-${ncores_per_instance}-${j}.log & + benchmark_pids+=($!) done status="SUCCESS" diff --git a/.azure-pipelines/scripts/models/run_model_trigger_common.sh b/.azure-pipelines/scripts/models/run_model_trigger_common.sh index 397ccade727..9eeadc8c4f0 100644 --- a/.azure-pipelines/scripts/models/run_model_trigger_common.sh +++ b/.azure-pipelines/scripts/models/run_model_trigger_common.sh @@ -34,8 +34,8 @@ do benchmark_cmd=`echo $i | sed "s/${PATTERN}//"`;; --tune_acc=*) tune_acc=`echo $i | sed "s/${PATTERN}//"`;; - --build_id=*) - build_id=`echo $i | sed "s/${PATTERN}//"`;; + --mode=*) + mode=`echo $i | sed "s/${PATTERN}//"`;; *) echo "Parameter $i not recognized."; exit 1;; esac @@ -48,109 +48,135 @@ echo "processing ${framework}-${fwk_ver}-${model}" echo "tuning_cmd is ${tuning_cmd}" echo "benchmark_cmd is ${benchmark_cmd}" -echo "======= creat log_dir =========" -if [ -d "${log_dir}/${model}" ]; then - echo "${log_dir}/${model} already exists, don't need to mkdir." -else - echo "no log dir ${log_dir}/${model}, create." - cd ${log_dir} - mkdir ${model} -fi - -echo "====== install requirements ======" -/bin/bash /neural-compressor/.azure-pipelines/scripts/install_nc.sh - -cd ${WORK_SOURCE_DIR}/${model_src_dir} -pip install ruamel_yaml -pip install psutil -pip install protobuf==3.20.1 -if [[ "${framework}" == "tensorflow" ]]; then - pip install intel-tensorflow==${fwk_ver} -elif [[ "${framework}" == "pytorch" ]]; then - pip install torch==${fwk_ver} -f https://download.pytorch.org/whl/torch_stable.html - pip install torchvision==${torch_vision_ver} -f https://download.pytorch.org/whl/torch_stable.html -elif [[ "${framework}" == "onnxrt" ]]; then - pip install onnx==1.11.0 - pip install onnxruntime==${fwk_ver} -elif [[ "${framework}" == "mxnet" ]]; then - if [[ "${fwk_ver}" == "1.7.0" ]]; then - pip install mxnet==${fwk_ver}.post2 - elif [[ "${fwk_ver}" == "1.6.0" ]]; then - pip install mxnet-mkl==${mxnet_version} - else - pip install mxnet==${fwk_ver} - fi -fi - -if [ -f "requirements.txt" ]; then - sed -i '/neural-compressor/d' requirements.txt - if [ "${framework}" == "onnxrt" ]; then - sed -i '/^onnx>=/d;/^onnx==/d;/^onnxruntime>=/d;/^onnxruntime==/d' requirements.txt +if [ "${mode}" == "env_setup" ]; then + echo "======= creat log_dir =========" + if [ -d "${log_dir}/${model}" ]; then + echo "${log_dir}/${model} already exists, don't need to mkdir." + else + echo "no log dir ${log_dir}/${model}, create." + cd ${log_dir} + mkdir ${model} fi - if [ "${framework}" == "tensorflow" ]; then - sed -i '/tensorflow==/d;/tensorflow$/d' requirements.txt - sed -i '/^intel-tensorflow/d' requirements.txt - fi - if [ "${framework}" == "mxnet" ]; then - sed -i '/mxnet==/d;/mxnet$/d;/mxnet-mkl==/d;/mxnet-mkl$/d' requirements.txt - fi - if [ "${framework}" == "pytorch" ]; then - sed -i '/torch==/d;/torch$/d;/torchvision==/d;/torchvision$/d' requirements.txt - fi - n=0 - until [ "$n" -ge 5 ] - do - python -m pip install -r requirements.txt && break - n=$((n+1)) - sleep 5 - done - pip list -else - echo "Not found requirements.txt file." -fi - - -echo "======== update yaml config ========" -echo -e "\nPrint origin yaml..." 
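An aside on the five-try pip install loop this hunk relocates into the env_setup branch: generalized into a reusable helper it reads as below (a sketch; the helper name retry is not part of the patch, and unlike the inline loop it propagates failure when every attempt fails):

    retry() {
        # run "$@" until it succeeds: at most 5 attempts, pausing 5s between tries;
        # returns 1 if all attempts fail (the inline until-loop falls through with a
        # success status instead, so a broken install is only caught later)
        local n=0
        until "$@"; do
            n=$((n + 1))
            if [ "$n" -ge 5 ]; then
                return 1
            fi
            sleep 5
        done
    }

    retry python -m pip install -r requirements.txt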
-cat ${yaml} -python ${SCRIPTS_PATH}/update_yaml_config.py --yaml=${yaml} --framework=${framework} \ ---dataset_location=${dataset_location} --batch_size=${batch_size} --strategy=${strategy} \ ---new_benchmark=${new_benchmark} --multi_instance='true' -echo -e "\nPrint updated yaml... " -cat ${yaml} - - -echo "======== run tuning ========" -/bin/bash ${SCRIPTS_PATH}/run_tuning_common.sh --framework=${framework} --model=${model} \ ---tuning_cmd="${tuning_cmd}" --log_dir="${log_dir}/${model}" --input_model=${input_model} --strategy=${strategy} \ -2>&1 | tee -a ${log_dir}/${model}/${framework}-${model}-tune.log + echo "====== install requirements ======" + /bin/bash /neural-compressor/.azure-pipelines/scripts/install_nc.sh + + cd ${WORK_SOURCE_DIR}/${model_src_dir} + pip install ruamel_yaml + pip install psutil + pip install protobuf==3.20.1 + if [[ "${framework}" == "tensorflow" ]]; then + pip install intel-tensorflow==${fwk_ver} + elif [[ "${framework}" == "pytorch" ]]; then + pip install torch==${fwk_ver} -f https://download.pytorch.org/whl/torch_stable.html + pip install torchvision==${torch_vision_ver} -f https://download.pytorch.org/whl/torch_stable.html + elif [[ "${framework}" == "onnxrt" ]]; then + pip install onnx==1.11.0 + pip install onnxruntime==${fwk_ver} + elif [[ "${framework}" == "mxnet" ]]; then + if [[ "${fwk_ver}" == "1.7.0" ]]; then + pip install mxnet==${fwk_ver}.post2 + elif [[ "${fwk_ver}" == "1.6.0" ]]; then + pip install mxnet-mkl==${mxnet_version} + else + pip install mxnet==${fwk_ver} + fi + fi -echo "====== run benchmark fp32 =======" -/bin/bash ${SCRIPTS_PATH}/run_benchmark_common.sh --framework=${framework} --model=${model} \ - --input_model=${input_model} --benchmark_cmd="${benchmark_cmd}" --tune_acc=${tune_acc} \ - --log_dir="${log_dir}/${model}" --new_benchmark=${new_benchmark} --precision="fp32" + if [ -f "requirements.txt" ]; then + sed -i '/neural-compressor/d' requirements.txt + if [ "${framework}" == "onnxrt" ]; then + sed -i '/^onnx>=/d;/^onnx==/d;/^onnxruntime>=/d;/^onnxruntime==/d' requirements.txt + fi + if [ "${framework}" == "tensorflow" ]; then + sed -i '/tensorflow==/d;/tensorflow$/d' requirements.txt + sed -i '/^intel-tensorflow/d' requirements.txt + fi + if [ "${framework}" == "mxnet" ]; then + sed -i '/mxnet==/d;/mxnet$/d;/mxnet-mkl==/d;/mxnet-mkl$/d' requirements.txt + fi + if [ "${framework}" == "pytorch" ]; then + sed -i '/torch==/d;/torch$/d;/torchvision==/d;/torchvision$/d' requirements.txt + fi + n=0 + until [ "$n" -ge 5 ]; do + python -m pip install -r requirements.txt && break + n=$((n + 1)) + sleep 5 + done + pip list + else + echo "Not found requirements.txt file." + fi + echo "======== update yaml config ========" + echo -e "\nPrint origin yaml..." + cat ${yaml} + python ${SCRIPTS_PATH}/update_yaml_config.py \ + --yaml=${yaml} \ + --framework=${framework} \ + --dataset_location=${dataset_location} \ + --batch_size=${batch_size} \ + --strategy=${strategy} \ + --new_benchmark=${new_benchmark} \ + --multi_instance='true' + echo -e "\nPrint updated yaml... 
" + cat ${yaml} +elif [ "${mode}" == "tuning" ]; then + cd ${WORK_SOURCE_DIR}/${model_src_dir} + echo "======== run tuning ========" + /bin/bash ${SCRIPTS_PATH}/run_tuning_common.sh \ + --framework=${framework} \ + --model=${model} \ + --tuning_cmd="${tuning_cmd}" \ + --log_dir="${log_dir}/${model}" \ + --input_model=${input_model} \ + --strategy=${strategy} \ + 2>&1 | tee -a ${log_dir}/${model}/${framework}-${model}-tune.log +elif [ "${mode}" == "int8_benchmark" ]; then + cd ${WORK_SOURCE_DIR}/${model_src_dir} + echo "====== run benchmark fp32 =======" + /bin/bash ${SCRIPTS_PATH}/run_benchmark_common.sh \ + --framework=${framework} \ + --model=${model} \ + --input_model=${input_model} \ + --benchmark_cmd="${benchmark_cmd}" \ + --tune_acc=${tune_acc} \ + --log_dir="${log_dir}/${model}" \ + --new_benchmark=${new_benchmark} \ + --precision="fp32" +elif [ "${mode}" == "fp32_benchmark" ]; then + cd ${WORK_SOURCE_DIR}/${model_src_dir} + echo "====== run benchmark int8 =======" + if [[ "${framework}" == "onnxrt" ]]; then + model_name="${log_dir}/${model}/${framework}-${model}-tune.onnx" + elif [[ "${framework}" == "mxnet" ]]; then + model_name="${log_dir}/${model}" + elif [[ "${framework}" == "tensorflow" ]]; then + model_name="${log_dir}/${model}/${framework}-${model}-tune.pb" + elif [[ "${framework}" == "pytorch" ]]; then + model_name=${input_model} + benchmark_cmd="${benchmark_cmd} --int8=true" + fi -echo "====== run benchmark int8 =======" -if [[ "${framework}" == "onnxrt" ]]; then - model_name="${log_dir}/${model}/${framework}-${model}-tune.onnx" -elif [[ "${framework}" == "mxnet" ]]; then - model_name="${log_dir}/${model}" -elif [[ "${framework}" == "tensorflow" ]]; then - model_name="${log_dir}/${model}/${framework}-${model}-tune.pb" -elif [[ "${framework}" == "pytorch" ]]; then - model_name=${input_model} - benchmark_cmd="${benchmark_cmd} --int8=true" + /bin/bash ${SCRIPTS_PATH}/run_benchmark_common.sh \ + --framework=${framework} \ + --model=${model} \ + --input_model="${model_name}" \ + --benchmark_cmd="${benchmark_cmd}" \ + --tune_acc=${tune_acc} \ + --log_dir="${log_dir}/${model}" \ + --new_benchmark=${new_benchmark} \ + --precision="int8" +elif [ "${mode}" == "collect_log" ]; then + cd ${WORK_SOURCE_DIR}/${model_src_dir} + echo "====== collect logs of model ${model} =======" + python -u ${SCRIPTS_PATH}/collect_log_model.py \ + --framework=${framework} \ + --fwk_ver=${fwk_ver} \ + --model=${model} \ + --logs_dir="${log_dir}/${model}" \ + --output_dir="${log_dir}/${model}" \ + --build_id=${BUILD_BUILDID} + echo "====== Finish model test =======" fi -/bin/bash ${SCRIPTS_PATH}/run_benchmark_common.sh --framework=${framework} --model=${model} \ - --input_model="${model_name}" --benchmark_cmd="${benchmark_cmd}" \ - --tune_acc=${tune_acc} --log_dir="${log_dir}/${model}" --new_benchmark=${new_benchmark} --precision="int8" - - -echo "====== collect logs of model ${model} =======" -python -u ${SCRIPTS_PATH}/collect_log_model.py --framework=${framework} --fwk_ver=${fwk_ver} --model=${model} \ ---logs_dir="${log_dir}/${model}" --output_dir="${log_dir}/${model}" --build_id=${build_id} - - -echo "====== Finish model test =======" \ No newline at end of file diff --git a/.azure-pipelines/scripts/models/run_mxnet_models_trigger.sh b/.azure-pipelines/scripts/models/run_mxnet_models_trigger.sh index 455ee809c61..45378c388d9 100644 --- a/.azure-pipelines/scripts/models/run_mxnet_models_trigger.sh +++ b/.azure-pipelines/scripts/models/run_mxnet_models_trigger.sh @@ -12,6 +12,8 @@ do tune_acc=`echo $i | 
sed "s/${PATTERN}//"`;; --build_id=*) build_id=`echo $i | sed "s/${PATTERN}//"`;; + --mode=*) + mode=`echo $i | sed "s/${PATTERN}//"`;; *) echo "Parameter $i not recognized."; exit 1;; esac @@ -35,7 +37,18 @@ if [ "${model}" == "resnet50v1" ]; then fi -/bin/bash run_model_trigger_common.sh --yaml=${yaml} --framework=${FRAMEWORK} --fwk_ver=${FRAMEWORK_VERSION} \ ---model=${model} --model_src_dir=${model_src_dir} --dataset_location=${dataset_location} \ ---input_model=${input_model} --batch_size=${batch_size} --strategy=${strategy} --new_benchmark=${new_benchmark} \ ---tuning_cmd="${tuning_cmd}" --benchmark_cmd="${benchmark_cmd}" --tune_acc=${tune_acc} --build_id=${build_id} +/bin/bash run_model_trigger_common.sh \ + --yaml=${yaml} \ + --framework=${FRAMEWORK} \ + --fwk_ver=${FRAMEWORK_VERSION} \ + --model=${model} \ + --model_src_dir=${model_src_dir} \ + --dataset_location=${dataset_location} \ + --input_model=${input_model} \ + --batch_size=${batch_size} \ + --strategy=${strategy} \ + --new_benchmark=${new_benchmark} \ + --tuning_cmd="${tuning_cmd}" \ + --benchmark_cmd="${benchmark_cmd}" \ + --tune_acc=${tune_acc} \ + --mode=${mode} diff --git a/.azure-pipelines/scripts/models/run_onnxrt_models_trigger.sh b/.azure-pipelines/scripts/models/run_onnxrt_models_trigger.sh index d71081f61b3..e718c68b568 100644 --- a/.azure-pipelines/scripts/models/run_onnxrt_models_trigger.sh +++ b/.azure-pipelines/scripts/models/run_onnxrt_models_trigger.sh @@ -12,6 +12,8 @@ do tune_acc=`echo $i | sed "s/${PATTERN}//"`;; --build_id=*) build_id=`echo $i | sed "s/${PATTERN}//"`;; + --mode=*) + mode=`echo $i | sed "s/${PATTERN}//"`;; *) echo "Parameter $i not recognized."; exit 1;; esac @@ -69,3 +71,20 @@ fi --model=${model} --model_src_dir=${model_src_dir} --dataset_location=${dataset_location} \ --input_model=${input_model} --batch_size=${batch_size} --strategy=${strategy} --new_benchmark=${new_benchmark} \ --tuning_cmd="${tuning_cmd}" --benchmark_cmd="${benchmark_cmd} --mode=performance" --tune_acc=${tune_acc} --build_id=${build_id} + + +/bin/bash run_model_trigger_common.sh \ + --yaml=${yaml} \ + --framework=${FRAMEWORK} \ + --fwk_ver=${FRAMEWORK_VERSION} \ + --model=${model} \ + --model_src_dir=${model_src_dir} \ + --dataset_location=${dataset_location} \ + --input_model=${input_model} \ + --batch_size=${batch_size} \ + --strategy=${strategy} \ + --new_benchmark=${new_benchmark} \ + --tuning_cmd="${tuning_cmd}" \ + --benchmark_cmd="${benchmark_cmd} --mode=performance" \ + --tune_acc=${tune_acc} \ + --mode=${mode} diff --git a/.azure-pipelines/scripts/models/run_pytorch_models_trigger.sh b/.azure-pipelines/scripts/models/run_pytorch_models_trigger.sh index 9aef437666d..89cfbc6bbc6 100644 --- a/.azure-pipelines/scripts/models/run_pytorch_models_trigger.sh +++ b/.azure-pipelines/scripts/models/run_pytorch_models_trigger.sh @@ -12,6 +12,8 @@ do tune_acc=`echo $i | sed "s/${PATTERN}//"`;; --build_id=*) build_id=`echo $i | sed "s/${PATTERN}//"`;; + --mode=*) + mode=`echo $i | sed "s/${PATTERN}//"`;; *) echo "Parameter $i not recognized."; exit 1;; esac @@ -46,8 +48,19 @@ elif [ "${model}" == "resnet18_fx" ]; then fi -/bin/bash run_model_trigger_common.sh --yaml=${yaml} --framework=${FRAMEWORK} --fwk_ver=${FRAMEWORK_VERSION} \ ---torch_vision_ver=${TORCH_VISION_VERSION} --model=${model} --model_src_dir=${model_src_dir} \ ---dataset_location=${dataset_location} --input_model=${input_model} --batch_size=${batch_size} --strategy=${strategy} \ ---new_benchmark=${new_benchmark} --tuning_cmd="${tuning_cmd}" 
--benchmark_cmd="${benchmark_cmd}" \ ---tune_acc=${tune_acc} --build_id=${build_id} +/bin/bash run_model_trigger_common.sh \ + --yaml=${yaml} \ + --framework=${FRAMEWORK} \ + --fwk_ver=${FRAMEWORK_VERSION} \ + --torch_vision_ver=${TORCH_VISION_VERSION} \ + --model=${model} \ + --model_src_dir=${model_src_dir} \ + --dataset_location=${dataset_location} \ + --input_model=${input_model} \ + --batch_size=${batch_size} \ + --strategy=${strategy} \ + --new_benchmark=${new_benchmark} \ + --tuning_cmd="${tuning_cmd}" \ + --benchmark_cmd="${benchmark_cmd}" \ + --tune_acc=${tune_acc} \ + --mode=${mode} diff --git a/.azure-pipelines/scripts/models/run_tensorflow_models_trigger.sh b/.azure-pipelines/scripts/models/run_tensorflow_models_trigger.sh index 1b3a6c6815d..eec10675c74 100644 --- a/.azure-pipelines/scripts/models/run_tensorflow_models_trigger.sh +++ b/.azure-pipelines/scripts/models/run_tensorflow_models_trigger.sh @@ -12,6 +12,8 @@ do tune_acc=`echo $i | sed "s/${PATTERN}//"`;; --build_id=*) build_id=`echo $i | sed "s/${PATTERN}//"`;; + --mode=*) + mode=`echo $i | sed "s/${PATTERN}//"`;; *) echo "Parameter $i not recognized."; exit 1;; esac @@ -104,7 +106,18 @@ elif [ "${model}" == "resnet50_fashion" ]; then fi -/bin/bash run_model_trigger_common.sh --yaml=${yaml} --framework=${FRAMEWORK} --fwk_ver=${FRAMEWORK_VERSION} \ ---model=${model} --model_src_dir=${model_src_dir} --dataset_location=${dataset_location} \ ---input_model=${input_model} --batch_size=${batch_size} --strategy=${strategy} --new_benchmark=${new_benchmark} \ ---tuning_cmd="${tuning_cmd}" --benchmark_cmd="${benchmark_cmd}" --tune_acc=${tune_acc} --build_id=${build_id} +/bin/bash run_model_trigger_common.sh \ + --yaml=${yaml} \ + --framework=${FRAMEWORK} \ + --fwk_ver=${FRAMEWORK_VERSION} \ + --model=${model} \ + --model_src_dir=${model_src_dir} \ + --dataset_location=${dataset_location} \ + --input_model=${input_model} \ + --batch_size=${batch_size} \ + --strategy=${strategy} \ + --new_benchmark=${new_benchmark} \ + --tuning_cmd="${tuning_cmd}" \ + --benchmark_cmd="${benchmark_cmd}" \ + --tune_acc=${tune_acc} \ + --mode=${mode} diff --git a/.azure-pipelines/template/model-template.yml b/.azure-pipelines/template/model-template.yml index 1bc9dd4a7bf..f6dfcebfca6 100644 --- a/.azure-pipelines/template/model-template.yml +++ b/.azure-pipelines/template/model-template.yml @@ -13,16 +13,49 @@ parameters: steps: - template: docker-template.yml parameters: - dockerConfigName: 'commonDockerConfig' - repoName: 'neural-compressor' - repoTag: 'py38' - dockerFileName: 'Dockerfile' + dockerConfigName: "commonDockerConfig" + repoName: "neural-compressor" + repoTag: "py38" + dockerFileName: "Dockerfile" containerName: ${{ parameters.modelContainerName }} - script: | docker exec ${{ parameters.modelContainerName }} bash -c "cd /neural-compressor/.azure-pipelines/scripts/models \ - && bash run_${{ parameters.framework }}_models_trigger.sh --model=${{ parameters.modelName }} --tune_acc=true --build_id=$(Build.BuildId)" - displayName: Tune&Benchmark ${{ parameters.modelName }} + && bash run_${{ parameters.framework }}_models_trigger.sh --model=${{ parameters.modelName }} --mode='env_setup'" + displayName: Env setup + + - task: DownloadPipelineArtifact@2 + inputs: + source: "specific" + artifact: "FinalReport" + patterns: "**.log" + path: $(Build.SourcesDirectory)/.azure-pipelines/scripts/models/refer_log + project: $(System.TeamProject) + pipeline: "Baseline" + runVersion: "specific" + runId: $(model_runID) + retryDownloadCount: 3 + displayName: 
"Download refer logs" + + - script: | + docker exec ${{ parameters.modelContainerName }} bash -c "cd /neural-compressor/.azure-pipelines/scripts/models \ + && bash run_${{ parameters.framework }}_models_trigger.sh --model=${{ parameters.modelName }} --mode='tuning'" + displayName: Tuning + + - script: | + docker exec ${{ parameters.modelContainerName }} bash -c "cd /neural-compressor/.azure-pipelines/scripts/models \ + && bash run_${{ parameters.framework }}_models_trigger.sh --model=${{ parameters.modelName }} --tune_acc=true --mode='int8_benchmark'" + displayName: INT8 Benchmark + + - script: | + docker exec ${{ parameters.modelContainerName }} bash -c "cd /neural-compressor/.azure-pipelines/scripts/models \ + && bash run_${{ parameters.framework }}_models_trigger.sh --model=${{ parameters.modelName }} --tune_acc=true --mode='fp32_benchmark'" + displayName: FP32 Benchmark + + - script: | + docker exec ${{ parameters.modelContainerName }} bash -c "cd /neural-compressor/.azure-pipelines/scripts/models \ + && bash run_${{ parameters.framework }}_models_trigger.sh --model=${{ parameters.modelName }} --tune_acc=true --mode='collect_log'" + displayName: Collect log - task: PublishPipelineArtifact@1 inputs: From aee87a37ec1c4c0753f1f0790613a83b84e71dac Mon Sep 17 00:00:00 2001 From: "Sun, Xuehao" Date: Tue, 1 Nov 2022 17:22:56 +0800 Subject: [PATCH 03/34] collect json log --- .../scripts/models/collect_log_all.py | 4 +- .../scripts/models/collect_log_model.py | 97 ++++++++++++++++++- .azure-pipelines/template/model-template.yml | 1 + 3 files changed, 94 insertions(+), 8 deletions(-) diff --git a/.azure-pipelines/scripts/models/collect_log_all.py b/.azure-pipelines/scripts/models/collect_log_all.py index 61fe9454c2e..fb9db0d6721 100644 --- a/.azure-pipelines/scripts/models/collect_log_all.py +++ b/.azure-pipelines/scripts/models/collect_log_all.py @@ -1,7 +1,5 @@ -import re -import os -import platform import argparse +import os parser = argparse.ArgumentParser(allow_abbrev=False) parser.add_argument("--logs_dir", type=str, default=".") diff --git a/.azure-pipelines/scripts/models/collect_log_model.py b/.azure-pipelines/scripts/models/collect_log_model.py index 37d305423bf..fcaea56d761 100644 --- a/.azure-pipelines/scripts/models/collect_log_model.py +++ b/.azure-pipelines/scripts/models/collect_log_model.py @@ -1,8 +1,6 @@ -import re -import os -import platform import argparse - +import os +import re parser = argparse.ArgumentParser(allow_abbrev=False) parser.add_argument("--framework", type=str, required=True) @@ -21,10 +19,98 @@ print(args) +def get_model_tuning_results(): + tuning_result_dict = {} + + if os.path.exists(tuning_log): + print('tuning log found') + tmp = {'fp32_acc': 0, 'int8_acc': 0, 'tuning_trials': 0} + with open(tuning_log, "r") as f: + for line in f: + parse_tuning_line(line, tmp) + print(tmp) + # set model status failed + if tmp['fp32_acc'] == 0 or tmp['int8_acc'] == 0: + os.system('echo "##vso[task.setvariable variable=' + args.framework + '_' + args.model + '_failed]true"') + + tuning_result_dict = { + "OS": OS, + "Platform": PLATFORM, + "Framework": args.framework, + "Version": args.fwk_ver, + "Model": args.model, + "Strategy": tmp['strategy'], + "Tune_time": tmp['tune_time'], + } + benchmark_accuracy_result_dict = { + 'int8': { + "OS": OS, + "Platform": PLATFORM, + "Framework": args.framework, + "Version": args.fwk_ver, + "Model": args.model, + "Mode": "Inference", + "Type": "Accuracy", + "BS": 1, + "Value": tmp['int8_acc'], + "Url": URL, + }, + 'fp32': { + "OS": OS, + 
"Platform": PLATFORM, + "Framework": args.framework, + "Version": args.fwk_ver, + "Model": args.model, + "Mode": "Inference", + "Type": "Accuracy", + "BS": 1, + "Value": tmp['fp32_acc'], + "Url": URL, + } + } + + return tuning_result_dict, benchmark_accuracy_result_dict + + +def get_model_benchmark_results(): + benchmark_performance_result_dict = {'int8': {}, 'fp32': {}} + for precision in ['int8', 'fp32']: + throughput = 0.0 + bs = 1 + for root, dirs, files in os.walk(args.logs_dir): + for name in files: + file_name = os.path.join(root, name) + print(file_name) + if 'performance-' + precision in name: + for line in open(file_name, "r"): + result= parse_perf_line(line) + if result.get("throughput"): + throughput += result.get("throughput") + if result.get("batch_size"): + bs = result.get("batch_size") + + # set model status failed + if throughput==0.0: + os.system('echo "##vso[task.setvariable variable='+args.framework+'_'+args.model+'_failed]true"') + benchmark_performance_result_dict[precision] = { + "OS": OS, + "Platform": PLATFORM, + "Framework": args.framework, + "Version": args.fwk_ver, + "Model": args.model, + "Mode": "Inference", + "Type": "Performance", + "BS": 1, + "Value":throughput, + "Url":URL, + } + + return benchmark_performance_result_dict + + def main(): results = [] tuning_infos = [] - tuning_log = os.path.join(args.logs_dir, f"{args.framework}-{args.model}-tune.log") print("tuning log dir is {}".format(tuning_log)) # get model tuning results if os.path.exists(tuning_log): @@ -124,4 +210,5 @@ def parse_perf_line(line) -> float: if __name__ == '__main__': + tuning_log = os.path.join(args.logs_dir, f"{args.framework}-{args.model}-tune.log") main() diff --git a/.azure-pipelines/template/model-template.yml b/.azure-pipelines/template/model-template.yml index f6dfcebfca6..6f3a59efc9e 100644 --- a/.azure-pipelines/template/model-template.yml +++ b/.azure-pipelines/template/model-template.yml @@ -58,6 +58,7 @@ steps: displayName: Collect log - task: PublishPipelineArtifact@1 + condition: always() inputs: targetPath: $(Build.SourcesDirectory)/.azure-pipelines/scripts/models/${{ parameters.modelName }}/ artifact: ${{ parameters.framework }}_${{ parameters.modelName }} From 7624f102b98b8e2168d757bf663c5eaf8d51c79e Mon Sep 17 00:00:00 2001 From: "Sun, Xuehao" Date: Wed, 2 Nov 2022 09:32:53 +0800 Subject: [PATCH 04/34] test --- .azure-pipelines/scripts/models/run_tuning_common.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.azure-pipelines/scripts/models/run_tuning_common.sh b/.azure-pipelines/scripts/models/run_tuning_common.sh index 26b223bf994..341ab131c1d 100644 --- a/.azure-pipelines/scripts/models/run_tuning_common.sh +++ b/.azure-pipelines/scripts/models/run_tuning_common.sh @@ -48,3 +48,5 @@ echo "Tuning time spend: "$((end_seconds-start_seconds))"s " echo "Tuning strategy: ${strategy}" echo "Total resident size (kbytes): $(cat /proc/meminfo |grep 'MemTotal' |sed 's/[^0-9]//g')" + +exit 1 \ No newline at end of file From 3330f28e5194f89e13de123f0fd89c1356fcd9f8 Mon Sep 17 00:00:00 2001 From: "Sun, Xuehao" Date: Wed, 2 Nov 2022 09:57:27 +0800 Subject: [PATCH 05/34] test --- .azure-pipelines/model-test.yml | 18 +++++++++--------- .../scripts/models/run_tuning_common.sh | 1 + 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/.azure-pipelines/model-test.yml b/.azure-pipelines/model-test.yml index 93550fa65e6..0b261cb12ef 100644 --- a/.azure-pipelines/model-test.yml +++ b/.azure-pipelines/model-test.yml @@ -30,23 +30,23 @@ parameters: - name: 
TensorFlowModelList type: object default: - - resnet50v1.5 - - ssd_resnet50_v1 - - ssd_mobilenet_v1_ckpt - - inception_v1 - - resnet50_fashion + # - resnet50v1.5 + # - ssd_resnet50_v1 + # - ssd_mobilenet_v1_ckpt + # - inception_v1 + # - resnet50_fashion - darknet19 - - densenet-121 - - resnet-101 + # - densenet-121 + # - resnet-101 - name: PyTorchModelList type: object default: - resnet18 - - resnet18_fx + # - resnet18_fx - name: ONNXModelList type: object default: - - resnet50-v1-12 + # - resnet50-v1-12 - bert_base_MRPC_dynamic - name: MXNetModelList type: object diff --git a/.azure-pipelines/scripts/models/run_tuning_common.sh b/.azure-pipelines/scripts/models/run_tuning_common.sh index 341ab131c1d..e4116731156 100644 --- a/.azure-pipelines/scripts/models/run_tuning_common.sh +++ b/.azure-pipelines/scripts/models/run_tuning_common.sh @@ -1,5 +1,6 @@ #!/bin/bash set -x +set -eo pipefail # get parameters PATTERN='[-a-zA-Z0-9_]*=' From 18bc530327dcbd15a752251439ec6c2276f1238d Mon Sep 17 00:00:00 2001 From: "Sun, Xuehao" Date: Wed, 2 Nov 2022 10:01:14 +0800 Subject: [PATCH 06/34] test --- .azure-pipelines/scripts/models/run_tuning_common.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/.azure-pipelines/scripts/models/run_tuning_common.sh b/.azure-pipelines/scripts/models/run_tuning_common.sh index e4116731156..f1d739d03f9 100644 --- a/.azure-pipelines/scripts/models/run_tuning_common.sh +++ b/.azure-pipelines/scripts/models/run_tuning_common.sh @@ -1,5 +1,4 @@ #!/bin/bash -set -x set -eo pipefail # get parameters From ece98bddb9906f8389c7726f34de0c90fee0e4c7 Mon Sep 17 00:00:00 2001 From: "Sun, Xuehao" Date: Wed, 2 Nov 2022 10:05:55 +0800 Subject: [PATCH 07/34] test --- .azure-pipelines/scripts/models/run_model_trigger_common.sh | 2 +- .azure-pipelines/scripts/models/run_mxnet_models_trigger.sh | 2 +- .azure-pipelines/scripts/models/run_onnxrt_models_trigger.sh | 2 +- .azure-pipelines/scripts/models/run_pytorch_models_trigger.sh | 2 +- .../scripts/models/run_tensorflow_models_trigger.sh | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.azure-pipelines/scripts/models/run_model_trigger_common.sh b/.azure-pipelines/scripts/models/run_model_trigger_common.sh index 9eeadc8c4f0..a21f66ee6e8 100644 --- a/.azure-pipelines/scripts/models/run_model_trigger_common.sh +++ b/.azure-pipelines/scripts/models/run_model_trigger_common.sh @@ -1,5 +1,5 @@ #!/bin/bash - +set -eo pipefail # get parameters PATTERN='[-a-zA-Z0-9_]*=' diff --git a/.azure-pipelines/scripts/models/run_mxnet_models_trigger.sh b/.azure-pipelines/scripts/models/run_mxnet_models_trigger.sh index 45378c388d9..381ecfef28b 100644 --- a/.azure-pipelines/scripts/models/run_mxnet_models_trigger.sh +++ b/.azure-pipelines/scripts/models/run_mxnet_models_trigger.sh @@ -1,5 +1,5 @@ #!/bin/bash - +set -eo pipefail # get parameters PATTERN='[-a-zA-Z0-9_]*=' diff --git a/.azure-pipelines/scripts/models/run_onnxrt_models_trigger.sh b/.azure-pipelines/scripts/models/run_onnxrt_models_trigger.sh index e718c68b568..ae90a47cc3c 100644 --- a/.azure-pipelines/scripts/models/run_onnxrt_models_trigger.sh +++ b/.azure-pipelines/scripts/models/run_onnxrt_models_trigger.sh @@ -1,5 +1,5 @@ #!/bin/bash - +set -eo pipefail # get parameters PATTERN='[-a-zA-Z0-9_]*=' diff --git a/.azure-pipelines/scripts/models/run_pytorch_models_trigger.sh b/.azure-pipelines/scripts/models/run_pytorch_models_trigger.sh index 89cfbc6bbc6..88f45362791 100644 --- a/.azure-pipelines/scripts/models/run_pytorch_models_trigger.sh +++ 
b/.azure-pipelines/scripts/models/run_pytorch_models_trigger.sh @@ -1,5 +1,5 @@ #!/bin/bash - +set -eo pipefail # get parameters PATTERN='[-a-zA-Z0-9_]*=' diff --git a/.azure-pipelines/scripts/models/run_tensorflow_models_trigger.sh b/.azure-pipelines/scripts/models/run_tensorflow_models_trigger.sh index eec10675c74..1cbf8c872d7 100644 --- a/.azure-pipelines/scripts/models/run_tensorflow_models_trigger.sh +++ b/.azure-pipelines/scripts/models/run_tensorflow_models_trigger.sh @@ -1,5 +1,5 @@ #!/bin/bash - +set -eo pipefail # get parameters PATTERN='[-a-zA-Z0-9_]*=' From 87be0ec8f9dbce7136986b3bd59cb58ee94f106b Mon Sep 17 00:00:00 2001 From: "Sun, Xuehao" Date: Wed, 2 Nov 2022 10:21:02 +0800 Subject: [PATCH 08/34] update --- .azure-pipelines/scripts/models/env_setup.sh | 130 ++++++++++++++++++ .../models/run_model_trigger_common.sh | 74 +--------- .../models/run_mxnet_models_trigger.sh | 2 - .../models/run_onnxrt_models_trigger.sh | 8 -- .../models/run_pytorch_models_trigger.sh | 2 - 5 files changed, 136 insertions(+), 80 deletions(-) create mode 100644 .azure-pipelines/scripts/models/env_setup.sh diff --git a/.azure-pipelines/scripts/models/env_setup.sh b/.azure-pipelines/scripts/models/env_setup.sh new file mode 100644 index 00000000000..bfc518eb3eb --- /dev/null +++ b/.azure-pipelines/scripts/models/env_setup.sh @@ -0,0 +1,130 @@ +#!/bin/bash +set -eo pipefail +# get parameters +PATTERN='[-a-zA-Z0-9_]*=' + +for i in "$@"; do + case $i in + --yaml=*) + yaml=$(echo $i | sed "s/${PATTERN}//") + ;; + --framework=*) + framework=$(echo $i | sed "s/${PATTERN}//") + ;; + --fwk_ver=*) + fwk_ver=$(echo $i | sed "s/${PATTERN}//") + ;; + --torch_vision_ver=*) + torch_vision_ver=$(echo $i | sed "s/${PATTERN}//") + ;; + --model=*) + model=$(echo $i | sed "s/${PATTERN}//") + ;; + --model_src_dir=*) + model_src_dir=$(echo $i | sed "s/${PATTERN}//") + ;; + --dataset_location=*) + dataset_location=$(echo $i | sed "s/${PATTERN}//") + ;; + --batch_size=*) + batch_size=$(echo $i | sed "s/${PATTERN}//") + ;; + --strategy=*) + strategy=$(echo $i | sed "s/${PATTERN}//") + ;; + --new_benchmark=*) + new_benchmark=$(echo $i | sed "s/${PATTERN}//") + ;; + *) + echo "Parameter $i not recognized." + exit 1 + ;; + esac +done + +SCRIPTS_PATH="/neural-compressor/.azure-pipelines/scripts/models" +log_dir="/neural-compressor/.azure-pipelines/scripts/models" +WORK_SOURCE_DIR="/neural-compressor/examples/${framework}" +echo "processing ${framework}-${fwk_ver}-${model}" + +echo "======= creat log_dir =========" +if [ -d "${log_dir}/${model}" ]; then + echo "${log_dir}/${model} already exists, don't need to mkdir." +else + echo "no log dir ${log_dir}/${model}, create." + cd ${log_dir} + mkdir ${model} +fi + +if [ -d "${log_dir}/refer_log" ]; then + echo "${log_dir}/refer_log already exists, don't need to mkdir." +else + echo "no log dir ${log_dir}/refer_log, create." 
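The two exists-check blocks here could collapse into one idempotent call; a sketch, not part of the patch:

    mkdir -p "${log_dir}/${model}" "${log_dir}/refer_log"   # creates only what is missing, never errors on an existing dir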
+ cd ${log_dir} + mkdir refer_log +fi + +echo "====== install requirements ======" +/bin/bash /neural-compressor/.azure-pipelines/scripts/install_nc.sh + +cd ${WORK_SOURCE_DIR}/${model_src_dir} +pip install ruamel_yaml +pip install psutil +pip install protobuf==3.20.1 +if [[ "${framework}" == "tensorflow" ]]; then + pip install intel-tensorflow==${fwk_ver} +elif [[ "${framework}" == "pytorch" ]]; then + pip install torch==${fwk_ver} -f https://download.pytorch.org/whl/torch_stable.html + pip install torchvision==${torch_vision_ver} -f https://download.pytorch.org/whl/torch_stable.html +elif [[ "${framework}" == "onnxrt" ]]; then + pip install onnx==1.11.0 + pip install onnxruntime==${fwk_ver} +elif [[ "${framework}" == "mxnet" ]]; then + if [[ "${fwk_ver}" == "1.7.0" ]]; then + pip install mxnet==${fwk_ver}.post2 + elif [[ "${fwk_ver}" == "1.6.0" ]]; then + pip install mxnet-mkl==${mxnet_version} + else + pip install mxnet==${fwk_ver} + fi +fi + +if [ -f "requirements.txt" ]; then + sed -i '/neural-compressor/d' requirements.txt + if [ "${framework}" == "onnxrt" ]; then + sed -i '/^onnx>=/d;/^onnx==/d;/^onnxruntime>=/d;/^onnxruntime==/d' requirements.txt + fi + if [ "${framework}" == "tensorflow" ]; then + sed -i '/tensorflow==/d;/tensorflow$/d' requirements.txt + sed -i '/^intel-tensorflow/d' requirements.txt + fi + if [ "${framework}" == "mxnet" ]; then + sed -i '/mxnet==/d;/mxnet$/d;/mxnet-mkl==/d;/mxnet-mkl$/d' requirements.txt + fi + if [ "${framework}" == "pytorch" ]; then + sed -i '/torch==/d;/torch$/d;/torchvision==/d;/torchvision$/d' requirements.txt + fi + n=0 + until [ "$n" -ge 5 ]; do + python -m pip install -r requirements.txt && break + n=$((n + 1)) + sleep 5 + done + pip list +else + echo "Not found requirements.txt file." +fi + +echo "======== update yaml config ========" +echo -e "\nPrint origin yaml..." +cat ${yaml} +python ${SCRIPTS_PATH}/update_yaml_config.py \ + --yaml=${yaml} \ + --framework=${framework} \ + --dataset_location=${dataset_location} \ + --batch_size=${batch_size} \ + --strategy=${strategy} \ + --new_benchmark=${new_benchmark} \ + --multi_instance='true' +echo -e "\nPrint updated yaml... " +cat ${yaml} diff --git a/.azure-pipelines/scripts/models/run_model_trigger_common.sh b/.azure-pipelines/scripts/models/run_model_trigger_common.sh index a21f66ee6e8..9fdbef5cc00 100644 --- a/.azure-pipelines/scripts/models/run_model_trigger_common.sh +++ b/.azure-pipelines/scripts/models/run_model_trigger_common.sh @@ -49,79 +49,17 @@ echo "tuning_cmd is ${tuning_cmd}" echo "benchmark_cmd is ${benchmark_cmd}" if [ "${mode}" == "env_setup" ]; then - echo "======= creat log_dir =========" - if [ -d "${log_dir}/${model}" ]; then - echo "${log_dir}/${model} already exists, don't need to mkdir." - else - echo "no log dir ${log_dir}/${model}, create." 
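With the env_setup body moved out to env_setup.sh, what remains in run_model_trigger_common.sh is a ${mode} if/elif chain that silently does nothing for an unrecognized mode. A case statement expresses the same dispatch and fails fast; a sketch under the patch's mode names (the "$@" pass-through is a simplification, the real branches forward explicit flags):

    case "${mode}" in
        env_setup)      /bin/bash env_setup.sh "$@" ;;
        tuning)         /bin/bash "${SCRIPTS_PATH}/run_tuning_common.sh" "$@" ;;
        # note: as committed, the int8_benchmark branch runs run_benchmark_common.sh with
        # --precision="fp32" and the fp32_benchmark branch runs it with --precision="int8";
        # the echo banners and precisions look swapped relative to the mode names
        int8_benchmark) /bin/bash "${SCRIPTS_PATH}/run_benchmark_common.sh" "$@" --precision="int8" ;;
        fp32_benchmark) /bin/bash "${SCRIPTS_PATH}/run_benchmark_common.sh" "$@" --precision="fp32" ;;
        collect_log)    python -u "${SCRIPTS_PATH}/collect_log_model.py" "$@" ;;
        *)              echo "Unknown mode: ${mode}" >&2; exit 1 ;;
    esac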
- cd ${log_dir} - mkdir ${model} - fi - - echo "====== install requirements ======" - /bin/bash /neural-compressor/.azure-pipelines/scripts/install_nc.sh - - cd ${WORK_SOURCE_DIR}/${model_src_dir} - pip install ruamel_yaml - pip install psutil - pip install protobuf==3.20.1 - if [[ "${framework}" == "tensorflow" ]]; then - pip install intel-tensorflow==${fwk_ver} - elif [[ "${framework}" == "pytorch" ]]; then - pip install torch==${fwk_ver} -f https://download.pytorch.org/whl/torch_stable.html - pip install torchvision==${torch_vision_ver} -f https://download.pytorch.org/whl/torch_stable.html - elif [[ "${framework}" == "onnxrt" ]]; then - pip install onnx==1.11.0 - pip install onnxruntime==${fwk_ver} - elif [[ "${framework}" == "mxnet" ]]; then - if [[ "${fwk_ver}" == "1.7.0" ]]; then - pip install mxnet==${fwk_ver}.post2 - elif [[ "${fwk_ver}" == "1.6.0" ]]; then - pip install mxnet-mkl==${mxnet_version} - else - pip install mxnet==${fwk_ver} - fi - fi - - if [ -f "requirements.txt" ]; then - sed -i '/neural-compressor/d' requirements.txt - if [ "${framework}" == "onnxrt" ]; then - sed -i '/^onnx>=/d;/^onnx==/d;/^onnxruntime>=/d;/^onnxruntime==/d' requirements.txt - fi - if [ "${framework}" == "tensorflow" ]; then - sed -i '/tensorflow==/d;/tensorflow$/d' requirements.txt - sed -i '/^intel-tensorflow/d' requirements.txt - fi - if [ "${framework}" == "mxnet" ]; then - sed -i '/mxnet==/d;/mxnet$/d;/mxnet-mkl==/d;/mxnet-mkl$/d' requirements.txt - fi - if [ "${framework}" == "pytorch" ]; then - sed -i '/torch==/d;/torch$/d;/torchvision==/d;/torchvision$/d' requirements.txt - fi - n=0 - until [ "$n" -ge 5 ]; do - python -m pip install -r requirements.txt && break - n=$((n + 1)) - sleep 5 - done - pip list - else - echo "Not found requirements.txt file." - fi - - echo "======== update yaml config ========" - echo -e "\nPrint origin yaml..." - cat ${yaml} - python ${SCRIPTS_PATH}/update_yaml_config.py \ + /bin/bash env_setup.sh \ --yaml=${yaml} \ --framework=${framework} \ + --fwk_ver=${fwk_ver} \ + --torch_vision_ver=${torch_vision_ver} \ + --model=${model} \ + --model_src_dir=${model_src_dir} \ --dataset_location=${dataset_location} \ --batch_size=${batch_size} \ --strategy=${strategy} \ - --new_benchmark=${new_benchmark} \ - --multi_instance='true' - echo -e "\nPrint updated yaml... 
" - cat ${yaml} + --new_benchmark=${new_benchmark} elif [ "${mode}" == "tuning" ]; then cd ${WORK_SOURCE_DIR}/${model_src_dir} echo "======== run tuning ========" diff --git a/.azure-pipelines/scripts/models/run_mxnet_models_trigger.sh b/.azure-pipelines/scripts/models/run_mxnet_models_trigger.sh index 381ecfef28b..8304b5da40e 100644 --- a/.azure-pipelines/scripts/models/run_mxnet_models_trigger.sh +++ b/.azure-pipelines/scripts/models/run_mxnet_models_trigger.sh @@ -10,8 +10,6 @@ do model=`echo $i | sed "s/${PATTERN}//"`;; --tune_acc=*) tune_acc=`echo $i | sed "s/${PATTERN}//"`;; - --build_id=*) - build_id=`echo $i | sed "s/${PATTERN}//"`;; --mode=*) mode=`echo $i | sed "s/${PATTERN}//"`;; *) diff --git a/.azure-pipelines/scripts/models/run_onnxrt_models_trigger.sh b/.azure-pipelines/scripts/models/run_onnxrt_models_trigger.sh index ae90a47cc3c..a5a7a2378ca 100644 --- a/.azure-pipelines/scripts/models/run_onnxrt_models_trigger.sh +++ b/.azure-pipelines/scripts/models/run_onnxrt_models_trigger.sh @@ -10,8 +10,6 @@ do model=`echo $i | sed "s/${PATTERN}//"`;; --tune_acc=*) tune_acc=`echo $i | sed "s/${PATTERN}//"`;; - --build_id=*) - build_id=`echo $i | sed "s/${PATTERN}//"`;; --mode=*) mode=`echo $i | sed "s/${PATTERN}//"`;; *) @@ -67,12 +65,6 @@ elif [ "${model}" == "distilbert_base_MRPC_qdq" ]; then fi -/bin/bash run_model_trigger_common.sh --yaml=${yaml} --framework=${FRAMEWORK} --fwk_ver=${FRAMEWORK_VERSION} \ ---model=${model} --model_src_dir=${model_src_dir} --dataset_location=${dataset_location} \ ---input_model=${input_model} --batch_size=${batch_size} --strategy=${strategy} --new_benchmark=${new_benchmark} \ ---tuning_cmd="${tuning_cmd}" --benchmark_cmd="${benchmark_cmd} --mode=performance" --tune_acc=${tune_acc} --build_id=${build_id} - - /bin/bash run_model_trigger_common.sh \ --yaml=${yaml} \ --framework=${FRAMEWORK} \ diff --git a/.azure-pipelines/scripts/models/run_pytorch_models_trigger.sh b/.azure-pipelines/scripts/models/run_pytorch_models_trigger.sh index 88f45362791..d47d137a841 100644 --- a/.azure-pipelines/scripts/models/run_pytorch_models_trigger.sh +++ b/.azure-pipelines/scripts/models/run_pytorch_models_trigger.sh @@ -10,8 +10,6 @@ do model=`echo $i | sed "s/${PATTERN}//"`;; --tune_acc=*) tune_acc=`echo $i | sed "s/${PATTERN}//"`;; - --build_id=*) - build_id=`echo $i | sed "s/${PATTERN}//"`;; --mode=*) mode=`echo $i | sed "s/${PATTERN}//"`;; *) From c9c94ee6df1d5fa5f788c3710883ccb60850eeea Mon Sep 17 00:00:00 2001 From: "Sun, Xuehao" Date: Wed, 2 Nov 2022 10:28:54 +0800 Subject: [PATCH 09/34] update --- .azure-pipelines/scripts/models/env_setup.sh | 8 -------- 1 file changed, 8 deletions(-) diff --git a/.azure-pipelines/scripts/models/env_setup.sh b/.azure-pipelines/scripts/models/env_setup.sh index bfc518eb3eb..06296ca0fd3 100644 --- a/.azure-pipelines/scripts/models/env_setup.sh +++ b/.azure-pipelines/scripts/models/env_setup.sh @@ -56,14 +56,6 @@ else mkdir ${model} fi -if [ -d "${log_dir}/refer_log" ]; then - echo "${log_dir}/refer_log already exists, don't need to mkdir." -else - echo "no log dir ${log_dir}/refer_log, create." 
From c9c94ee6df1d5fa5f788c3710883ccb60850eeea Mon Sep 17 00:00:00 2001
From: "Sun, Xuehao"
Date: Wed, 2 Nov 2022 10:28:54 +0800
Subject: [PATCH 09/34] update

---
 .azure-pipelines/scripts/models/env_setup.sh | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/.azure-pipelines/scripts/models/env_setup.sh b/.azure-pipelines/scripts/models/env_setup.sh
index bfc518eb3eb..06296ca0fd3 100644
--- a/.azure-pipelines/scripts/models/env_setup.sh
+++ b/.azure-pipelines/scripts/models/env_setup.sh
@@ -56,14 +56,6 @@ else
     mkdir ${model}
 fi
 
-if [ -d "${log_dir}/refer_log" ]; then
-    echo "${log_dir}/refer_log already exists, don't need to mkdir."
-else
-    echo "no log dir ${log_dir}/refer_log, create."
-    cd ${log_dir}
-    mkdir refer_log
-fi
-
 echo "====== install requirements ======"
 /bin/bash /neural-compressor/.azure-pipelines/scripts/install_nc.sh
 
From a4c1b5bd45946e202c666cd30b786d27795811ba Mon Sep 17 00:00:00 2001
From: "Sun, Xuehao"
Date: Wed, 2 Nov 2022 11:24:54 +0800
Subject: [PATCH 10/34] change color

---
 .azure-pipelines/model-test.yml               |  4 +-
 .../models/run_model_trigger_common.sh        | 17 +++++----
 .../models/run_tensorflow_models_trigger.sh   |  2 -
 .../scripts/models/run_tuning_common.sh       | 37 ++++++++++++-------
 .../scripts/models/update_yaml_config.py      | 10 +----
 .azure-pipelines/template/model-template.yml  | 10 +++--
 6 files changed, 43 insertions(+), 37 deletions(-)

diff --git a/.azure-pipelines/model-test.yml b/.azure-pipelines/model-test.yml
index 0b261cb12ef..8fcb36c3fd4 100644
--- a/.azure-pipelines/model-test.yml
+++ b/.azure-pipelines/model-test.yml
@@ -46,8 +46,8 @@ parameters:
   - name: ONNXModelList
     type: object
     default:
-      # - resnet50-v1-12
-      - bert_base_MRPC_dynamic
+      - resnet50-v1-12
+      # - bert_base_MRPC_dynamic
   - name: MXNetModelList
     type: object
     default:
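change_color.sh itself is not shown anywhere in this series; for the `$BOLD_YELLOW && echo ... && $RESET` call sites it introduces to work, each variable must expand to a command that emits an ANSI escape and returns success. A hypothetical sketch of what that file might define (the exact escape values are assumptions, not the repository's actual contents):

    #!/bin/bash
    # Each variable holds a command, so `$BOLD_YELLOW && echo msg && $RESET`
    # switches the terminal color, prints, then restores the default.
    BOLD_YELLOW="echo -en \e[1;33m"
    BOLD_GREEN="echo -en \e[1;32m"
    BOLD_RED="echo -en \e[1;31m"
    RESET="echo -en \e[0m"

    $BOLD_YELLOW && echo "highlighted in yellow" && $RESET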
diff --git a/.azure-pipelines/scripts/models/run_model_trigger_common.sh b/.azure-pipelines/scripts/models/run_model_trigger_common.sh
index 9fdbef5cc00..6bc427e25a9 100644
--- a/.azure-pipelines/scripts/models/run_model_trigger_common.sh
+++ b/.azure-pipelines/scripts/models/run_model_trigger_common.sh
@@ -1,5 +1,6 @@
 #!/bin/bash
 set -eo pipefail
+source /neural-compressor/.azure-pipelines/scripts/change_color.sh
 
 # get parameters
 PATTERN='[-a-zA-Z0-9_]*='
@@ -44,9 +45,9 @@ done
 log_dir="/neural-compressor/.azure-pipelines/scripts/models"
 WORK_SOURCE_DIR="/neural-compressor/examples/${framework}"
 SCRIPTS_PATH="/neural-compressor/.azure-pipelines/scripts/models"
-echo "processing ${framework}-${fwk_ver}-${model}"
-echo "tuning_cmd is ${tuning_cmd}"
-echo "benchmark_cmd is ${benchmark_cmd}"
+$BOLD_YELLOW && echo "processing ${framework}-${fwk_ver}-${model}" && $RESET
+$BOLD_YELLOW && echo "tuning_cmd is ${tuning_cmd}" && $RESET
+$BOLD_YELLOW && echo "benchmark_cmd is ${benchmark_cmd}" && $RESET
 
 if [ "${mode}" == "env_setup" ]; then
     /bin/bash env_setup.sh \
@@ -62,7 +63,7 @@ if [ "${mode}" == "env_setup" ]; then
         --new_benchmark=${new_benchmark}
 elif [ "${mode}" == "tuning" ]; then
     cd ${WORK_SOURCE_DIR}/${model_src_dir}
-    echo "======== run tuning ========"
+    $BOLD_YELLOW && echo "======== run tuning ========" && $RESET
     /bin/bash ${SCRIPTS_PATH}/run_tuning_common.sh \
         --framework=${framework} \
         --model=${model} \
@@ -73,7 +74,7 @@ elif [ "${mode}" == "tuning" ]; then
         2>&1 | tee -a ${log_dir}/${model}/${framework}-${model}-tune.log
 elif [ "${mode}" == "int8_benchmark" ]; then
     cd ${WORK_SOURCE_DIR}/${model_src_dir}
-    echo "====== run benchmark fp32 ======="
+    $BOLD_YELLOW && echo "====== run benchmark fp32 =======" && $RESET
     /bin/bash ${SCRIPTS_PATH}/run_benchmark_common.sh \
         --framework=${framework} \
         --model=${model} \
@@ -85,7 +86,7 @@ elif [ "${mode}" == "int8_benchmark" ]; then
         --precision="fp32"
 elif [ "${mode}" == "fp32_benchmark" ]; then
     cd ${WORK_SOURCE_DIR}/${model_src_dir}
-    echo "====== run benchmark int8 ======="
+    $BOLD_YELLOW && echo "====== run benchmark int8 =======" && $RESET
     if [[ "${framework}" == "onnxrt" ]]; then
         model_name="${log_dir}/${model}/${framework}-${model}-tune.onnx"
     elif [[ "${framework}" == "mxnet" ]]; then
@@ -108,7 +109,7 @@ elif [ "${mode}" == "fp32_benchmark" ]; then
         --precision="int8"
 elif [ "${mode}" == "collect_log" ]; then
     cd ${WORK_SOURCE_DIR}/${model_src_dir}
-    echo "====== collect logs of model ${model} ======="
+    $BOLD_YELLOW && echo "====== collect logs of model ${model} =======" && $RESET
     python -u ${SCRIPTS_PATH}/collect_log_model.py \
         --framework=${framework} \
         --fwk_ver=${fwk_ver} \
@@ -116,5 +117,5 @@ elif [ "${mode}" == "collect_log" ]; then
         --logs_dir="${log_dir}/${model}" \
         --output_dir="${log_dir}/${model}" \
         --build_id=${BUILD_BUILDID}
-    echo "====== Finish model test ======="
+    $BOLD_YELLOW && echo "====== Finish model test =======" && $RESET
 fi
diff --git a/.azure-pipelines/scripts/models/run_tensorflow_models_trigger.sh b/.azure-pipelines/scripts/models/run_tensorflow_models_trigger.sh
index 1cbf8c872d7..e7cbf16f230 100644
--- a/.azure-pipelines/scripts/models/run_tensorflow_models_trigger.sh
+++ b/.azure-pipelines/scripts/models/run_tensorflow_models_trigger.sh
@@ -10,8 +10,6 @@ do
             model=`echo $i | sed "s/${PATTERN}//"`;;
         --tune_acc=*)
             tune_acc=`echo $i | sed "s/${PATTERN}//"`;;
-        --build_id=*)
-            build_id=`echo $i | sed "s/${PATTERN}//"`;;
         --mode=*)
             mode=`echo $i | sed "s/${PATTERN}//"`;;
         *)
diff --git a/.azure-pipelines/scripts/models/run_tuning_common.sh b/.azure-pipelines/scripts/models/run_tuning_common.sh
index f1d739d03f9..843a6dd0a65 100644
--- a/.azure-pipelines/scripts/models/run_tuning_common.sh
+++ b/.azure-pipelines/scripts/models/run_tuning_common.sh
@@ -1,5 +1,6 @@
 #!/bin/bash
 set -eo pipefail
+source /neural-compressor/.azure-pipelines/scripts/change_color.sh
 
 # get parameters
 PATTERN='[-a-zA-Z0-9_]*='
@@ -35,18 +36,28 @@ else
     output_model=${log_dir}/${framework}-${model}-tune.pb
 fi
 
-echo -e "-------- run_tuning_common --------"
-echo ${tuning_cmd}
-eval "/usr/bin/time -v ${tuning_cmd} --output_model=${output_model}"
+$BOLD_YELLOW && echo -e "-------- run_tuning_common --------" && $RESET
+$BOLD_YELLOW && echo ${tuning_cmd} && $RESET
 
-echo "====== finish tuning. echo information. ======"
-endtime=`date +'%Y-%m-%d %H:%M:%S'`
-start_seconds=$(date --date="$starttime" +%s);
-end_seconds=$(date --date="$endtime" +%s);
-echo "Tuning time spend: "$((end_seconds-start_seconds))"s "
-
-echo "Tuning strategy: ${strategy}"
-
-echo "Total resident size (kbytes): $(cat /proc/meminfo |grep 'MemTotal' |sed 's/[^0-9]//g')"
+max_loop=3
+for ((iter=0; iter<${max_loop}; iter++))
+do
+    eval "/usr/bin/time -v ${tuning_cmd} --output_model=${output_model}"
+
+    $BOLD_YELLOW && echo "====== finish tuning. echo information. ======" && $RESET
+    endtime=`date +'%Y-%m-%d %H:%M:%S'`
+    start_seconds=$(date --date="$starttime" +%s);
+    end_seconds=$(date --date="$endtime" +%s);
+    $BOLD_GREEN echo "Tuning time spend: "$((end_seconds-start_seconds))"s " && $RESET
+    $BOLD_GREEN echo "Tuning strategy: ${strategy}" && $RESET
+    $BOLD_GREEN && echo "Total resident size (kbytes): $(cat /proc/meminfo |grep 'MemTotal' |sed 's/[^0-9]//g')" && $RESET
+
+    $BOLD_YELLOW && echo "====== check tuning status. ======" && $RESET
+    if [ $(grep ${framework}-${model}-tune.log | wc -l) == 0 ];then
+        exit 1
+    fi
+    if [ $(grep ${framework}-${model}-tune.log | grep "Not found" | wc -l) == 0 ];then
+        exit 1
+    fi
+done
 
-exit 1
\ No newline at end of file
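The timing block added above converts human-readable timestamps back to epoch seconds to compute elapsed tuning time. A standalone sketch of the same technique, with `sleep` standing in for the real tuning command:

    #!/bin/bash
    starttime=$(date +'%Y-%m-%d %H:%M:%S')
    sleep 2                                     # stand-in for the tuning run
    endtime=$(date +'%Y-%m-%d %H:%M:%S')
    # `date --date` re-parses each timestamp and +%s prints epoch seconds.
    start_seconds=$(date --date="$starttime" +%s)
    end_seconds=$(date --date="$endtime" +%s)
    echo "Tuning time spend: $((end_seconds - start_seconds))s"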
======" && $RESET + if [ $(grep ${framework}-${model}-tune.log | wc -l) == 0 ];then + exit 1 + fi + if [ $(grep ${framework}-${model}-tune.log | grep "Not found" | wc -l) == 0 ];then + exit 1 + fi +done -exit 1 \ No newline at end of file diff --git a/.azure-pipelines/scripts/models/update_yaml_config.py b/.azure-pipelines/scripts/models/update_yaml_config.py index 9d92a457f6d..c561f4398d6 100644 --- a/.azure-pipelines/scripts/models/update_yaml_config.py +++ b/.azure-pipelines/scripts/models/update_yaml_config.py @@ -64,7 +64,7 @@ def update_yaml_dataset(yaml, framework, dataset_location): line = re.sub(key_patterns["pattern"], key_patterns["replacement"], line) config.write(line) - if framework == "pytorch": + else: val_dataset = dataset_location + f"\{os.path.sep}" + "val" train_dataset = dataset_location + f"\{os.path.sep}" + "train" patterns = { @@ -78,14 +78,6 @@ def update_yaml_dataset(yaml, framework, dataset_location): }, } - if topology == "distilbert_base_MRPC": - patterns.update({ - "bert_name": { - "pattern": r'name:/s+bert', - "replacement": "name: distilbert", - } - }) - print("======= update_yaml_dataset =======") with open(yaml, "w") as config: for line in lines: diff --git a/.azure-pipelines/template/model-template.yml b/.azure-pipelines/template/model-template.yml index 6f3a59efc9e..b43ff2a086e 100644 --- a/.azure-pipelines/template/model-template.yml +++ b/.azure-pipelines/template/model-template.yml @@ -52,9 +52,13 @@ steps: && bash run_${{ parameters.framework }}_models_trigger.sh --model=${{ parameters.modelName }} --tune_acc=true --mode='fp32_benchmark'" displayName: FP32 Benchmark - - script: | - docker exec ${{ parameters.modelContainerName }} bash -c "cd /neural-compressor/.azure-pipelines/scripts/models \ - && bash run_${{ parameters.framework }}_models_trigger.sh --model=${{ parameters.modelName }} --tune_acc=true --mode='collect_log'" + - task: Bash@3 + condition: always() + inputs: + targetType: "inline" + script: | + docker exec ${{ parameters.modelContainerName }} bash -c "cd /neural-compressor/.azure-pipelines/scripts/models \ + && bash run_${{ parameters.framework }}_models_trigger.sh --model=${{ parameters.modelName }} --tune_acc=true --mode='collect_log'" displayName: Collect log - task: PublishPipelineArtifact@1 From ab3a87b38330c887b6600d785c1fc21f3f7b766d Mon Sep 17 00:00:00 2001 From: "Sun, Xuehao" Date: Wed, 2 Nov 2022 11:40:09 +0800 Subject: [PATCH 11/34] change color --- .../scripts/models/run_benchmark_common.sh | 25 ++++++++++--------- .../scripts/models/run_tuning_common.sh | 16 ++++++------ 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/.azure-pipelines/scripts/models/run_benchmark_common.sh b/.azure-pipelines/scripts/models/run_benchmark_common.sh index d0f4ab72818..9f43a514369 100644 --- a/.azure-pipelines/scripts/models/run_benchmark_common.sh +++ b/.azure-pipelines/scripts/models/run_benchmark_common.sh @@ -1,5 +1,6 @@ #!/bin/bash -set -x +set -eo pipefail +source /neural-compressor/.azure-pipelines/scripts/change_color.sh # get parameters PATTERN='[-a-zA-Z0-9_]*=' @@ -28,21 +29,21 @@ do esac done -echo "-------- run_benchmark_common --------" +$BOLD_YELLOW && echo "-------- run_benchmark_common --------" && $RESET # run accuracy # tune_acc==true means using accuracy results from tuning log if [ "${tune_acc}" == "false" ]; then - echo "run tuning accuracy in precision ${precision}" + $BOLD_YELLOW && echo "run tuning accuracy in precision ${precision}" && $RESET eval "${benchmark_cmd} --input_model=${input_model} 
diff --git a/.azure-pipelines/scripts/models/run_tuning_common.sh b/.azure-pipelines/scripts/models/run_tuning_common.sh
index 843a6dd0a65..cbbc28b1b27 100644
--- a/.azure-pipelines/scripts/models/run_tuning_common.sh
+++ b/.azure-pipelines/scripts/models/run_tuning_common.sh
@@ -48,16 +48,16 @@ do
     endtime=`date +'%Y-%m-%d %H:%M:%S'`
     start_seconds=$(date --date="$starttime" +%s);
     end_seconds=$(date --date="$endtime" +%s);
-    $BOLD_GREEN echo "Tuning time spend: "$((end_seconds-start_seconds))"s " && $RESET
-    $BOLD_GREEN echo "Tuning strategy: ${strategy}" && $RESET
+    $BOLD_GREEN && echo "Tuning time spend: "$((end_seconds-start_seconds))"s " && $RESET
+    $BOLD_GREEN && echo "Tuning strategy: ${strategy}" && $RESET
     $BOLD_GREEN && echo "Total resident size (kbytes): $(cat /proc/meminfo |grep 'MemTotal' |sed 's/[^0-9]//g')" && $RESET
 
     $BOLD_YELLOW && echo "====== check tuning status. ======" && $RESET
-    if [ $(grep ${framework}-${model}-tune.log | wc -l) == 0 ];then
-        exit 1
-    fi
-    if [ $(grep ${framework}-${model}-tune.log | grep "Not found" | wc -l) == 0 ];then
-        exit 1
-    fi
+    # if [ $(grep ${framework}-${model}-tune.log | wc -l) == 0 ];then
+    #     exit 1
+    # fi
+    # if [ $(grep ${framework}-${model}-tune.log | grep "Not found" | wc -l) == 0 ];then
+    #     exit 1
+    # fi
 done
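Patch 11 also swaps `set -x` for `set -eo pipefail` in run_benchmark_common.sh, which changes failure semantics rather than just tracing. A two-line demo of what the new options buy:

    #!/bin/bash
    set -eo pipefail
    false | tee /dev/null    # -o pipefail: the pipeline fails because its
                             # first stage failed, not just its last stage
    echo "never reached"     # -e: the script aborts before this line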
======" && $RESET - if [ $(grep ${framework}-${model}-tune.log | wc -l) == 0 ];then - exit 1 - fi - if [ $(grep ${framework}-${model}-tune.log | grep "Not found" | wc -l) == 0 ];then - exit 1 - fi + # if [ $(grep ${framework}-${model}-tune.log | wc -l) == 0 ];then + # exit 1 + # fi + # if [ $(grep ${framework}-${model}-tune.log | grep "Not found" | wc -l) == 0 ];then + # exit 1 + # fi done From 44ad300d281630f7a44a4fc26eeb50ea305323de Mon Sep 17 00:00:00 2001 From: "Sun, Xuehao" Date: Wed, 2 Nov 2022 14:17:44 +0800 Subject: [PATCH 12/34] update --- .../scripts/models/collect_log_model.py | 41 ++++++++-- .azure-pipelines/scripts/models/env_setup.sh | 20 ++--- .../models/run_model_trigger_common.sh | 78 +++++++++++++------ .../scripts/models/run_tuning_common.sh | 2 +- 4 files changed, 103 insertions(+), 38 deletions(-) diff --git a/.azure-pipelines/scripts/models/collect_log_model.py b/.azure-pipelines/scripts/models/collect_log_model.py index fcaea56d761..a59cbfcd266 100644 --- a/.azure-pipelines/scripts/models/collect_log_model.py +++ b/.azure-pipelines/scripts/models/collect_log_model.py @@ -9,6 +9,7 @@ parser.add_argument("--logs_dir", type=str, default=".") parser.add_argument("--output_dir", type=str, default=".") parser.add_argument("--build_id", type=str, default="3117") +parser.add_argument("--stage", type=str, default="collect_log") args = parser.parse_args() print('===== collecting log model =======') print('build_id: '+args.build_id) @@ -16,10 +17,8 @@ PLATFORM='icx' URL ='https://dev.azure.com/lpot-inc/neural-compressor/_build/results?buildId='+args.build_id+'&view=artifacts&pathAsName=false&type=publishedArtifacts' -print(args) - -def get_model_tuning_results(): +def get_model_tuning_dict_results(): tuning_result_dict = {} if os.path.exists(tuning_log): @@ -72,7 +71,7 @@ def get_model_tuning_results(): return tuning_result_dict, benchmark_accuracy_result_dict -def get_model_benchmark_results(): +def get_model_benchmark_txt_results(): benchmark_performance_result_dict = {'int8': {}, 'fp32': {}} for precision in ['int8', 'fp32']: throughput = 0.0 @@ -108,7 +107,25 @@ def get_model_benchmark_results(): return benchmark_performance_result_dict -def main(): +def get_refer_data(): + refer_log = os.path.join(args.logs_dir, f"{args.framework}_{args.model}_summary.log") + result = {} + if os.path.exists(refer_log): + with open(refer_log, "r") as f: + lines = f.readlines() + keys = lines[0].split(";") + values = [lines[i].split(";") for i in range(1, len(lines))] + for value in values: + precision = value[keys.index("Precision")] + Type = value[keys.index("Type")] + result[f"{precision}_{Type}"] = float(value[keys.index("Value")]) + return result + else: + print("refer log file not found") + return 0 + + +def collect_log(): results = [] tuning_infos = [] print("tuning log dir is {}".format(tuning_log)) @@ -211,4 +228,16 @@ def parse_perf_line(line) -> float: if __name__ == '__main__': tuning_log = os.path.join(args.logs_dir, f"{args.framework}-{args.model}-tune.log") - main() + refer = get_refer_data() + if args.stage == "collect_log": + collect_log() + elif args.stage == "tuning": + tuning_result_dict, benchmark_accuracy_result_dict = get_model_tuning_dict_results() + elif args.stage == "int8_benchmark": + benchmark_performance_result_dict = get_model_benchmark_txt_results() + assert abs(benchmark_performance_result_dict.get("Value")-refer.get(f"INT8_Performance"))/refer.get(f"INT8_Performance") <= 0.05 + elif args.stage == "fp32_benchmark": + benchmark_performance_result_dict = 
diff --git a/.azure-pipelines/scripts/models/env_setup.sh b/.azure-pipelines/scripts/models/env_setup.sh
index 06296ca0fd3..1711e9f12c6 100644
--- a/.azure-pipelines/scripts/models/env_setup.sh
+++ b/.azure-pipelines/scripts/models/env_setup.sh
@@ -1,5 +1,7 @@
 #!/bin/bash
 set -eo pipefail
+source /neural-compressor/.azure-pipelines/scripts/change_color.sh
+
 # get parameters
 PATTERN='[-a-zA-Z0-9_]*='
@@ -45,18 +47,18 @@ done
 SCRIPTS_PATH="/neural-compressor/.azure-pipelines/scripts/models"
 log_dir="/neural-compressor/.azure-pipelines/scripts/models"
 WORK_SOURCE_DIR="/neural-compressor/examples/${framework}"
-echo "processing ${framework}-${fwk_ver}-${model}"
+$BOLD_YELLOW && echo "processing ${framework}-${fwk_ver}-${model}" && $RESET
 
-echo "======= creat log_dir ========="
+$BOLD_YELLOW && echo "======= creat log_dir =========" && $RESET
 if [ -d "${log_dir}/${model}" ]; then
-    echo "${log_dir}/${model} already exists, don't need to mkdir."
+    $BOLD_GREEN && echo "${log_dir}/${model} already exists, don't need to mkdir." && $RESET
 else
-    echo "no log dir ${log_dir}/${model}, create."
+    $BOLD_GREEN && echo "no log dir ${log_dir}/${model}, create." && $RESET
     cd ${log_dir}
     mkdir ${model}
 fi
 
-echo "====== install requirements ======"
+$BOLD_YELLOW && echo "====== install requirements ======" && $RESET
 /bin/bash /neural-compressor/.azure-pipelines/scripts/install_nc.sh
 
 cd ${WORK_SOURCE_DIR}/${model_src_dir}
@@ -104,11 +106,11 @@ if [ -f "requirements.txt" ]; then
     done
     pip list
 else
-    echo "Not found requirements.txt file."
+    $BOLD_RED && echo "Not found requirements.txt file." && $RESET
 fi
 
-echo "======== update yaml config ========"
-echo -e "\nPrint origin yaml..."
+$BOLD_YELLOW && echo "======== update yaml config ========" && $RESET
+$BOLD_YELLOW && echo -e "\nPrint origin yaml..." && $RESET
 cat ${yaml}
 python ${SCRIPTS_PATH}/update_yaml_config.py \
     --yaml=${yaml} \
@@ -118,5 +120,5 @@ python ${SCRIPTS_PATH}/update_yaml_config.py \
     --strategy=${strategy} \
     --new_benchmark=${new_benchmark} \
     --multi_instance='true'
-echo -e "\nPrint updated yaml... "
+$BOLD_YELLOW && echo -e "\nPrint updated yaml... " && $RESET
 cat ${yaml}
diff --git a/.azure-pipelines/scripts/models/run_model_trigger_common.sh b/.azure-pipelines/scripts/models/run_model_trigger_common.sh
index 6bc427e25a9..9456580da45 100644
--- a/.azure-pipelines/scripts/models/run_model_trigger_common.sh
+++ b/.azure-pipelines/scripts/models/run_model_trigger_common.sh
@@ -46,7 +46,6 @@ log_dir="/neural-compressor/.azure-pipelines/scripts/models"
 WORK_SOURCE_DIR="/neural-compressor/examples/${framework}"
 SCRIPTS_PATH="/neural-compressor/.azure-pipelines/scripts/models"
 $BOLD_YELLOW && echo "processing ${framework}-${fwk_ver}-${model}" && $RESET
-$BOLD_YELLOW && echo "tuning_cmd is ${tuning_cmd}" && $RESET
 $BOLD_YELLOW && echo "benchmark_cmd is ${benchmark_cmd}" && $RESET
 
 if [ "${mode}" == "env_setup" ]; then
@@ -62,6 +62,7 @@ if [ "${mode}" == "env_setup" ]; then
         --new_benchmark=${new_benchmark}
 elif [ "${mode}" == "tuning" ]; then
     cd ${WORK_SOURCE_DIR}/${model_src_dir}
+    $BOLD_YELLOW && echo "tuning_cmd is ${tuning_cmd}" && $RESET
     $BOLD_YELLOW && echo "======== run tuning ========" && $RESET
     /bin/bash ${SCRIPTS_PATH}/run_tuning_common.sh \
         --framework=${framework} \
@@ -72,20 +72,43 @@ elif [ "${mode}" == "tuning" ]; then
         --input_model=${input_model} \
         --strategy=${strategy} \
         2>&1 | tee -a ${log_dir}/${model}/${framework}-${model}-tune.log
-elif [ "${mode}" == "int8_benchmark" ]; then
+elif [ "${mode}" == "fp32_benchmark" ]; then
     cd ${WORK_SOURCE_DIR}/${model_src_dir}
+    $BOLD_YELLOW && echo "benchmark_cmd is ${benchmark_cmd}" && $RESET
     $BOLD_YELLOW && echo "====== run benchmark fp32 =======" && $RESET
-    /bin/bash ${SCRIPTS_PATH}/run_benchmark_common.sh \
-        --framework=${framework} \
-        --model=${model} \
-        --input_model=${input_model} \
-        --benchmark_cmd="${benchmark_cmd}" \
-        --tune_acc=${tune_acc} \
-        --log_dir="${log_dir}/${model}" \
-        --new_benchmark=${new_benchmark} \
-        --precision="fp32"
-elif [ "${mode}" == "fp32_benchmark" ]; then
+    max_loop=3
+    for ((iter=0; iter<${max_loop}; iter++))
+    do
+        /bin/bash ${SCRIPTS_PATH}/run_benchmark_common.sh \
+            --framework=${framework} \
+            --model=${model} \
+            --input_model=${input_model} \
+            --benchmark_cmd="${benchmark_cmd}" \
+            --tune_acc=${tune_acc} \
+            --log_dir="${log_dir}/${model}" \
+            --new_benchmark=${new_benchmark} \
+            --precision="fp32"
+
+        python -u ${SCRIPTS_PATH}/collect_log_model.py \
+            --framework=${framework} \
+            --fwk_ver=${fwk_ver} \
+            --model=${model} \
+            --logs_dir="${log_dir}/${model}" \
+            --output_dir="${log_dir}/${model}" \
+            --build_id=${BUILD_BUILDID} \
+            --stage=${mode}
+
+        exit_code=$?
+        if [ ${exit_code} -ne 0 ] ; then
+            $BOLD_RED && echo "Error!! Run again" && $RESET
+        else
+            break
+        fi
+    done
+
+elif [ "${mode}" == "int8_benchmark" ]; then
     cd ${WORK_SOURCE_DIR}/${model_src_dir}
+    $BOLD_YELLOW && echo "benchmark_cmd is ${benchmark_cmd}" && $RESET
     $BOLD_YELLOW && echo "====== run benchmark int8 =======" && $RESET
     if [[ "${framework}" == "onnxrt" ]]; then
         model_name="${log_dir}/${model}/${framework}-${model}-tune.onnx"
@@ -98,15 +121,26 @@ elif [ "${mode}" == "fp32_benchmark" ]; then
         benchmark_cmd="${benchmark_cmd} --int8=true"
     fi
 
-    /bin/bash ${SCRIPTS_PATH}/run_benchmark_common.sh \
-        --framework=${framework} \
-        --model=${model} \
-        --input_model="${model_name}" \
-        --benchmark_cmd="${benchmark_cmd}" \
-        --tune_acc=${tune_acc} \
-        --log_dir="${log_dir}/${model}" \
-        --new_benchmark=${new_benchmark} \
-        --precision="int8"
+    max_loop=3
+    for ((iter=0; iter<${max_loop}; iter++))
+    do
+        /bin/bash ${SCRIPTS_PATH}/run_benchmark_common.sh \
+            --framework=${framework} \
+            --model=${model} \
+            --input_model="${model_name}" \
+            --benchmark_cmd="${benchmark_cmd}" \
+            --tune_acc=${tune_acc} \
+            --log_dir="${log_dir}/${model}" \
+            --new_benchmark=${new_benchmark} \
+            --precision="int8"
+        exit_code=$?
+        if [ ${exit_code} -ne 0 ] ; then
+            $BOLD_RED && echo "Error!! Run again" && $RESET
+        else
+            break
+        fi
+    done
+
 elif [ "${mode}" == "collect_log" ]; then
     cd ${WORK_SOURCE_DIR}/${model_src_dir}
     $BOLD_YELLOW && echo "====== collect logs of model ${model} =======" && $RESET
@@ -117,5 +151,5 @@ elif [ "${mode}" == "collect_log" ]; then
         --logs_dir="${log_dir}/${model}" \
         --output_dir="${log_dir}/${model}" \
         --build_id=${BUILD_BUILDID}
-    $BOLD_YELLOW && echo "====== Finish model test =======" && $RESET
+    $BOLD_YELLOW && echo "====== Finish collect logs =======" && $RESET
 fi
diff --git a/.azure-pipelines/scripts/models/run_tuning_common.sh b/.azure-pipelines/scripts/models/run_tuning_common.sh
index cbbc28b1b27..e5729bd790c 100644
--- a/.azure-pipelines/scripts/models/run_tuning_common.sh
+++ b/.azure-pipelines/scripts/models/run_tuning_common.sh
@@ -39,7 +39,7 @@ fi
 $BOLD_YELLOW && echo -e "-------- run_tuning_common --------" && $RESET
 $BOLD_YELLOW && echo ${tuning_cmd} && $RESET
 
-max_loop=3
+max_loop=1
 for ((iter=0; iter<${max_loop}; iter++))
 do
     eval "/usr/bin/time -v ${tuning_cmd} --output_model=${output_model}"
From 719880d992b87f3bb8a84d8ac63b2ca4bfe08a70 Mon Sep 17 00:00:00 2001
From: "Sun, Xuehao"
Date: Wed, 2 Nov 2022 14:38:56 +0800
Subject: [PATCH 13/34] update

---
 .../scripts/models/collect_log_model.py        |  2 +-
 .../models/run_model_trigger_common.sh         | 16 ++++++++++++++--
 .azure-pipelines/template/model-template.yml   |  6 +++---
 3 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/.azure-pipelines/scripts/models/collect_log_model.py b/.azure-pipelines/scripts/models/collect_log_model.py
index a59cbfcd266..d122683cfd3 100644
--- a/.azure-pipelines/scripts/models/collect_log_model.py
+++ b/.azure-pipelines/scripts/models/collect_log_model.py
@@ -108,7 +108,7 @@ def get_model_benchmark_txt_results():
 
 def get_refer_data():
-    refer_log = os.path.join(args.logs_dir, f"{args.framework}_{args.model}_summary.log")
+    refer_log = os.path.join(f"{args.logs_dir}_refer", f"{args.framework}_{args.model}_summary.log")
     result = {}
     if os.path.exists(refer_log):
         with open(refer_log, "r") as f:
diff --git a/.azure-pipelines/scripts/models/run_model_trigger_common.sh b/.azure-pipelines/scripts/models/run_model_trigger_common.sh
index 9456580da45..7847725275a 100644
--- a/.azure-pipelines/scripts/models/run_model_trigger_common.sh
+++ b/.azure-pipelines/scripts/models/run_model_trigger_common.sh
@@ -100,8 +100,9 @@ elif [ "${mode}" == "fp32_benchmark" ]; then
 
         exit_code=$?
         if [ ${exit_code} -ne 0 ] ; then
-            $BOLD_RED && echo "Error!! Run again" && $RESET
+            $BOLD_RED && echo "FAILED!! Run again" && $RESET
         else
+            $BOLD_GREEN && echo "SUCCEED" && $RESET
             break
         fi
     done
@@ -133,10 +134,21 @@ elif [ "${mode}" == "int8_benchmark" ]; then
             --log_dir="${log_dir}/${model}" \
             --new_benchmark=${new_benchmark} \
             --precision="int8"
+
+        python -u ${SCRIPTS_PATH}/collect_log_model.py \
+            --framework=${framework} \
+            --fwk_ver=${fwk_ver} \
+            --model=${model} \
+            --logs_dir="${log_dir}/${model}" \
+            --output_dir="${log_dir}/${model}" \
+            --build_id=${BUILD_BUILDID} \
+            --stage=${mode}
+
         exit_code=$?
         if [ ${exit_code} -ne 0 ] ; then
-            $BOLD_RED && echo "Error!! Run again" && $RESET
+            $BOLD_RED && echo "FAILED!! Run again" && $RESET
         else
+            $BOLD_GREEN && echo "SUCCEED" && $RESET
             break
         fi
     done
diff --git a/.azure-pipelines/template/model-template.yml b/.azure-pipelines/template/model-template.yml
index b43ff2a086e..9e9332707bc 100644
--- a/.azure-pipelines/template/model-template.yml
+++ b/.azure-pipelines/template/model-template.yml
@@ -27,11 +27,11 @@ steps:
   - task: DownloadPipelineArtifact@2
     inputs:
       source: "specific"
-      artifact: "FinalReport"
+      artifact: ${{ parameters.modelName }}
       patterns: "**.log"
-      path: $(Build.SourcesDirectory)/.azure-pipelines/scripts/models/refer_log
+      path: $(Build.SourcesDirectory)/.azure-pipelines/scripts/models/${{ parameters.modelName }}_refer_log
      project: $(System.TeamProject)
-      pipeline: "Baseline"
+      pipeline: "Model-Test"
       runVersion: "specific"
       runId: $(model_runID)
       retryDownloadCount: 3
From 249d0cbd1a2e0f7dd212ec20ded498726962ef92 Mon Sep 17 00:00:00 2001
From: "Sun, Xuehao"
Date: Wed, 2 Nov 2022 14:43:09 +0800
Subject: [PATCH 14/34] fix bug

---
 .azure-pipelines/template/model-template.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.azure-pipelines/template/model-template.yml b/.azure-pipelines/template/model-template.yml
index 9e9332707bc..d224571be1e 100644
--- a/.azure-pipelines/template/model-template.yml
+++ b/.azure-pipelines/template/model-template.yml
@@ -27,7 +27,7 @@ steps:
   - task: DownloadPipelineArtifact@2
     inputs:
       source: "specific"
-      artifact: ${{ parameters.modelName }}
+      artifact: ${{ parameters.framework }}_${{ parameters.modelName }}
       patterns: "**.log"
       path: $(Build.SourcesDirectory)/.azure-pipelines/scripts/models/${{ parameters.modelName }}_refer_log
       project: $(System.TeamProject)
From 65e19904d74f26f499f984d07a7a1857529d37d5 Mon Sep 17 00:00:00 2001
From: "Sun, Xuehao"
Date: Wed, 2 Nov 2022 14:47:56 +0800
Subject: [PATCH 15/34] fix bug

---
 .azure-pipelines/scripts/models/collect_log_model.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.azure-pipelines/scripts/models/collect_log_model.py b/.azure-pipelines/scripts/models/collect_log_model.py
index d122683cfd3..1e678831580 100644
--- a/.azure-pipelines/scripts/models/collect_log_model.py
+++ b/.azure-pipelines/scripts/models/collect_log_model.py
@@ -108,7 +108,7 @@ def get_model_benchmark_txt_results():
 
 def get_refer_data():
-    refer_log = os.path.join(f"{args.logs_dir}_refer", f"{args.framework}_{args.model}_summary.log")
+    refer_log = os.path.join(f"{args.logs_dir}_refer_log", f"{args.framework}_{args.model}_summary.log")
     result = {}
     if os.path.exists(refer_log):
         with open(refer_log, "r") as f:
@@ -121,7 +121,7 @@ def get_refer_data():
             result[f"{precision}_{Type}"] = float(value[keys.index("Value")])
         return result
     else:
-        print("refer log file not found")
+        print(f"refer log file: {refer_log} not found")
         return 0
From 14a6a332e27c6a5c4e6c970c2144bcaef4653eda Mon Sep 17 00:00:00 2001
From: "Sun, Xuehao"
Date: Wed, 2 Nov 2022 14:55:35 +0800
Subject: [PATCH 16/34] fix bug

---
 .../scripts/models/collect_log_model.py | 26 +++++++++----------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/.azure-pipelines/scripts/models/collect_log_model.py b/.azure-pipelines/scripts/models/collect_log_model.py
index 1e678831580..ef9e8f66d30 100644
--- a/.azure-pipelines/scripts/models/collect_log_model.py
+++ b/.azure-pipelines/scripts/models/collect_log_model.py
@@ -71,26 +71,26 @@ def get_model_tuning_dict_results():
     return tuning_result_dict, benchmark_accuracy_result_dict
 
-def get_model_benchmark_txt_results():
-    benchmark_performance_result_dict = {'int8': {}, 'fp32': {}}
-    for precision in ['int8', 'fp32']:
+def get_model_benchmark_dict_results():
+    benchmark_performance_result_dict = {"int8": {}, "fp32": {}}
+    for precision in ["int8", "fp32"]:
         throughput = 0.0
         bs = 1
         for root, dirs, files in os.walk(args.logs_dir):
             for name in files:
                 file_name = os.path.join(root, name)
                 print(file_name)
-                if 'performance-' + precision in name:
+                if "performance-" + precision in name:
                     for line in open(file_name, "r"):
-                        result= parse_perf_line(line)
+                        result = parse_perf_line(line)
                         if result.get("throughput"):
                             throughput += result.get("throughput")
                         if result.get("batch_size"):
                             bs = result.get("batch_size")
 
         # set model status failed
-        if throughput==0.0:
-            os.system('echo "##vso[task.setvariable variable='+args.framework+'_'+args.model+'_failed]true"')
+        if throughput == 0.0:
+            os.system('echo "##vso[task.setvariable variable=' + args.framework + '_' + args.model + '_failed]true"')
         benchmark_performance_result_dict[precision] = {
             "OS": OS,
             "Platform": PLATFORM,
@@ -100,8 +100,8 @@ def get_model_benchmark_dict_results():
             "Mode": "Inference",
             "Type": "Performance",
             "BS": 1,
-            "Value":throughput,
-            "Url":URL,
+            "Value": throughput,
+            "Url": URL,
         }
 
     return benchmark_performance_result_dict
@@ -234,10 +234,10 @@ def parse_perf_line(line) -> float:
     elif args.stage == "tuning":
         tuning_result_dict, benchmark_accuracy_result_dict = get_model_tuning_dict_results()
     elif args.stage == "int8_benchmark":
-        benchmark_performance_result_dict = get_model_benchmark_txt_results()
-        assert abs(benchmark_performance_result_dict.get("Value")-refer.get(f"INT8_Performance"))/refer.get(f"INT8_Performance") <= 0.05
+        benchmark_performance_result_dict = get_model_benchmark_dict_results()
+        assert abs(benchmark_performance_result_dict.get("int8").get("Value")-refer.get(f"INT8_Performance"))/refer.get(f"INT8_Performance") <= 0.05
     elif args.stage == "fp32_benchmark":
-        benchmark_performance_result_dict = get_model_benchmark_txt_results()
-        assert abs(benchmark_performance_result_dict.get("Value")-refer.get(f"FP32_Performance"))/refer.get(f"FP32_Performance") <= 0.05
+        benchmark_performance_result_dict = get_model_benchmark_dict_results()
+        assert abs(benchmark_performance_result_dict.get("fp32").get("Value")-refer.get(f"FP32_Performance"))/refer.get(f"FP32_Performance") <= 0.05
     else:
         raise ValueError(f"{args.stage} does not exist")
From 9b9ba675c7dcde0c16c41dfda4c09bc5720101a1 Mon Sep 17 00:00:00 2001
From: "Sun, Xuehao"
Date: Wed, 2 Nov 2022 15:20:25 +0800
Subject: [PATCH 17/34] add try catch

---
 .../models/run_model_trigger_common.sh | 49 +++++++++++--------
 1 file changed, 28 insertions(+), 21 deletions(-)

diff --git a/.azure-pipelines/scripts/models/run_model_trigger_common.sh b/.azure-pipelines/scripts/models/run_model_trigger_common.sh
index 7847725275a..4734b72e79b 100644
--- a/.azure-pipelines/scripts/models/run_model_trigger_common.sh
+++ b/.azure-pipelines/scripts/models/run_model_trigger_common.sh
@@ -88,19 +88,22 @@ elif [ "${mode}" == "fp32_benchmark" ]; then
             --log_dir="${log_dir}/${model}" \
             --new_benchmark=${new_benchmark} \
             --precision="fp32"
+        {
+            python -u ${SCRIPTS_PATH}/collect_log_model.py \
+                --framework=${framework} \
+                --fwk_ver=${fwk_ver} \
+                --model=${model} \
+                --logs_dir="${log_dir}/${model}" \
+                --output_dir="${log_dir}/${model}" \
+                --build_id=${BUILD_BUILDID} \
+                --stage=${mode}
+            exit_code=$?
+        } || {
+            echo "====================================="
+        }
 
-        python -u ${SCRIPTS_PATH}/collect_log_model.py \
-            --framework=${framework} \
-            --fwk_ver=${fwk_ver} \
-            --model=${model} \
-            --logs_dir="${log_dir}/${model}" \
-            --output_dir="${log_dir}/${model}" \
-            --build_id=${BUILD_BUILDID} \
-            --stage=${mode}
-
-        exit_code=$?
         if [ ${exit_code} -ne 0 ] ; then
-            $BOLD_RED && echo "FAILED!! Run again" && $RESET
+            $BOLD_RED && echo "FAILED!!" && $RESET
         else
             $BOLD_GREEN && echo "SUCCEED" && $RESET
             break
         fi
@@ -133,10 +138,22 @@ elif [ "${mode}" == "int8_benchmark" ]; then
             --new_benchmark=${new_benchmark} \
             --precision="int8"
 
-        python -u ${SCRIPTS_PATH}/collect_log_model.py \
-            --framework=${framework} \
-            --fwk_ver=${fwk_ver} \
-            --model=${model} \
-            --logs_dir="${log_dir}/${model}" \
-            --output_dir="${log_dir}/${model}" \
-            --build_id=${BUILD_BUILDID} \
-            --stage=${mode}
+        {
+            python -u ${SCRIPTS_PATH}/collect_log_model.py \
+                --framework=${framework} \
+                --fwk_ver=${fwk_ver} \
+                --model=${model} \
+                --logs_dir="${log_dir}/${model}" \
+                --output_dir="${log_dir}/${model}" \
+                --build_id=${BUILD_BUILDID} \
+                --stage=${mode}
+            exit_code=$?
+        } || {
+            echo "====================================="
+        }
 
-        exit_code=$?
         if [ ${exit_code} -ne 0 ] ; then
-            $BOLD_RED && echo "FAILED!! Run again" && $RESET
+            $BOLD_RED && echo "FAILED!!" && $RESET
         else
             $BOLD_GREEN && echo "SUCCEED" && $RESET
             break
         fi
     done
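The "try catch" in this patch relies on a bash detail: errexit (`set -e`) is suspended for the left-hand side of `||`, so wrapping the step in a brace group followed by `|| { ... }` lets the script observe the failure in `exit_code` instead of dying on it. Minimal demo:

    #!/bin/bash
    set -eo pipefail
    {
        false            # stand-in for the python collect/check step; under
                         # plain `set -e` the script would abort right here
        exit_code=$?     # captures 1 and the script keeps going
    } || {
        echo "====================================="
    }
    echo "captured exit_code=${exit_code}, script still running"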
From 4708ba730616af888f77d0333147b2914d90a20d Mon Sep 17 00:00:00 2001
From: "Sun, Xuehao"
Date: Wed, 2 Nov 2022 15:33:32 +0800
Subject: [PATCH 18/34] update

---
 .azure-pipelines/model-test.yml                 | 18 +++++++++---------
 .../scripts/models/collect_log_model.py         | 10 ++++++++--
 .../scripts/models/run_model_trigger_common.sh  |  2 ++
 3 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/.azure-pipelines/model-test.yml b/.azure-pipelines/model-test.yml
index 8fcb36c3fd4..93550fa65e6 100644
--- a/.azure-pipelines/model-test.yml
+++ b/.azure-pipelines/model-test.yml
@@ -30,24 +30,24 @@ parameters:
   - name: TensorFlowModelList
     type: object
     default:
-      # - resnet50v1.5
-      # - ssd_resnet50_v1
-      # - ssd_mobilenet_v1_ckpt
-      # - inception_v1
-      # - resnet50_fashion
+      - resnet50v1.5
+      - ssd_resnet50_v1
+      - ssd_mobilenet_v1_ckpt
+      - inception_v1
+      - resnet50_fashion
       - darknet19
-      # - densenet-121
-      # - resnet-101
+      - densenet-121
+      - resnet-101
   - name: PyTorchModelList
     type: object
     default:
       - resnet18
-      # - resnet18_fx
+      - resnet18_fx
   - name: ONNXModelList
     type: object
     default:
       - resnet50-v1-12
-      # - bert_base_MRPC_dynamic
+      - bert_base_MRPC_dynamic
   - name: MXNetModelList
     type: object
     default:
diff --git a/.azure-pipelines/scripts/models/collect_log_model.py b/.azure-pipelines/scripts/models/collect_log_model.py
index ef9e8f66d30..86c35a823c1 100644
--- a/.azure-pipelines/scripts/models/collect_log_model.py
+++ b/.azure-pipelines/scripts/models/collect_log_model.py
@@ -235,9 +235,15 @@ def parse_perf_line(line) -> float:
         tuning_result_dict, benchmark_accuracy_result_dict = get_model_tuning_dict_results()
     elif args.stage == "int8_benchmark":
         benchmark_performance_result_dict = get_model_benchmark_dict_results()
-        assert abs(benchmark_performance_result_dict.get("int8").get("Value")-refer.get(f"INT8_Performance"))/refer.get(f"INT8_Performance") <= 0.05
+        current_data = benchmark_performance_result_dict.get("int8").get("Value")
+        refer_data = refer.get(f"INT8_Performance")
+        print(f"current_data = {current_data}, refer_data = {refer_data}")
+        assert abs(current_data-refer_data)/refer_data <= 0.05
     elif args.stage == "fp32_benchmark":
         benchmark_performance_result_dict = get_model_benchmark_dict_results()
-        assert abs(benchmark_performance_result_dict.get("fp32").get("Value")-refer.get(f"FP32_Performance"))/refer.get(f"FP32_Performance") <= 0.05
+        current_data = benchmark_performance_result_dict.get("fp32").get("Value")
+        refer_data = refer.get(f"FP32_Performance")
+        print(f"current_data = {current_data}, refer_data = {refer_data}")
+        assert abs(current_data-refer_data)/refer_data <= 0.05
     else:
         raise ValueError(f"{args.stage} does not exist")
diff --git a/.azure-pipelines/scripts/models/run_model_trigger_common.sh b/.azure-pipelines/scripts/models/run_model_trigger_common.sh
index 4734b72e79b..fd019327e31 100644
--- a/.azure-pipelines/scripts/models/run_model_trigger_common.sh
+++ b/.azure-pipelines/scripts/models/run_model_trigger_common.sh
@@ -109,6 +109,7 @@ elif [ "${mode}" == "fp32_benchmark" ]; then
             break
         fi
     done
+    exit ${exit_code}
 
 elif [ "${mode}" == "int8_benchmark" ]; then
     cd ${WORK_SOURCE_DIR}/${model_src_dir}
@@ -159,6 +160,7 @@ elif [ "${mode}" == "int8_benchmark" ]; then
             break
         fi
     done
+    exit ${exit_code}
 
 elif [ "${mode}" == "collect_log" ]; then
     cd ${WORK_SOURCE_DIR}/${model_src_dir}
From d71bfaa03c20dc4d4c3a68f6dafcc1923f82ea3c Mon Sep 17 00:00:00 2001
From: "Sun, Xuehao"
Date: Wed, 2 Nov 2022 16:11:20 +0800
Subject: [PATCH 19/34] add accuracy check

---
 .../scripts/models/collect_log_model.py      | 25 +++++++++++--------
 .azure-pipelines/template/model-template.yml |  2 +-
 2 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/.azure-pipelines/scripts/models/collect_log_model.py b/.azure-pipelines/scripts/models/collect_log_model.py
index 86c35a823c1..978c4d0504d 100644
--- a/.azure-pipelines/scripts/models/collect_log_model.py
+++ b/.azure-pipelines/scripts/models/collect_log_model.py
@@ -226,6 +226,19 @@ def parse_perf_line(line) -> float:
     return perf_data
 
+def check_status(precision, precision_upper):
+    benchmark_performance_result_dict = get_model_benchmark_dict_results()
+    _, benchmark_accuracy_result_dict = get_model_tuning_dict_results()
+    current_performance = benchmark_performance_result_dict.get(precision).get("Value")
+    current_accuracy = benchmark_accuracy_result_dict.get(precision).get("Value")
+    refer_performance = refer.get(f"{precision_upper}_Performance")
+    refer_accuracy = refer.get(f"{precision_upper}_Accuracy")
+    print(f"current_performance_data = {current_performance}, refer_performance_data = {refer_performance}")
+    assert abs(current_performance - refer_performance) / refer_performance <= 0.05
+    print(f"current_accuracy_data = {current_accuracy}, refer_accuarcy_data = {refer_accuracy}")
+    assert abs(current_accuracy - refer_accuracy) / refer_accuracy <= 0.05
+
+
 if __name__ == '__main__':
     tuning_log = os.path.join(args.logs_dir, f"{args.framework}-{args.model}-tune.log")
     refer = get_refer_data()
@@ -234,16 +247,8 @@ def parse_perf_line(line) -> float:
     elif args.stage == "tuning":
         tuning_result_dict, benchmark_accuracy_result_dict = get_model_tuning_dict_results()
     elif args.stage == "int8_benchmark":
-        benchmark_performance_result_dict = get_model_benchmark_dict_results()
-        current_data = benchmark_performance_result_dict.get("int8").get("Value")
-        refer_data = refer.get(f"INT8_Performance")
-        print(f"current_data = {current_data}, refer_data = {refer_data}")
-        assert abs(current_data-refer_data)/refer_data <= 0.05
+        check_status("int8", "INT8")
     elif args.stage == "fp32_benchmark":
-        benchmark_performance_result_dict = get_model_benchmark_dict_results()
-        current_data = benchmark_performance_result_dict.get("fp32").get("Value")
-        refer_data = refer.get(f"FP32_Performance")
-        print(f"current_data = {current_data}, refer_data = {refer_data}")
-        assert abs(current_data-refer_data)/refer_data <= 0.05
+        check_status("fp32", "FP32")
     else:
         raise ValueError(f"{args.stage} does not exist")
diff --git a/.azure-pipelines/template/model-template.yml b/.azure-pipelines/template/model-template.yml
index d224571be1e..3a65defcef3 100644
--- a/.azure-pipelines/template/model-template.yml
+++ b/.azure-pipelines/template/model-template.yml
@@ -28,7 +28,7 @@ steps:
     inputs:
       source: "specific"
      artifact: ${{ parameters.framework }}_${{ parameters.modelName }}
-      patterns: "**.log"
+      patterns: "**_summary.log"
      path: $(Build.SourcesDirectory)/.azure-pipelines/scripts/models/${{ parameters.modelName }}_refer_log
      project: $(System.TeamProject)
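check_status compares against the `*_summary.log` baseline artifact the template now downloads; per get_refer_data, that file is a semicolon-separated table whose header row includes at least Precision, Type, and Value columns. A sketch that fabricates a two-line example file and extracts the same `<Precision>_<Type>` to Value mapping with awk (the minimal column set here is an assumption, real summary logs carry more fields):

    #!/bin/bash
    printf 'Precision;Type;Value\n'    >  demo_summary.log
    printf 'INT8;Performance;512.3\n'  >> demo_summary.log
    # Resolve column positions from the header row, then emit key=value pairs
    # the way get_refer_data builds its result dict.
    awk -F';' 'NR == 1 { for (i = 1; i <= NF; i++) col[$i] = i }
               NR > 1  { printf "%s_%s=%s\n", $col["Precision"], $col["Type"], $col["Value"] }' demo_summary.log

This prints `INT8_Performance=512.3`.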
From 1600051b3186d1a9bc59c8b68b5e0aa9fd7dea2d Mon Sep 17 00:00:00 2001
From: "Sun, Xuehao"
Date: Wed, 2 Nov 2022 21:54:38 +0800
Subject: [PATCH 20/34] fix path

---
 .azure-pipelines/scripts/models/update_yaml_config.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.azure-pipelines/scripts/models/update_yaml_config.py b/.azure-pipelines/scripts/models/update_yaml_config.py
index c561f4398d6..c305134e18d 100644
--- a/.azure-pipelines/scripts/models/update_yaml_config.py
+++ b/.azure-pipelines/scripts/models/update_yaml_config.py
@@ -65,8 +65,8 @@ def update_yaml_dataset(yaml, framework, dataset_location):
             config.write(line)
 
     else:
-        val_dataset = dataset_location + f"\{os.path.sep}" + "val"
-        train_dataset = dataset_location + f"\{os.path.sep}" + "train"
+        val_dataset = dataset_location + f"{os.path.sep}" + "val"
+        train_dataset = dataset_location + f"{os.path.sep}" + "train"
         patterns = {
             "calibration_dataset": {
                 "pattern": r'root:.*/path/to/calibration/dataset/?',
From fbeff35921409f4f82aa74e0ca2eb3d801de94d6 Mon Sep 17 00:00:00 2001
From: chensuyue
Date: Thu, 3 Nov 2022 00:00:29 +0800
Subject: [PATCH 21/34] update code structure

---
 .../scripts/models/run_benchmark_common.sh  | 73 ++++++++++++----
 .../models/run_model_trigger_common.sh      | 86 ++++---------------
 .../models/run_onnxrt_models_trigger.sh     | 10 +--
 .../scripts/models/run_tuning_common.sh     | 40 ++++-----
 4 files changed, 98 insertions(+), 111 deletions(-)

diff --git a/.azure-pipelines/scripts/models/run_benchmark_common.sh b/.azure-pipelines/scripts/models/run_benchmark_common.sh
index 9f43a514369..ae60e09a415 100644
--- a/.azure-pipelines/scripts/models/run_benchmark_common.sh
+++ b/.azure-pipelines/scripts/models/run_benchmark_common.sh
@@ -31,16 +31,65 @@ done
 
 $BOLD_YELLOW && echo "-------- run_benchmark_common --------" && $RESET
 
-# run accuracy
-# tune_acc==true means using accuracy results from tuning log
-if [ "${tune_acc}" == "false" ]; then
+main() {
+
+    # run accuracy
+    # tune_acc==true means using accuracy results from tuning log
+    if [ "${tune_acc}" == "false" ]; then
+        run_accuracy
+    fi
+
+    # run performance
+    max_loop=3
+    for ((iter=0; iter<${max_loop}; iter++))
+    do
+        run_performance
+        {
+            check_perf_gap
+            exit_code=$?
+        } || true
+
+        if [ ${exit_code} -ne 0 ] ; then
+            $BOLD_RED && echo "FAILED with performance gap!!" && $RESET
+        else
+            $BOLD_GREEN && echo "SUCCEED!!" && $RESET
+            break
+        fi
+    done
+    exit ${exit_code}
+
+}
+
+function check_perf_gap() {
+    python -u ${SCRIPTS_PATH}/collect_log_model.py \
+    --framework=${framework} \
+    --fwk_ver=${fwk_ver} \
+    --model=${model} \
+    --logs_dir="${log_dir}/${model}" \
+    --output_dir="${log_dir}/${model}" \
+    --build_id=${BUILD_BUILDID} \
+    --stage=${mode}
+}
+
+function run_performance() {
+    cmd="${benchmark_cmd} --input_model=${input_model}"
+    if [ "${new_benchmark}" == "true" ]; then
+        $BOLD_YELLOW && echo "run with internal benchmark..." && $RESET
+        eval ${cmd} 2>&1 | tee ${log_dir}/${framework}-${model}-performance-${precision}.log
+    else
+        $BOLD_YELLOW && echo "run with external multiInstance benchmark..." && $RESET
+        multiInstance
+    fi
+}
+
+function run_accuracy() {
     $BOLD_YELLOW && echo "run tuning accuracy in precision ${precision}" && $RESET
     eval "${benchmark_cmd} --input_model=${input_model} --mode=accuracy" 2>&1 | tee ${log_dir}/${framework}-${model}-accuracy-${precision}.log
-fi
-
+}
 
 function multiInstance() {
-    ncores_per_socket=${ncores_per_socket:=$( lscpu | grep 'Core(s) per socket' | cut -d: -f2 | xargs echo -n)}
+    ncores_per_socket=${ncores_per_socket:=$( lscpu | grep 'Core(s) per socket' | cut -d: -f2 | xargs echo -n
+    })}
     $BOLD_YELLOW && echo "Executing multi instance benchmark" && $RESET
     ncores_per_instance=4
     $BOLD_YELLOW && echo "ncores_per_socket=${ncores_per_socket}, ncores_per_instance=${ncores_per_instance}" && $RESET
@@ -78,14 +127,4 @@ function multiInstance() {
     fi
 }
 
-
-# run performance
-cmd="${benchmark_cmd} --input_model=${input_model}"
-
-if [ "${new_benchmark}" == "true" ]; then
-    $BOLD_YELLOW && echo "run with internal benchmark..." && $RESET
-    eval ${cmd} 2>&1 | tee ${log_dir}/${framework}-${model}-performance-${precision}.log
-else
-    $BOLD_YELLOW && echo "run with external multiInstance benchmark..." && $RESET
-    multiInstance
-fi
+main
\ No newline at end of file
diff --git a/.azure-pipelines/scripts/models/run_model_trigger_common.sh b/.azure-pipelines/scripts/models/run_model_trigger_common.sh
index fd019327e31..9d403149294 100644
--- a/.azure-pipelines/scripts/models/run_model_trigger_common.sh
+++ b/.azure-pipelines/scripts/models/run_model_trigger_common.sh
@@ -76,40 +76,16 @@ elif [ "${mode}" == "fp32_benchmark" ]; then
     cd ${WORK_SOURCE_DIR}/${model_src_dir}
     $BOLD_YELLOW && echo "benchmark_cmd is ${benchmark_cmd}" && $RESET
     $BOLD_YELLOW && echo "====== run benchmark fp32 =======" && $RESET
-    max_loop=3
-    for ((iter=0; iter<${max_loop}; iter++))
-    do
-        /bin/bash ${SCRIPTS_PATH}/run_benchmark_common.sh \
-            --framework=${framework} \
-            --model=${model} \
-            --input_model=${input_model} \
-            --benchmark_cmd="${benchmark_cmd}" \
-            --tune_acc=${tune_acc} \
-            --log_dir="${log_dir}/${model}" \
-            --new_benchmark=${new_benchmark} \
-            --precision="fp32"
-        {
-            python -u ${SCRIPTS_PATH}/collect_log_model.py \
-                --framework=${framework} \
-                --fwk_ver=${fwk_ver} \
-                --model=${model} \
-                --logs_dir="${log_dir}/${model}" \
-                --output_dir="${log_dir}/${model}" \
-                --build_id=${BUILD_BUILDID} \
-                --stage=${mode}
-            exit_code=$?
-        } || {
-            echo "====================================="
-        }
-
-        if [ ${exit_code} -ne 0 ] ; then
-            $BOLD_RED && echo "FAILED!!" && $RESET
-        else
-            $BOLD_GREEN && echo "SUCCEED" && $RESET
-            break
-        fi
-    done
-    exit ${exit_code}
-
+    /bin/bash ${SCRIPTS_PATH}/run_benchmark_common.sh \
+        --framework=${framework} \
+        --model=${model} \
+        --input_model=${input_model} \
+        --benchmark_cmd="${benchmark_cmd}" \
+        --tune_acc=${tune_acc} \
+        --log_dir="${log_dir}/${model}" \
+        --new_benchmark=${new_benchmark} \
+        --precision="fp32"
 elif [ "${mode}" == "int8_benchmark" ]; then
     cd ${WORK_SOURCE_DIR}/${model_src_dir}
     $BOLD_YELLOW && echo "benchmark_cmd is ${benchmark_cmd}" && $RESET
@@ -121,41 +97,15 @@ elif [ "${mode}" == "int8_benchmark" ]; then
         model_name=${input_model}
         benchmark_cmd="${benchmark_cmd} --int8=true"
     fi
-    max_loop=3
-    for ((iter=0; iter<${max_loop}; iter++))
-    do
-        /bin/bash ${SCRIPTS_PATH}/run_benchmark_common.sh \
-            --framework=${framework} \
-            --model=${model} \
-            --input_model="${model_name}" \
-            --benchmark_cmd="${benchmark_cmd}" \
-            --tune_acc=${tune_acc} \
-            --log_dir="${log_dir}/${model}" \
-            --new_benchmark=${new_benchmark} \
-            --precision="int8"
-
-        {
-            python -u ${SCRIPTS_PATH}/collect_log_model.py \
-                --framework=${framework} \
-                --fwk_ver=${fwk_ver} \
-                --model=${model} \
-                --logs_dir="${log_dir}/${model}" \
-                --output_dir="${log_dir}/${model}" \
-                --build_id=${BUILD_BUILDID} \
-                --stage=${mode}
-            exit_code=$?
-        } || {
-            echo "====================================="
-        }
-
-        if [ ${exit_code} -ne 0 ] ; then
-            $BOLD_RED && echo "FAILED!!" && $RESET
-        else
-            $BOLD_GREEN && echo "SUCCEED" && $RESET
-            break
-        fi
-    done
-    exit ${exit_code}
-
+    /bin/bash ${SCRIPTS_PATH}/run_benchmark_common.sh \
+        --framework=${framework} \
+        --model=${model} \
+        --input_model="${model_name}" \
+        --benchmark_cmd="${benchmark_cmd}" \
+        --tune_acc=${tune_acc} \
+        --log_dir="${log_dir}/${model}" \
+        --new_benchmark=${new_benchmark} \
+        --precision="int8"
 elif [ "${mode}" == "collect_log" ]; then
diff --git a/.azure-pipelines/scripts/models/run_onnxrt_models_trigger.sh b/.azure-pipelines/scripts/models/run_onnxrt_models_trigger.sh
index a5a7a2378ca..a6d475758bf 100644
--- a/.azure-pipelines/scripts/models/run_onnxrt_models_trigger.sh
+++ b/.azure-pipelines/scripts/models/run_onnxrt_models_trigger.sh
@@ -31,7 +31,7 @@ if [ "${model}" == "resnet50-v1-12" ]; then
     batch_size=1
     new_benchmark=true
     tuning_cmd="bash run_tuning.sh --input_model=${input_model} --config=${yaml}"
-    benchmark_cmd="bash run_benchmark.sh --config=${yaml}"
+    benchmark_cmd="bash run_benchmark.sh --config=${yaml} --mode=performance"
 elif [ "${model}" == "bert_base_MRPC_static" ]; then
     model_src_dir="language_translation/bert/quantization/ptq"
     dataset_location="/tf_dataset/pytorch/glue_data/MRPC"
@@ -41,7 +41,7 @@ elif [ "${model}" == "bert_base_MRPC_static" ]; then
     batch_size=1
     new_benchmark=true
     tuning_cmd="bash run_tuning.sh --input_model=${input_model} --config=${yaml}"
-    benchmark_cmd="bash run_benchmark.sh --config=${yaml}"
+    benchmark_cmd="bash run_benchmark.sh --config=${yaml} --mode=performance"
 elif [ "${model}" == "bert_base_MRPC_dynamic" ]; then
     model_src_dir="language_translation/bert/quantization/ptq"
     dataset_location="/tf_dataset/pytorch/glue_data/MRPC"
@@ -51,7 +51,7 @@ elif [ "${model}" == "bert_base_MRPC_dynamic" ]; then
     batch_size=1
     new_benchmark=true
     tuning_cmd="bash run_tuning.sh --input_model=${input_model} --config=${yaml}"
-    benchmark_cmd="bash run_benchmark.sh --config=${yaml}"
+    benchmark_cmd="bash run_benchmark.sh --config=${yaml} --mode=performance"
 elif [ "${model}" == "distilbert_base_MRPC_qdq" ]; then
     model_src_dir="language_translation/distilbert/quantization/ptq"
     dataset_location="/tf_dataset/pytorch/glue_data/MRPC"
@@ -61,7 +61,7 @@ elif [ "${model}" == "distilbert_base_MRPC_qdq" ]; then
     batch_size=1
     new_benchmark=true
     tuning_cmd="bash run_tuning.sh --input_model=${input_model} --config=${yaml}"
-    benchmark_cmd="bash run_benchmark.sh --config=${yaml}"
+    benchmark_cmd="bash run_benchmark.sh --config=${yaml} --mode=performance"
 fi
 
 
@@ -77,6 +77,6 @@ fi
     --strategy=${strategy} \
     --new_benchmark=${new_benchmark} \
     --tuning_cmd="${tuning_cmd}" \
-    --benchmark_cmd="${benchmark_cmd} --mode=performance" \
+    --benchmark_cmd="${benchmark_cmd}" \
     --tune_acc=${tune_acc} \
     --mode=${mode}
diff --git a/.azure-pipelines/scripts/models/run_tuning_common.sh b/.azure-pipelines/scripts/models/run_tuning_common.sh
index e5729bd790c..dc81b538ced 100644
--- a/.azure-pipelines/scripts/models/run_tuning_common.sh
+++ b/.azure-pipelines/scripts/models/run_tuning_common.sh
@@ -39,25 +39,23 @@ fi
 $BOLD_YELLOW && echo -e "-------- run_tuning_common --------" && $RESET
 $BOLD_YELLOW && echo ${tuning_cmd} && $RESET
 
-max_loop=1
-for ((iter=0; iter<${max_loop}; iter++))
-do
-    eval "/usr/bin/time -v ${tuning_cmd} --output_model=${output_model}"
-
-    $BOLD_YELLOW && echo "====== finish tuning. echo information. ======" && $RESET
-    endtime=`date +'%Y-%m-%d %H:%M:%S'`
-    start_seconds=$(date --date="$starttime" +%s);
-    end_seconds=$(date --date="$endtime" +%s);
-    $BOLD_GREEN && echo "Tuning time spend: "$((end_seconds-start_seconds))"s " && $RESET
-    $BOLD_GREEN && echo "Tuning strategy: ${strategy}" && $RESET
-    $BOLD_GREEN && echo "Total resident size (kbytes): $(cat /proc/meminfo |grep 'MemTotal' |sed 's/[^0-9]//g')" && $RESET
-
-    $BOLD_YELLOW && echo "====== check tuning status. ======" && $RESET
-    # if [ $(grep ${framework}-${model}-tune.log | wc -l) == 0 ];then
-    #     exit 1
-    # fi
-    # if [ $(grep ${framework}-${model}-tune.log | grep "Not found" | wc -l) == 0 ];then
-    #     exit 1
-    # fi
-done
+eval "/usr/bin/time -v ${tuning_cmd} --output_model=${output_model}"
+
+$BOLD_YELLOW && echo "====== finish tuning. echo information. ======" && $RESET
+endtime=`date +'%Y-%m-%d %H:%M:%S'`
+start_seconds=$(date --date="$starttime" +%s);
+end_seconds=$(date --date="$endtime" +%s);
+$BOLD_GREEN && echo "Tuning time spend: "$((end_seconds-start_seconds))"s " && $RESET
+$BOLD_GREEN && echo "Tuning strategy: ${strategy}" && $RESET
+$BOLD_GREEN && echo "Total resident size (kbytes): $(cat /proc/meminfo |grep 'MemTotal' |sed 's/[^0-9]//g')" && $RESET
+
+$BOLD_YELLOW && echo "====== check tuning status. ======" && $RESET
+control_phrase="model which meet accuracy goal."
+if [ $(grep "${control_phrase}" ${framework}-${model}-${os}-${cpu}-tune.log | wc -l) == 0 ];then
+    exit 1
+fi
+if [ $(grep "${control_phrase}" ${framework}-${model}-${os}-${cpu}-tune.log | grep "Not found" | wc -l) == 1 ];then
+    exit 1
+fi
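The control-phrase check that closes patch 21 is the tuning gate: the tune log must contain the phrase, and the matching line must not be a "Not found" message. A standalone sketch of the same two-stage grep (the log file and its sample success line are fabricated for illustration):

    #!/bin/bash
    log="demo-tune.log"
    echo "Found a quantized model which meet accuracy goal." > "${log}"
    control_phrase="model which meet accuracy goal."
    if [ $(grep "${control_phrase}" "${log}" | wc -l) == 0 ]; then
        echo "tuning failed: control phrase missing" && exit 1
    fi
    if [ $(grep "${control_phrase}" "${log}" | grep "Not found" | wc -l) == 1 ]; then
        echo "tuning failed: no model met the accuracy goal" && exit 1
    fi
    echo "tuning succeeded"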
From 8c0dedbea3f78723d54e3fb66cd87e83df4050b0 Mon Sep 17 00:00:00 2001
From: "Sun, Xuehao"
Date: Thu, 3 Nov 2022 10:31:07 +0800
Subject: [PATCH 22/34] add global variable

---
 .azure-pipelines/model-test.yml               |  4 +-
 .azure-pipelines/scripts/install_nc.sh        |  2 +-
 .azure-pipelines/scripts/install_nc_full.sh   |  2 +-
 .azure-pipelines/scripts/models/env_setup.sh  |  2 +-
 .../scripts/models/run_benchmark_common.sh    | 72 +++++++++----------
 .../models/run_model_trigger_common.sh        |  8 ---
 .../models/run_mxnet_models_trigger.sh        |  3 -
 .../models/run_onnxrt_models_trigger.sh       |  3 -
 .../models/run_pytorch_models_trigger.sh      |  3 -
 .../models/run_tensorflow_models_trigger.sh   |  3 -
 .azure-pipelines/template/model-template.yml  |  8 +--
 11 files changed, 43 insertions(+), 67 deletions(-)

diff --git a/.azure-pipelines/model-test.yml b/.azure-pipelines/model-test.yml
index 93550fa65e6..5a4da22f3c8 100644
--- a/.azure-pipelines/model-test.yml
+++ b/.azure-pipelines/model-test.yml
@@ -152,9 +152,9 @@ stages:
               patterns: "**.log"
               path: $(OUT_SCRIPT_PATH)/last_generated
               project: $(System.TeamProject)
-              pipeline: "Baseline"
+              pipeline: "Model-Test"
               runVersion: "specific"
-              runId: $(model_runID)
+              runId: $(refer_buildId)
               retryDownloadCount: 3
             displayName: "Download last logs"
           - script: |
diff --git a/.azure-pipelines/scripts/install_nc.sh b/.azure-pipelines/scripts/install_nc.sh
index 38fd439404b..fa2daca5d19 100644
--- a/.azure-pipelines/scripts/install_nc.sh
+++ b/.azure-pipelines/scripts/install_nc.sh
@@ -4,4 +4,4 @@ cd /neural-compressor
 python -m pip install --no-cache-dir -r requirements.txt
 python setup.py sdist bdist_wheel
 pip install dist/neural_compressor*.whl
-pip list
\ No newline at end of file
+pip list
diff --git a/.azure-pipelines/scripts/install_nc_full.sh b/.azure-pipelines/scripts/install_nc_full.sh
index b3f59626dfd..7513baeb254 100644
--- a/.azure-pipelines/scripts/install_nc_full.sh
+++ b/.azure-pipelines/scripts/install_nc_full.sh
@@ -4,4 +4,4 @@ cd /neural-compressor
 python -m pip install --no-cache-dir -r requirements.txt
 python setup.py --full sdist bdist_wheel
 pip install dist/neural_compressor*.whl
-pip list
\ No newline at end of file
+pip list
diff --git a/.azure-pipelines/scripts/models/env_setup.sh b/.azure-pipelines/scripts/models/env_setup.sh
index 1711e9f12c6..7443e3e9d25 100644
--- a/.azure-pipelines/scripts/models/env_setup.sh
+++ b/.azure-pipelines/scripts/models/env_setup.sh
@@ -106,7 +106,7 @@ if [ -f "requirements.txt" ]; then
     done
     pip list
 else
-    $BOLD_RED && echo "Not found requirements.txt file." && $RESET
+    $BOLD_RED && echo "Not found requirements.txt file." && $RESET
 fi
 
 $BOLD_YELLOW && echo "======== update yaml config ========" && $RESET
diff --git a/.azure-pipelines/scripts/models/run_benchmark_common.sh b/.azure-pipelines/scripts/models/run_benchmark_common.sh
index ae60e09a415..10adedd4314 100644
--- a/.azure-pipelines/scripts/models/run_benchmark_common.sh
+++ b/.azure-pipelines/scripts/models/run_benchmark_common.sh
@@ -5,8 +5,7 @@ source /neural-compressor/.azure-pipelines/scripts/change_color.sh
 
 # get parameters
 PATTERN='[-a-zA-Z0-9_]*='
-for i in "$@"
-do
+for i in "$@"; do
     case $i in
         --framework=*)
             framework=`echo $i | sed "s/${PATTERN}//"`;;
@@ -16,8 +15,6 @@ do
            input_model=`echo $i | sed "s/${PATTERN}//"`;;
        --benchmark_cmd=*)
            benchmark_cmd=`echo $i | sed "s/${PATTERN}//"`;;
-        --tune_acc=*)
-            tune_acc=`echo $i | sed "s/${PATTERN}//"`;;
        --log_dir=*)
            log_dir=`echo $i | sed "s/${PATTERN}//"`;;
        --new_benchmark=*)
@@ -32,43 +29,44 @@ done
 $BOLD_YELLOW && echo "-------- run_benchmark_common --------" && $RESET
 
 main() {
-    # run accuracy
-    # tune_acc==true means using accuracy results from tuning log
-    if [ "${tune_acc}" == "false" ]; then
+    # USE_TUNE_ACC==true means using accuracy results from tuning log
+    if [ "${USE_TUNE_ACC}" == "false" ]; then
         run_accuracy
     fi
 
     # run performance
-    max_loop=3
-    for ((iter=0; iter<${max_loop}; iter++))
-    do
+    if [ "${PERF_STABLE_CHECK}" == "false" ]; then
         run_performance
-        {
-            check_perf_gap
-            exit_code=$?
-        } || true
-
-        if [ ${exit_code} -ne 0 ] ; then
-            $BOLD_RED && echo "FAILED with performance gap!!" && $RESET
-        else
-            $BOLD_GREEN && echo "SUCCEED!!" && $RESET
-            break
-        fi
-    done
-    exit ${exit_code}
-
+    else
+        max_loop=3
+        for ((iter = 0; iter < ${max_loop}; iter++)); do
+            run_performance
+            {
+                check_perf_gap
+                exit_code=$?
+            } || true
+
+            if [ ${exit_code} -ne 0 ]; then
+                $BOLD_RED && echo "FAILED with performance gap!!" && $RESET
+            else
+                $BOLD_GREEN && echo "SUCCEED!!" && $RESET
+                break
+            fi
+        done
+        exit ${exit_code}
+    fi
 }
 
 function check_perf_gap() {
     python -u ${SCRIPTS_PATH}/collect_log_model.py \
-    --framework=${framework} \
-    --fwk_ver=${fwk_ver} \
-    --model=${model} \
-    --logs_dir="${log_dir}/${model}" \
-    --output_dir="${log_dir}/${model}" \
-    --build_id=${BUILD_BUILDID} \
-    --stage=${mode}
+        --framework=${framework} \
+        --fwk_ver=${fwk_ver} \
+        --model=${model} \
+        --logs_dir="${log_dir}/${model}" \
+        --output_dir="${log_dir}/${model}" \
+        --build_id=${BUILD_BUILDID} \
+        --stage=${mode}
 }
 
 function run_performance() {
@@ -88,8 +86,7 @@ function run_accuracy() {
 }
 
 function multiInstance() {
-    ncores_per_socket=${ncores_per_socket:=$( lscpu | grep 'Core(s) per socket' | cut -d: -f2 | xargs echo -n
-    })}
+    ncores_per_socket=${ncores_per_socket:=$(lscpu | grep 'Core(s) per socket' | cut -d: -f2 | xargs echo -n)}
     $BOLD_YELLOW && echo "Executing multi instance benchmark" && $RESET
     ncores_per_instance=4
     $BOLD_YELLOW && echo "ncores_per_socket=${ncores_per_socket}, ncores_per_instance=${ncores_per_instance}" && $RESET
@@ -97,11 +94,10 @@ function multiInstance() {
     logFile="${log_dir}/${framework}-${model}-performance-${precision}"
     benchmark_pids=()
 
-    for((j=0;$j<${ncores_per_socket};j=$(($j + ${ncores_per_instance}))));
-    do
-        end_core_num=$((j + ncores_per_instance -1))
+    for ((j = 0; $j < ${ncores_per_socket}; j = $(($j + ${ncores_per_instance})))); do
+        end_core_num=$((j + ncores_per_instance - 1))
         if [ ${end_core_num} -ge ${ncores_per_socket} ]; then
-            end_core_num=$((ncores_per_socket-1))
+            end_core_num=$((ncores_per_socket - 1))
         fi
         numactl -m 0 -C "${j}-${end_core_num}" ${cmd} 2>&1 | tee ${logFile}-${ncores_per_socket}-${ncores_per_instance}-${j}.log &
         benchmark_pids+=($!)
@@ -127,4 +123,4 @@ function multiInstance() {
     fi
 }
 
-main
\ No newline at end of file
+main
diff --git a/.azure-pipelines/scripts/models/run_model_trigger_common.sh b/.azure-pipelines/scripts/models/run_model_trigger_common.sh
index 9d403149294..7f30854f6ba 100644
--- a/.azure-pipelines/scripts/models/run_model_trigger_common.sh
+++ b/.azure-pipelines/scripts/models/run_model_trigger_common.sh
@@ -33,8 +33,6 @@ do
            tuning_cmd=`echo $i | sed "s/${PATTERN}//"`;;
        --benchmark_cmd=*)
            benchmark_cmd=`echo $i | sed "s/${PATTERN}//"`;;
-        --tune_acc=*)
-            tune_acc=`echo $i | sed "s/${PATTERN}//"`;;
        --mode=*)
            mode=`echo $i | sed "s/${PATTERN}//"`;;
        *)
@@ -76,17 +74,14 @@ elif [ "${mode}" == "fp32_benchmark" ]; then
     cd ${WORK_SOURCE_DIR}/${model_src_dir}
     $BOLD_YELLOW && echo "benchmark_cmd is ${benchmark_cmd}" && $RESET
     $BOLD_YELLOW && echo "====== run benchmark fp32 =======" && $RESET
-
     /bin/bash ${SCRIPTS_PATH}/run_benchmark_common.sh \
     --framework=${framework} \
    --model=${model} \
    --input_model=${input_model} \
    --benchmark_cmd="${benchmark_cmd}" \
-    --tune_acc=${tune_acc} \
    --log_dir="${log_dir}/${model}" \
    --new_benchmark=${new_benchmark} \
    --precision="fp32"
-
 elif [ "${mode}" == "int8_benchmark" ]; then
     cd ${WORK_SOURCE_DIR}/${model_src_dir}
     $BOLD_YELLOW && echo "benchmark_cmd is ${benchmark_cmd}" && $RESET
@@ -101,17 +96,14 @@ elif [ "${mode}" == "int8_benchmark" ]; then
        model_name=${input_model}
        benchmark_cmd="${benchmark_cmd} --int8=true"
     fi
-
     /bin/bash ${SCRIPTS_PATH}/run_benchmark_common.sh \
    --framework=${framework} \
    --model=${model} \
    --input_model="${model_name}" \
    --benchmark_cmd="${benchmark_cmd}" \
-    --tune_acc=${tune_acc} \
    --log_dir="${log_dir}/${model}" \
    --new_benchmark=${new_benchmark} \
    --precision="int8"
-
 elif [ "${mode}" == "collect_log" ]; then
     cd ${WORK_SOURCE_DIR}/${model_src_dir}
     $BOLD_YELLOW && echo "====== collect logs of model ${model} =======" && $RESET
diff --git a/.azure-pipelines/scripts/models/run_mxnet_models_trigger.sh b/.azure-pipelines/scripts/models/run_mxnet_models_trigger.sh
index 8304b5da40e..8bf3b293fc2 100644
--- a/.azure-pipelines/scripts/models/run_mxnet_models_trigger.sh
+++ b/.azure-pipelines/scripts/models/run_mxnet_models_trigger.sh
@@ -8,8 +8,6 @@ do
     case $i in
        --model=*)
            model=`echo $i | sed "s/${PATTERN}//"`;;
-        --tune_acc=*)
-            tune_acc=`echo $i | sed "s/${PATTERN}//"`;;
        --mode=*)
            mode=`echo $i | sed "s/${PATTERN}//"`;;
        *)
@@ -48,5 +46,4 @@ fi
     --new_benchmark=${new_benchmark} \
     --tuning_cmd="${tuning_cmd}" \
     --benchmark_cmd="${benchmark_cmd}" \
-    --tune_acc=${tune_acc} \
     --mode=${mode}
diff --git a/.azure-pipelines/scripts/models/run_onnxrt_models_trigger.sh b/.azure-pipelines/scripts/models/run_onnxrt_models_trigger.sh
index a6d475758bf..a69852f01a5 100644
--- a/.azure-pipelines/scripts/models/run_onnxrt_models_trigger.sh
+++ b/.azure-pipelines/scripts/models/run_onnxrt_models_trigger.sh
@@ -8,8 +8,6 @@ do
     case $i in
        --model=*)
            model=`echo $i | sed "s/${PATTERN}//"`;;
-        --tune_acc=*)
-            tune_acc=`echo $i | sed "s/${PATTERN}//"`;;
        --mode=*)
            mode=`echo $i | sed "s/${PATTERN}//"`;;
        *)
@@ -78,5 +76,4 @@ fi
     --new_benchmark=${new_benchmark} \
     --tuning_cmd="${tuning_cmd}" \
     --benchmark_cmd="${benchmark_cmd}" \
-    --tune_acc=${tune_acc} \
     --mode=${mode}
diff --git a/.azure-pipelines/scripts/models/run_pytorch_models_trigger.sh b/.azure-pipelines/scripts/models/run_pytorch_models_trigger.sh
index d47d137a841..5cd776816f4 100644
--- a/.azure-pipelines/scripts/models/run_pytorch_models_trigger.sh
+++ b/.azure-pipelines/scripts/models/run_pytorch_models_trigger.sh
@@ -8,8 +8,6 @@ do
     case $i in
        --model=*)
            model=`echo $i | sed "s/${PATTERN}//"`;;
-        --tune_acc=*)
-            tune_acc=`echo $i | sed "s/${PATTERN}//"`;;
        --mode=*)
            mode=`echo $i | sed "s/${PATTERN}//"`;;
        *)
@@ -60,5 +58,4 @@ fi
     --new_benchmark=${new_benchmark} \
     --tuning_cmd="${tuning_cmd}" \
     --benchmark_cmd="${benchmark_cmd}" \
-    --tune_acc=${tune_acc} \
     --mode=${mode}
diff --git a/.azure-pipelines/scripts/models/run_tensorflow_models_trigger.sh b/.azure-pipelines/scripts/models/run_tensorflow_models_trigger.sh
index e7cbf16f230..b3eee910900 100644
--- a/.azure-pipelines/scripts/models/run_tensorflow_models_trigger.sh
+++ b/.azure-pipelines/scripts/models/run_tensorflow_models_trigger.sh
@@ -8,8 +8,6 @@ do
     case $i in
        --model=*)
            model=`echo $i | sed "s/${PATTERN}//"`;;
-        --tune_acc=*)
-            tune_acc=`echo $i | sed "s/${PATTERN}//"`;;
        --mode=*)
            mode=`echo $i | sed "s/${PATTERN}//"`;;
        *)
@@ -117,5 +115,4 @@ fi
     --new_benchmark=${new_benchmark} \
     --tuning_cmd="${tuning_cmd}" \
     --benchmark_cmd="${benchmark_cmd}" \
-    --tune_acc=${tune_acc} \
     --mode=${mode}
diff --git a/.azure-pipelines/template/model-template.yml b/.azure-pipelines/template/model-template.yml
index 3a65defcef3..f145025faf2 100644
--- a/.azure-pipelines/template/model-template.yml
+++ b/.azure-pipelines/template/model-template.yml
@@ -33,7 +33,7 @@ steps:
      project: $(System.TeamProject)
      pipeline: "Model-Test"
      runVersion: "specific"
-      runId: $(model_runID)
+      runId: $(refer_buildId)
      retryDownloadCount: 3
    displayName: "Download refer logs"
@@ -44,12 +44,12 @@ steps:

  - script: |
      docker exec ${{ parameters.modelContainerName }} bash -c "cd /neural-compressor/.azure-pipelines/scripts/models \
-      && bash run_${{ parameters.framework }}_models_trigger.sh --model=${{ parameters.modelName }} --tune_acc=true --mode='int8_benchmark'"
+      && bash run_${{ parameters.framework }}_models_trigger.sh --model=${{ parameters.modelName }} --mode='int8_benchmark'"
    displayName: INT8 Benchmark

  - script: |
      docker exec ${{ parameters.modelContainerName }} bash -c "cd /neural-compressor/.azure-pipelines/scripts/models \
-      && bash run_${{ parameters.framework }}_models_trigger.sh --model=${{ parameters.modelName }} --tune_acc=true --mode='fp32_benchmark'"
+      && bash run_${{ parameters.framework }}_models_trigger.sh --model=${{ parameters.modelName }} --mode='fp32_benchmark'"
    displayName: FP32 Benchmark

  - task: Bash@3
@@ -58,7 +58,7 @@ steps:
      targetType: "inline"
      script: |
        docker exec ${{ parameters.modelContainerName }} bash -c "cd /neural-compressor/.azure-pipelines/scripts/models \
-        && bash run_${{ parameters.framework }}_models_trigger.sh --model=${{ parameters.modelName }} --tune_acc=true --mode='collect_log'"
+        && bash run_${{ parameters.framework }}_models_trigger.sh --model=${{ parameters.modelName }} --mode='collect_log'"
      displayName: Collect log

  - task: PublishPipelineArtifact@1
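The run_benchmark_common.sh rewrite in PATCH 22 replaces the per-call --tune_acc flag with two pipeline-level variables: USE_TUNE_ACC=true means accuracy is taken from the tuning log instead of being re-measured, and PERF_STABLE_CHECK=true makes the performance run retry up to three times until check_perf_gap accepts the result. A reduced sketch of that control flow, with stub functions standing in for the real accuracy, performance, and gap-check steps (the stub bodies are illustrative only):

    #!/bin/bash
    # Stubs in place of the real run_accuracy/run_performance/check_perf_gap.
    run_accuracy()    { echo "measuring accuracy..."; }
    run_performance() { echo "measuring performance..."; }
    check_perf_gap()  { return 0; }  # pretend the gap check passes

    USE_TUNE_ACC=${USE_TUNE_ACC:-true}
    PERF_STABLE_CHECK=${PERF_STABLE_CHECK:-true}

    # Accuracy only runs separately when it cannot be read from the tuning log.
    if [ "${USE_TUNE_ACC}" == "false" ]; then
        run_accuracy
    fi

    if [ "${PERF_STABLE_CHECK}" == "false" ]; then
        run_performance
    else
        max_loop=3
        for ((iter = 0; iter < max_loop; iter++)); do
            run_performance
            { check_perf_gap; exit_code=$?; } || true
            if [ ${exit_code} -ne 0 ]; then
                echo "FAILED with performance gap, retry $((iter + 1))/${max_loop}"
            else
                echo "SUCCEED"
                break
            fi
        done
        exit ${exit_code}
    fi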
--mode='int8_benchmark'" + && bash run_${{ parameters.framework }}_models_trigger.sh --model=${{ parameters.modelName }} --mode='int8_benchmark'" displayName: INT8 Benchmark - script: | docker exec ${{ parameters.modelContainerName }} bash -c "cd /neural-compressor/.azure-pipelines/scripts/models \ - && bash run_${{ parameters.framework }}_models_trigger.sh --model=${{ parameters.modelName }} --tune_acc=true --mode='fp32_benchmark'" + && bash run_${{ parameters.framework }}_models_trigger.sh --model=${{ parameters.modelName }} --mode='fp32_benchmark'" displayName: FP32 Benchmark - task: Bash@3 @@ -58,7 +58,7 @@ steps: targetType: "inline" script: | docker exec ${{ parameters.modelContainerName }} bash -c "cd /neural-compressor/.azure-pipelines/scripts/models \ - && bash run_${{ parameters.framework }}_models_trigger.sh --model=${{ parameters.modelName }} --tune_acc=true --mode='collect_log'" + && bash run_${{ parameters.framework }}_models_trigger.sh --model=${{ parameters.modelName }} --mode='collect_log'" displayName: Collect log - task: PublishPipelineArtifact@1 From 703cebe8a0a58b9573f1d2a436e63602e5bdb521 Mon Sep 17 00:00:00 2001 From: "Sun, Xuehao" Date: Thu, 3 Nov 2022 10:39:17 +0800 Subject: [PATCH 23/34] fix tuning status check --- .azure-pipelines/scripts/models/run_tuning_common.sh | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/.azure-pipelines/scripts/models/run_tuning_common.sh b/.azure-pipelines/scripts/models/run_tuning_common.sh index dc81b538ced..51f84dac8ea 100644 --- a/.azure-pipelines/scripts/models/run_tuning_common.sh +++ b/.azure-pipelines/scripts/models/run_tuning_common.sh @@ -47,15 +47,13 @@ start_seconds=$(date --date="$starttime" +%s); end_seconds=$(date --date="$endtime" +%s); $BOLD_GREEN && echo "Tuning time spend: "$((end_seconds-start_seconds))"s " && $RESET $BOLD_GREEN && echo "Tuning strategy: ${strategy}" && $RESET -$BOLD_GREEN && echo "Total resident size (kbytes): $(cat /proc/meminfo |grep 'MemTotal' |sed 's/[^0-9]//g')" && $RESET +$BOLD_GREEN && echo "Total resident size (kbytes): $(cat /proc/meminfo | grep 'MemTotal' | sed 's/[^0-9]//g')" && $RESET $BOLD_YELLOW && echo "====== check tuning status. ======" && $RESET control_phrase="model which meet accuracy goal." 
-if [ $(grep "${control_phrase}" ${framework}-${model}-${os}-${cpu}-tune.log | wc -l) == 0 ];then +if [ $(grep "${control_phrase}" ${framework}-${model}-tune.log | wc -l) == 0 ];then exit 1 fi -if [ $(grep "${control_phrase}" ${framework}-${model}-${os}-${cpu}-tune.log | grep "Not found" | wc -l) == 1 ];then +if [ $(grep "${control_phrase}" ${framework}-${model}-tune.log | grep "Not found" | wc -l) == 1 ];then exit 1 fi - - From 131e452998cab9c945b1d959c2f45b2b7cde8190 Mon Sep 17 00:00:00 2001 From: "Sun, Xuehao" Date: Thu, 3 Nov 2022 10:45:35 +0800 Subject: [PATCH 24/34] fix tuning check --- .azure-pipelines/model-test.yml | 2 +- .../scripts/models/run_model_trigger_common.sh | 8 ++++++++ .azure-pipelines/scripts/models/run_tuning_common.sh | 9 --------- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/.azure-pipelines/model-test.yml b/.azure-pipelines/model-test.yml index 5a4da22f3c8..fd379ef6088 100644 --- a/.azure-pipelines/model-test.yml +++ b/.azure-pipelines/model-test.yml @@ -47,7 +47,7 @@ parameters: type: object default: - resnet50-v1-12 - - bert_base_MRPC_dynamic + # - bert_base_MRPC_dynamic - name: MXNetModelList type: object default: diff --git a/.azure-pipelines/scripts/models/run_model_trigger_common.sh b/.azure-pipelines/scripts/models/run_model_trigger_common.sh index 7f30854f6ba..231aefa54c1 100644 --- a/.azure-pipelines/scripts/models/run_model_trigger_common.sh +++ b/.azure-pipelines/scripts/models/run_model_trigger_common.sh @@ -70,6 +70,14 @@ elif [ "${mode}" == "tuning" ]; then --input_model=${input_model} \ --strategy=${strategy} \ 2>&1 | tee -a ${log_dir}/${model}/${framework}-${model}-tune.log + $BOLD_YELLOW && echo "====== check tuning status. ======" && $RESET + control_phrase="model which meet accuracy goal." + if [ $(grep "${control_phrase}" ${framework}-${model}-tune.log | wc -l) == 0 ];then + exit 1 + fi + if [ $(grep "${control_phrase}" ${framework}-${model}-tune.log | grep "Not found" | wc -l) == 1 ];then + exit 1 + fi elif [ "${mode}" == "fp32_benchmark" ]; then cd ${WORK_SOURCE_DIR}/${model_src_dir} $BOLD_YELLOW && echo "benchmark_cmd is ${benchmark_cmd}" && $RESET diff --git a/.azure-pipelines/scripts/models/run_tuning_common.sh b/.azure-pipelines/scripts/models/run_tuning_common.sh index 51f84dac8ea..fbb68d65605 100644 --- a/.azure-pipelines/scripts/models/run_tuning_common.sh +++ b/.azure-pipelines/scripts/models/run_tuning_common.sh @@ -48,12 +48,3 @@ end_seconds=$(date --date="$endtime" +%s); $BOLD_GREEN && echo "Tuning time spend: "$((end_seconds-start_seconds))"s " && $RESET $BOLD_GREEN && echo "Tuning strategy: ${strategy}" && $RESET $BOLD_GREEN && echo "Total resident size (kbytes): $(cat /proc/meminfo | grep 'MemTotal' | sed 's/[^0-9]//g')" && $RESET - -$BOLD_YELLOW && echo "====== check tuning status. ======" && $RESET -control_phrase="model which meet accuracy goal." 
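PATCH 24 greps the bare file name ${framework}-${model}-tune.log, but the tuning output was teed to ${log_dir}/${model}/${framework}-${model}-tune.log and the trigger script is not running in that directory at check time, so the relative name can miss the log entirely; the next patch prefixes the full path. A small sketch of the pitfall, using made-up paths purely for illustration:

    #!/bin/bash
    # Made-up paths, only to show the relative-vs-absolute log path issue.
    log_dir=$(mktemp -d)
    mkdir -p "${log_dir}/resnet18"
    echo "Save tuned model which meet accuracy goal." > "${log_dir}/resnet18/pytorch-resnet18-tune.log"

    cd /tmp  # the trigger script has long since cd'd away from log_dir

    # Bare file name: grep sees no such file, the count is 0, and the
    # check would report a tuning failure even though tuning succeeded.
    echo "relative count: $(grep "accuracy goal" pytorch-resnet18-tune.log 2>/dev/null | wc -l)"

    # Fully qualified path: the phrase is found and the check passes.
    echo "absolute count: $(grep "accuracy goal" "${log_dir}/resnet18/pytorch-resnet18-tune.log" | wc -l)"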
From adc73ffd67dcd97b8ad0ee1b804304521c89669c Mon Sep 17 00:00:00 2001
From: "Sun, Xuehao"
Date: Thu, 3 Nov 2022 10:51:47 +0800
Subject: [PATCH 25/34] fix

---
 .azure-pipelines/scripts/models/run_model_trigger_common.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.azure-pipelines/scripts/models/run_model_trigger_common.sh b/.azure-pipelines/scripts/models/run_model_trigger_common.sh
index 231aefa54c1..3e3b2867711 100644
--- a/.azure-pipelines/scripts/models/run_model_trigger_common.sh
+++ b/.azure-pipelines/scripts/models/run_model_trigger_common.sh
@@ -72,10 +72,10 @@ elif [ "${mode}" == "tuning" ]; then
        2>&1 | tee -a ${log_dir}/${model}/${framework}-${model}-tune.log
     $BOLD_YELLOW && echo "====== check tuning status. ======" && $RESET
     control_phrase="model which meet accuracy goal."
-    if [ $(grep "${control_phrase}" ${framework}-${model}-tune.log | wc -l) == 0 ];then
+    if [ $(grep "${control_phrase}" ${log_dir}/${model}/${framework}-${model}-tune.log | wc -l) == 0 ];then
         exit 1
     fi
-    if [ $(grep "${control_phrase}" ${framework}-${model}-tune.log | grep "Not found" | wc -l) == 1 ];then
+    if [ $(grep "${control_phrase}" ${log_dir}/${model}/${framework}-${model}-tune.log | grep "Not found" | wc -l) == 1 ];then
         exit 1
     fi
From 0fd14b1e52284b79de81787927d94c4ad48a98ed Mon Sep 17 00:00:00 2001
From: "Sun, Xuehao"
Date: Thu, 3 Nov 2022 11:00:24 +0800
Subject: [PATCH 26/34] fix benchmark check

---
 .azure-pipelines/scripts/models/run_benchmark_common.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.azure-pipelines/scripts/models/run_benchmark_common.sh b/.azure-pipelines/scripts/models/run_benchmark_common.sh
index 10adedd4314..e56bbd915e4 100644
--- a/.azure-pipelines/scripts/models/run_benchmark_common.sh
+++ b/.azure-pipelines/scripts/models/run_benchmark_common.sh
@@ -4,6 +4,7 @@ source /neural-compressor/.azure-pipelines/scripts/change_color.sh
 
 # get parameters
 PATTERN='[-a-zA-Z0-9_]*='
+SCRIPTS_PATH="/neural-compressor/.azure-pipelines/scripts/models"
 
 for i in "$@"; do
     case $i in
From a4621926fbe2f0778160100640a2ce184945bfd2 Mon Sep 17 00:00:00 2001
From: "Sun, Xuehao"
Date: Thu, 3 Nov 2022 11:12:57 +0800
Subject: [PATCH 27/34] fix

---
 .azure-pipelines/scripts/models/run_benchmark_common.sh     | 4 +++-
 .azure-pipelines/scripts/models/run_model_trigger_common.sh | 6 ++++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/.azure-pipelines/scripts/models/run_benchmark_common.sh b/.azure-pipelines/scripts/models/run_benchmark_common.sh
index e56bbd915e4..67758051a3e 100644
--- a/.azure-pipelines/scripts/models/run_benchmark_common.sh
+++ b/.azure-pipelines/scripts/models/run_benchmark_common.sh
@@ -22,6 +22,8 @@ for i in "$@"; do
            new_benchmark=`echo $i | sed "s/${PATTERN}//"`;;
        --precision=*)
            precision=`echo $i | sed "s/${PATTERN}//"`;;
+        --stage=*)
+            stage=`echo $i | sed "s/${PATTERN}//"`;;
        *)
            echo "Parameter $i not recognized."; exit 1;;
     esac
@@ -67,7 +69,7 @@ function check_perf_gap() {
        --logs_dir="${log_dir}/${model}" \
        --output_dir="${log_dir}/${model}" \
        --build_id=${BUILD_BUILDID} \
-        --stage=${mode}
+        --stage=${stage}
 }
 
 function run_performance() {
diff --git a/.azure-pipelines/scripts/models/run_model_trigger_common.sh b/.azure-pipelines/scripts/models/run_model_trigger_common.sh
index 3e3b2867711..1f789d87466 100644
--- a/.azure-pipelines/scripts/models/run_model_trigger_common.sh
+++ b/.azure-pipelines/scripts/models/run_model_trigger_common.sh
@@ -89,7 +89,8 @@ elif [ "${mode}" == "fp32_benchmark" ]; then
    --benchmark_cmd="${benchmark_cmd}" \
    --log_dir="${log_dir}/${model}" \
    --new_benchmark=${new_benchmark} \
-    --precision="fp32"
+    --precision="fp32" \
+    --stage=${mode}
 elif [ "${mode}" == "int8_benchmark" ]; then
     cd ${WORK_SOURCE_DIR}/${model_src_dir}
     $BOLD_YELLOW && echo "benchmark_cmd is ${benchmark_cmd}" && $RESET
@@ -111,7 +112,8 @@ elif [ "${mode}" == "int8_benchmark" ]; then
    --benchmark_cmd="${benchmark_cmd}" \
    --log_dir="${log_dir}/${model}" \
    --new_benchmark=${new_benchmark} \
-    --precision="int8"
+    --precision="int8" \
+    --stage=${mode}
 elif [ "${mode}" == "collect_log" ]; then
     cd ${WORK_SOURCE_DIR}/${model_src_dir}
     $BOLD_YELLOW && echo "====== collect logs of model ${model} =======" && $RESET
From dbdff5c427fd8f9fe0da3bea0d24273ca6f8509c Mon Sep 17 00:00:00 2001
From: "Sun, Xuehao"
Date: Thu, 3 Nov 2022 11:22:06 +0800
Subject: [PATCH 28/34] test

---
 .azure-pipelines/model-test.yml                 | 16 ++++++++--------
 .azure-pipelines/scripts/models/collect_log_model.py |  2 --
 2 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/.azure-pipelines/model-test.yml b/.azure-pipelines/model-test.yml
index fd379ef6088..f3695ac2659 100644
--- a/.azure-pipelines/model-test.yml
+++ b/.azure-pipelines/model-test.yml
@@ -30,19 +30,19 @@ parameters:
   - name: TensorFlowModelList
     type: object
     default:
-      - resnet50v1.5
-      - ssd_resnet50_v1
-      - ssd_mobilenet_v1_ckpt
-      - inception_v1
-      - resnet50_fashion
+      # - resnet50v1.5
+      # - ssd_resnet50_v1
+      # - ssd_mobilenet_v1_ckpt
+      # - inception_v1
+      # - resnet50_fashion
       - darknet19
-      - densenet-121
-      - resnet-101
+      # - densenet-121
+      # - resnet-101
   - name: PyTorchModelList
     type: object
     default:
       - resnet18
-      - resnet18_fx
+      # - resnet18_fx
   - name: ONNXModelList
     type: object
     default:
diff --git a/.azure-pipelines/scripts/models/collect_log_model.py b/.azure-pipelines/scripts/models/collect_log_model.py
index 978c4d0504d..3c828e256d7 100644
--- a/.azure-pipelines/scripts/models/collect_log_model.py
+++ b/.azure-pipelines/scripts/models/collect_log_model.py
@@ -244,8 +244,6 @@ def check_status(precision, precision_upper):
     refer = get_refer_data()
     if args.stage == "collect_log":
         collect_log()
-    elif args.stage == "tuning":
-        tuning_result_dict, benchmark_accuracy_result_dict = get_model_tuning_dict_results()
     elif args.stage == "int8_benchmark":
         check_status("int8", "INT8")
     elif args.stage == "fp32_benchmark":
From 90590b9a83c01cab0415d15631aaba5f9a765142 Mon Sep 17 00:00:00 2001
From: "Sun, Xuehao"
Date: Thu, 3 Nov 2022 11:29:41 +0800
Subject: [PATCH 29/34] update

---
 .azure-pipelines/scripts/models/collect_log_model.py | 22 +++++++++++--------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/.azure-pipelines/scripts/models/collect_log_model.py b/.azure-pipelines/scripts/models/collect_log_model.py
index 3c828e256d7..7fbfb55dfac 100644
--- a/.azure-pipelines/scripts/models/collect_log_model.py
+++ b/.azure-pipelines/scripts/models/collect_log_model.py
@@ -68,7 +68,9 @@ def get_model_tuning_dict_results():
            }
        }
 
-    return tuning_result_dict, benchmark_accuracy_result_dict
+        return tuning_result_dict, benchmark_accuracy_result_dict
+    else:
+        return {}, {}
 
 
 def get_model_benchmark_dict_results():
@@ -226,17 +228,19 @@ def parse_perf_line(line) -> float:
     return perf_data
 
 
-def check_status(precision, precision_upper):
-    benchmark_performance_result_dict = get_model_benchmark_dict_results()
-    _, benchmark_accuracy_result_dict = get_model_tuning_dict_results()
-    current_performance = benchmark_performance_result_dict.get(precision).get("Value")
-    current_accuracy = benchmark_accuracy_result_dict.get(precision).get("Value")
+def check_status(precision, precision_upper, check_accuracy = False):
+    performance_result = get_model_benchmark_dict_results()
+    current_performance = performance_result.get(precision).get("Value")
     refer_performance = refer.get(f"{precision_upper}_Performance")
-    refer_accuracy = refer.get(f"{precision_upper}_Accuracy")
     print(f"current_performance_data = {current_performance}, refer_performance_data = {refer_performance}")
     assert abs(current_performance - refer_performance) / refer_performance <= 0.05
-    print(f"current_accuracy_data = {current_accuracy}, refer_accuarcy_data = {refer_accuracy}")
-    assert abs(current_accuracy - refer_accuracy) / refer_accuracy <= 0.05
+
+    if check_accuracy:
+        _, accuracy_result = get_model_tuning_dict_results()
+        current_accuracy = accuracy_result.get(precision).get("Value")
+        refer_accuracy = refer.get(f"{precision_upper}_Accuracy")
+        print(f"current_accuracy_data = {current_accuracy}, refer_accuarcy_data = {refer_accuracy}")
+        assert abs(current_accuracy - refer_accuracy) / refer_accuracy <= 0.05
 
 
 if __name__ == '__main__':
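The reworked check_status() in PATCH 29 gates each benchmark on a 5% relative gap against the reference numbers downloaded from the last pipeline run, and only compares accuracy when the caller asks for it. The same relative-gap predicate is easy to express in shell; a sketch with hypothetical throughput values standing in for the collected and reference data:

    #!/bin/bash
    # Hypothetical values: current run vs. reference from the previous pipeline.
    current_performance=96.2
    refer_performance=100.0

    # Pass when |current - refer| / refer <= 0.05, mirroring the Python assert.
    if awk -v c="${current_performance}" -v r="${refer_performance}" \
        'BEGIN { d = (c - r) / r; if (d < 0) d = -d; exit !(d <= 0.05) }'; then
        echo "within 5% of reference, OK"
    else
        echo "performance gap exceeds 5%, FAILED"
        exit 1
    fi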
From 59f846c2af1b08c25fff5212562ebf7135852bd8 Mon Sep 17 00:00:00 2001
From: "Sun, Xuehao"
Date: Thu, 3 Nov 2022 11:40:38 +0800
Subject: [PATCH 30/34] update

---
 .azure-pipelines/scripts/models/run_benchmark_common.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.azure-pipelines/scripts/models/run_benchmark_common.sh b/.azure-pipelines/scripts/models/run_benchmark_common.sh
index 67758051a3e..ae5b8a1af36 100644
--- a/.azure-pipelines/scripts/models/run_benchmark_common.sh
+++ b/.azure-pipelines/scripts/models/run_benchmark_common.sh
@@ -66,8 +66,8 @@ function check_perf_gap() {
        --framework=${framework} \
        --fwk_ver=${fwk_ver} \
        --model=${model} \
-        --logs_dir="${log_dir}/${model}" \
-        --output_dir="${log_dir}/${model}" \
+        --logs_dir="${log_dir}" \
+        --output_dir="${log_dir}" \
        --build_id=${BUILD_BUILDID} \
        --stage=${stage}
 }
From ef5b6825b2cc075fdcb997e97f490e8c912a244f Mon Sep 17 00:00:00 2001
From: "Sun, Xuehao"
Date: Thu, 3 Nov 2022 11:54:03 +0800
Subject: [PATCH 31/34] validation

---
 .azure-pipelines/model-test.yml | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/.azure-pipelines/model-test.yml b/.azure-pipelines/model-test.yml
index f3695ac2659..fd379ef6088 100644
--- a/.azure-pipelines/model-test.yml
+++ b/.azure-pipelines/model-test.yml
@@ -30,19 +30,19 @@ parameters:
   - name: TensorFlowModelList
     type: object
     default:
-      # - resnet50v1.5
-      # - ssd_resnet50_v1
-      # - ssd_mobilenet_v1_ckpt
-      # - inception_v1
-      # - resnet50_fashion
+      - resnet50v1.5
+      - ssd_resnet50_v1
+      - ssd_mobilenet_v1_ckpt
+      - inception_v1
+      - resnet50_fashion
       - darknet19
-      # - densenet-121
-      # - resnet-101
+      - densenet-121
+      - resnet-101
   - name: PyTorchModelList
     type: object
     default:
       - resnet18
-      # - resnet18_fx
+      - resnet18_fx
   - name: ONNXModelList
     type: object
     default:
From 8e9f003632374f64e5f30405c499bf58bf0f8d64 Mon Sep 17 00:00:00 2001
From: "Sun, Xuehao"
Date: Thu, 3 Nov 2022 13:21:54 +0800
Subject: [PATCH 32/34] fix

---
 .../scripts/models/run_model_trigger_common.sh  |  8 --------
 .../scripts/models/run_tuning_common.sh         | 13 +++++++++++++
 2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/.azure-pipelines/scripts/models/run_model_trigger_common.sh b/.azure-pipelines/scripts/models/run_model_trigger_common.sh
index 1f789d87466..7a34552da43 100644
--- a/.azure-pipelines/scripts/models/run_model_trigger_common.sh
+++ b/.azure-pipelines/scripts/models/run_model_trigger_common.sh
@@ -70,14 +70,6 @@ elif [ "${mode}" == "tuning" ]; then
        --input_model=${input_model} \
        --strategy=${strategy} \
        2>&1 | tee -a ${log_dir}/${model}/${framework}-${model}-tune.log
-    $BOLD_YELLOW && echo "====== check tuning status. ======" && $RESET
-    control_phrase="model which meet accuracy goal."
-    if [ $(grep "${control_phrase}" ${log_dir}/${model}/${framework}-${model}-tune.log | wc -l) == 0 ];then
-        exit 1
-    fi
-    if [ $(grep "${control_phrase}" ${log_dir}/${model}/${framework}-${model}-tune.log | grep "Not found" | wc -l) == 1 ];then
-        exit 1
-    fi
 elif [ "${mode}" == "fp32_benchmark" ]; then
     cd ${WORK_SOURCE_DIR}/${model_src_dir}
     $BOLD_YELLOW && echo "benchmark_cmd is ${benchmark_cmd}" && $RESET
diff --git a/.azure-pipelines/scripts/models/run_tuning_common.sh b/.azure-pipelines/scripts/models/run_tuning_common.sh
index fbb68d65605..3c6a0dcca4d 100644
--- a/.azure-pipelines/scripts/models/run_tuning_common.sh
+++ b/.azure-pipelines/scripts/models/run_tuning_common.sh
@@ -48,3 +48,16 @@ end_seconds=$(date --date="$endtime" +%s);
 $BOLD_GREEN && echo "Tuning time spend: "$((end_seconds-start_seconds))"s " && $RESET
 $BOLD_GREEN && echo "Tuning strategy: ${strategy}" && $RESET
 $BOLD_GREEN && echo "Total resident size (kbytes): $(cat /proc/meminfo | grep 'MemTotal' | sed 's/[^0-9]//g')" && $RESET
+
+$BOLD_YELLOW && echo "====== check tuning status. ======" && $RESET
+control_phrase="model which meet accuracy goal."
+if [ $(grep "${control_phrase}" ${log_dir}/${model}/${framework}-${model}-tune.log | wc -l) == 0 ];then
+    $BOLD_RED && echo "====== tuning FAILED!! ======" && $RESET
+    exit 1
+fi
+if [ $(grep "${control_phrase}" ${log_dir}/${model}/${framework}-${model}-tune.log | grep "Not found" | wc -l) == 1 ];then
+    $BOLD_RED && echo "====== tuning FAILED!! ======" && $RESET
+    exit 1
+fi
+
+$BOLD_GREEN && echo "====== tuning SUCCEED!! ======" && $RESET
From 1b6d74c5921870f1d93fc34948b7bac639f6c8af Mon Sep 17 00:00:00 2001
From: "Sun, Xuehao"
Date: Thu, 3 Nov 2022 13:36:02 +0800
Subject: [PATCH 33/34] update

---
 .../scripts/models/run_model_trigger_common.sh |  9 +++++++++
 .../scripts/models/run_tuning_common.sh        | 13 -------------
 2 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/.azure-pipelines/scripts/models/run_model_trigger_common.sh b/.azure-pipelines/scripts/models/run_model_trigger_common.sh
index 7a34552da43..d0c89560416 100644
--- a/.azure-pipelines/scripts/models/run_model_trigger_common.sh
+++ b/.azure-pipelines/scripts/models/run_model_trigger_common.sh
@@ -70,6 +70,15 @@ elif [ "${mode}" == "tuning" ]; then
        --input_model=${input_model} \
        --strategy=${strategy} \
        2>&1 | tee -a ${log_dir}/${model}/${framework}-${model}-tune.log
+    $BOLD_YELLOW && echo "====== check tuning status. ======" && $RESET
+    control_phrase="model which meet accuracy goal."
+    if [ $(grep "${control_phrase}" ${log_dir}/${model}/${framework}-${model}-tune.log | wc -l) == 0 ];then
+        $BOLD_RED && echo "====== tuning FAILED!! ======" && $RESET; exit 1
+    fi
+    if [ $(grep "${control_phrase}" ${log_dir}/${model}/${framework}-${model}-tune.log | grep "Not found" | wc -l) == 1 ];then
+        $BOLD_RED && echo "====== tuning FAILED!! ======" && $RESET; exit 1
+    fi
+    $BOLD_GREEN && echo "====== tuning SUCCEED!! ======" && $RESET
 elif [ "${mode}" == "fp32_benchmark" ]; then
     cd ${WORK_SOURCE_DIR}/${model_src_dir}
     $BOLD_YELLOW && echo "benchmark_cmd is ${benchmark_cmd}" && $RESET
diff --git a/.azure-pipelines/scripts/models/run_tuning_common.sh b/.azure-pipelines/scripts/models/run_tuning_common.sh
index 3c6a0dcca4d..fbb68d65605 100644
--- a/.azure-pipelines/scripts/models/run_tuning_common.sh
+++ b/.azure-pipelines/scripts/models/run_tuning_common.sh
@@ -48,16 +48,3 @@ end_seconds=$(date --date="$endtime" +%s);
 $BOLD_GREEN && echo "Tuning time spend: "$((end_seconds-start_seconds))"s " && $RESET
 $BOLD_GREEN && echo "Tuning strategy: ${strategy}" && $RESET
 $BOLD_GREEN && echo "Total resident size (kbytes): $(cat /proc/meminfo | grep 'MemTotal' | sed 's/[^0-9]//g')" && $RESET
-
-$BOLD_YELLOW && echo "====== check tuning status. ======" && $RESET
-control_phrase="model which meet accuracy goal."
-if [ $(grep "${control_phrase}" ${log_dir}/${model}/${framework}-${model}-tune.log | wc -l) == 0 ];then
-    $BOLD_RED && echo "====== tuning FAILED!! ======" && $RESET
-    exit 1
-fi
-if [ $(grep "${control_phrase}" ${log_dir}/${model}/${framework}-${model}-tune.log | grep "Not found" | wc -l) == 1 ];then
-    $BOLD_RED && echo "====== tuning FAILED!! ======" && $RESET
-    exit 1
-fi
-
-$BOLD_GREEN && echo "====== tuning SUCCEED!! ======" && $RESET
From 4018927a2a196ebd4530faf4848f2b6cfaafa3a0 Mon Sep 17 00:00:00 2001
From: "Sun, Xuehao"
Date: Fri, 4 Nov 2022 15:36:15 +0800
Subject: [PATCH 34/34] use python3.8

---
 .azure-pipelines/model-test.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.azure-pipelines/model-test.yml b/.azure-pipelines/model-test.yml
index fd379ef6088..c63d03cc6a4 100644
--- a/.azure-pipelines/model-test.yml
+++ b/.azure-pipelines/model-test.yml
@@ -136,9 +136,9 @@ stages:
              patterns: "**/*_tuning_info.log"
              path: $(OUT_SCRIPT_PATH)
          - task: UsePythonVersion@0
-            displayName: "Use Python 3."
+            displayName: "Use Python 3.8"
            inputs:
-              versionSpec: "3"
+              versionSpec: "3.8"
          - script: |
              cd ${OUT_SCRIPT_PATH}
              mkdir generated