diff --git a/.azure-pipelines/model-test.yml b/.azure-pipelines/model-test.yml index f604b176e13..c63d03cc6a4 100644 --- a/.azure-pipelines/model-test.yml +++ b/.azure-pipelines/model-test.yml @@ -47,13 +47,12 @@ parameters: type: object default: - resnet50-v1-12 - - bert_base_MRPC_dynamic + # - bert_base_MRPC_dynamic - name: MXNetModelList type: object default: - resnet50v1 - stages: - stage: TensorFlowModels displayName: Run TensorFlow Model @@ -111,7 +110,6 @@ stages: modelName: ${{ model }} framework: "mxnet" - - stage: GenerateLogs displayName: Generate Report dependsOn: [TensorFlowModels, PyTorchModels, MXNetModels, ONNXModels] @@ -138,9 +136,9 @@ stages: patterns: "**/*_tuning_info.log" path: $(OUT_SCRIPT_PATH) - task: UsePythonVersion@0 - displayName: "Use Python 3." + displayName: "Use Python 3.8" inputs: - versionSpec: "3" + versionSpec: "3.8" - script: | cd ${OUT_SCRIPT_PATH} mkdir generated @@ -154,15 +152,15 @@ stages: patterns: "**.log" path: $(OUT_SCRIPT_PATH)/last_generated project: $(System.TeamProject) - pipeline: "Baseline" + pipeline: "Model-Test" runVersion: "specific" - runId: $(model_runID) + runId: $(refer_buildId) retryDownloadCount: 3 displayName: "Download last logs" - script: | echo "------ Generating final report.html ------" cd ${OUT_SCRIPT_PATH} - /usr/bin/bash generate_report.sh --WORKSPACE generated --output_dir generated --last_logt_dir last_generated --ghprbPullId $(System.PullRequest.PullRequestNumber) --MR_source_branch $(System.PullRequest.SourceBranch) --MR_source_repo $(System.PullRequest.SourceRepositoryURI) --MR_target_branch $(System.PullRequest.TargetBranch) --repo_url $(Build.Repository.Uri) --source_commit_id $(Build.SourceVersion) --build_id $(Build.BuildId) + /usr/bin/bash generate_report.sh --WORKSPACE generated --output_dir generated --last_logt_dir last_generated displayName: "Generate report" - task: PublishPipelineArtifact@1 inputs: diff --git a/.azure-pipelines/scripts/install_nc.sh b/.azure-pipelines/scripts/install_nc.sh index 38fd439404b..fa2daca5d19 100644 --- a/.azure-pipelines/scripts/install_nc.sh +++ b/.azure-pipelines/scripts/install_nc.sh @@ -4,4 +4,4 @@ cd /neural-compressor python -m pip install --no-cache-dir -r requirements.txt python setup.py sdist bdist_wheel pip install dist/neural_compressor*.whl -pip list \ No newline at end of file +pip list diff --git a/.azure-pipelines/scripts/install_nc_full.sh b/.azure-pipelines/scripts/install_nc_full.sh index b3f59626dfd..7513baeb254 100644 --- a/.azure-pipelines/scripts/install_nc_full.sh +++ b/.azure-pipelines/scripts/install_nc_full.sh @@ -4,4 +4,4 @@ cd /neural-compressor python -m pip install --no-cache-dir -r requirements.txt python setup.py --full sdist bdist_wheel pip install dist/neural_compressor*.whl -pip list \ No newline at end of file +pip list diff --git a/.azure-pipelines/scripts/models/collect_log_all.py b/.azure-pipelines/scripts/models/collect_log_all.py index 61fe9454c2e..fb9db0d6721 100644 --- a/.azure-pipelines/scripts/models/collect_log_all.py +++ b/.azure-pipelines/scripts/models/collect_log_all.py @@ -1,7 +1,5 @@ -import re -import os -import platform import argparse +import os parser = argparse.ArgumentParser(allow_abbrev=False) parser.add_argument("--logs_dir", type=str, default=".") diff --git a/.azure-pipelines/scripts/models/collect_log_model.py b/.azure-pipelines/scripts/models/collect_log_model.py index 37d305423bf..7fbfb55dfac 100644 --- a/.azure-pipelines/scripts/models/collect_log_model.py +++ 
b/.azure-pipelines/scripts/models/collect_log_model.py @@ -1,8 +1,6 @@ -import re -import os -import platform import argparse - +import os +import re parser = argparse.ArgumentParser(allow_abbrev=False) parser.add_argument("--framework", type=str, required=True) @@ -11,6 +9,7 @@ parser.add_argument("--logs_dir", type=str, default=".") parser.add_argument("--output_dir", type=str, default=".") parser.add_argument("--build_id", type=str, default="3117") +parser.add_argument("--stage", type=str, default="collect_log") args = parser.parse_args() print('===== collecting log model =======') print('build_id: '+args.build_id) @@ -18,13 +17,119 @@ PLATFORM='icx' URL ='https://dev.azure.com/lpot-inc/neural-compressor/_build/results?buildId='+args.build_id+'&view=artifacts&pathAsName=false&type=publishedArtifacts' -print(args) + +def get_model_tuning_dict_results(): + tuning_result_dict = {} + + if os.path.exists(tuning_log): + print('tuning log found') + tmp = {'fp32_acc': 0, 'int8_acc': 0, 'tuning_trials': 0} + with open(tuning_log, "r") as f: + for line in f: + parse_tuning_line(line, tmp) + print(tmp) + # set model status failed + if tmp['fp32_acc'] == 0 or tmp['int8_acc'] == 0: + os.system('echo "##vso[task.setvariable variable=' + args.framework + '_' + args.model + '_failed]true"') + + tuning_result_dict = { + "OS": OS, + "Platform": PLATFORM, + "Framework": args.framework, + "Version": args.fwk_ver, + "Model": args.model, + "Strategy": tmp['strategy'], + "Tune_time": tmp['tune_time'], + } + benchmark_accuracy_result_dict = { + 'int8': { + "OS": OS, + "Platform": PLATFORM, + "Framework": args.framework, + "Version": args.fwk_ver, + "Model": args.model, + "Mode": "Inference", + "Type": "Accuracy", + "BS": 1, + "Value": tmp['int8_acc'], + "Url": URL, + }, + 'fp32': { + "OS": OS, + "Platform": PLATFORM, + "Framework": args.framework, + "Version": args.fwk_ver, + "Model": args.model, + "Mode": "Inference", + "Type": "Accuracy", + "BS": 1, + "Value": tmp['fp32_acc'], + "Url": URL, + } + } + + return tuning_result_dict, benchmark_accuracy_result_dict + else: + return {}, {} + + +def get_model_benchmark_dict_results(): + benchmark_performance_result_dict = {"int8": {}, "fp32": {}} + for precision in ["int8", "fp32"]: + throughput = 0.0 + bs = 1 + for root, dirs, files in os.walk(args.logs_dir): + for name in files: + file_name = os.path.join(root, name) + print(file_name) + if "performance-" + precision in name: + for line in open(file_name, "r"): + result = parse_perf_line(line) + if result.get("throughput"): + throughput += result.get("throughput") + if result.get("batch_size"): + bs = result.get("batch_size") + + # set model status failed + if throughput == 0.0: + os.system('echo "##vso[task.setvariable variable=' + args.framework + '_' + args.model + '_failed]true"') + benchmark_performance_result_dict[precision] = { + "OS": OS, + "Platform": PLATFORM, + "Framework": args.framework, + "Version": args.fwk_ver, + "Model": args.model, + "Mode": "Inference", + "Type": "Performance", + "BS": bs, + "Value": throughput, + "Url": URL, + } + + return benchmark_performance_result_dict + + +def get_refer_data(): + refer_log = os.path.join(f"{args.logs_dir}_refer_log", f"{args.framework}_{args.model}_summary.log") + result = {} + if os.path.exists(refer_log): + with open(refer_log, "r") as f: + lines = f.readlines() + keys = lines[0].split(";") + values = [lines[i].split(";") for i in range(1, len(lines))] + for value in values: + precision = value[keys.index("Precision")] + Type = value[keys.index("Type")] +
result[f"{precision}_{Type}"] = float(value[keys.index("Value")]) + return result + else: + print(f"refer log file: {refer_log} not found") + return 0 -def main(): +def collect_log(): results = [] tuning_infos = [] - tuning_log = os.path.join(args.logs_dir, f"{args.framework}-{args.model}-tune.log") print("tuning log dir is {}".format(tuning_log)) # get model tuning results if os.path.exists(tuning_log): @@ -123,5 +228,29 @@ def parse_perf_line(line) -> float: return perf_data +def check_status(precision, precision_upper, check_accuracy = False): + performance_result = get_model_benchmark_dict_results() + current_performance = performance_result.get(precision).get("Value") + refer_performance = refer.get(f"{precision_upper}_Performance") + print(f"current_performance_data = {current_performance}, refer_performance_data = {refer_performance}") + assert abs(current_performance - refer_performance) / refer_performance <= 0.05 + + if check_accuracy: + _, accuracy_result = get_model_tuning_dict_results() + current_accuracy = accuracy_result.get(precision).get("Value") + refer_accuracy = refer.get(f"{precision_upper}_Accuracy") + print(f"current_accuracy_data = {current_accuracy}, refer_accuarcy_data = {refer_accuracy}") + assert abs(current_accuracy - refer_accuracy) / refer_accuracy <= 0.05 + + if __name__ == '__main__': - main() + tuning_log = os.path.join(args.logs_dir, f"{args.framework}-{args.model}-tune.log") + refer = get_refer_data() + if args.stage == "collect_log": + collect_log() + elif args.stage == "int8_benchmark": + check_status("int8", "INT8") + elif args.stage == "fp32_benchmark": + check_status("fp32", "FP32") + else: + raise ValueError(f"{args.stage} does not exist") diff --git a/.azure-pipelines/scripts/models/env_setup.sh b/.azure-pipelines/scripts/models/env_setup.sh new file mode 100644 index 00000000000..7443e3e9d25 --- /dev/null +++ b/.azure-pipelines/scripts/models/env_setup.sh @@ -0,0 +1,124 @@ +#!/bin/bash +set -eo pipefail +source /neural-compressor/.azure-pipelines/scripts/change_color.sh + +# get parameters +PATTERN='[-a-zA-Z0-9_]*=' + +for i in "$@"; do + case $i in + --yaml=*) + yaml=$(echo $i | sed "s/${PATTERN}//") + ;; + --framework=*) + framework=$(echo $i | sed "s/${PATTERN}//") + ;; + --fwk_ver=*) + fwk_ver=$(echo $i | sed "s/${PATTERN}//") + ;; + --torch_vision_ver=*) + torch_vision_ver=$(echo $i | sed "s/${PATTERN}//") + ;; + --model=*) + model=$(echo $i | sed "s/${PATTERN}//") + ;; + --model_src_dir=*) + model_src_dir=$(echo $i | sed "s/${PATTERN}//") + ;; + --dataset_location=*) + dataset_location=$(echo $i | sed "s/${PATTERN}//") + ;; + --batch_size=*) + batch_size=$(echo $i | sed "s/${PATTERN}//") + ;; + --strategy=*) + strategy=$(echo $i | sed "s/${PATTERN}//") + ;; + --new_benchmark=*) + new_benchmark=$(echo $i | sed "s/${PATTERN}//") + ;; + *) + echo "Parameter $i not recognized." + exit 1 + ;; + esac +done + +SCRIPTS_PATH="/neural-compressor/.azure-pipelines/scripts/models" +log_dir="/neural-compressor/.azure-pipelines/scripts/models" +WORK_SOURCE_DIR="/neural-compressor/examples/${framework}" +$BOLD_YELLOW && echo "processing ${framework}-${fwk_ver}-${model}" && $RESET + +$BOLD_YELLOW && echo "======= creat log_dir =========" && $RESET +if [ -d "${log_dir}/${model}" ]; then + $BOLD_GREEN && echo "${log_dir}/${model} already exists, don't need to mkdir." && $RESET +else + $BOLD_GREEN && echo "no log dir ${log_dir}/${model}, create." 
&& $RESET + cd ${log_dir} + mkdir ${model} +fi + +$BOLD_YELLOW && echo "====== install requirements ======" && $RESET +/bin/bash /neural-compressor/.azure-pipelines/scripts/install_nc.sh + +cd ${WORK_SOURCE_DIR}/${model_src_dir} +pip install ruamel_yaml +pip install psutil +pip install protobuf==3.20.1 +if [[ "${framework}" == "tensorflow" ]]; then + pip install intel-tensorflow==${fwk_ver} +elif [[ "${framework}" == "pytorch" ]]; then + pip install torch==${fwk_ver} -f https://download.pytorch.org/whl/torch_stable.html + pip install torchvision==${torch_vision_ver} -f https://download.pytorch.org/whl/torch_stable.html +elif [[ "${framework}" == "onnxrt" ]]; then + pip install onnx==1.11.0 + pip install onnxruntime==${fwk_ver} +elif [[ "${framework}" == "mxnet" ]]; then + if [[ "${fwk_ver}" == "1.7.0" ]]; then + pip install mxnet==${fwk_ver}.post2 + elif [[ "${fwk_ver}" == "1.6.0" ]]; then + pip install mxnet-mkl==${mxnet_version} + else + pip install mxnet==${fwk_ver} + fi +fi + +if [ -f "requirements.txt" ]; then + sed -i '/neural-compressor/d' requirements.txt + if [ "${framework}" == "onnxrt" ]; then + sed -i '/^onnx>=/d;/^onnx==/d;/^onnxruntime>=/d;/^onnxruntime==/d' requirements.txt + fi + if [ "${framework}" == "tensorflow" ]; then + sed -i '/tensorflow==/d;/tensorflow$/d' requirements.txt + sed -i '/^intel-tensorflow/d' requirements.txt + fi + if [ "${framework}" == "mxnet" ]; then + sed -i '/mxnet==/d;/mxnet$/d;/mxnet-mkl==/d;/mxnet-mkl$/d' requirements.txt + fi + if [ "${framework}" == "pytorch" ]; then + sed -i '/torch==/d;/torch$/d;/torchvision==/d;/torchvision$/d' requirements.txt + fi + n=0 + until [ "$n" -ge 5 ]; do + python -m pip install -r requirements.txt && break + n=$((n + 1)) + sleep 5 + done + pip list +else + $BOLD_RED && echo "Not found requirements.txt file." && $RESET +fi + +$BOLD_YELLOW && echo "======== update yaml config ========" && $RESET +$BOLD_YELLOW && echo -e "\nPrint origin yaml..." && $RESET +cat ${yaml} +python ${SCRIPTS_PATH}/update_yaml_config.py \ + --yaml=${yaml} \ + --framework=${framework} \ + --dataset_location=${dataset_location} \ + --batch_size=${batch_size} \ + --strategy=${strategy} \ + --new_benchmark=${new_benchmark} \ + --multi_instance='true' +$BOLD_YELLOW && echo -e "\nPrint updated yaml... 
" && $RESET +cat ${yaml} diff --git a/.azure-pipelines/scripts/models/generate_report.sh b/.azure-pipelines/scripts/models/generate_report.sh index 51b0e3423c8..e76cba525f4 100644 --- a/.azure-pipelines/scripts/models/generate_report.sh +++ b/.azure-pipelines/scripts/models/generate_report.sh @@ -24,52 +24,24 @@ while [[ $# -gt 0 ]];do key=${1} case ${key} in -w|--WORKSPACE) - WORKSPACE=${2} - shift 2 - ;; + WORKSPACE=${2} + shift 2 + ;; --script_path) - script_path=${2} - shift 2 - ;; + script_path=${2} + shift 2 + ;; --output_dir) - output_dir=${2} - shift 2 - ;; + output_dir=${2} + shift 2 + ;; --last_logt_dir) - last_logt_dir=${2} - shift 2 - ;; - --ghprbPullId) - ghprbPullId=${2} - shift 2 - ;; - --MR_source_branch) - MR_source_branch=${2} - shift 2 - ;; - --MR_source_repo) - MR_source_repo=${2} - shift 2 - ;; - --MR_target_branch) - MR_target_branch=${2} - shift 2 - ;; - --repo_url) - repo_url=${2} - shift 2 - ;; - --source_commit_id) - source_commit_id=${2} - shift 2 - ;; - --build_id) - build_id=${2} - shift 2 - ;; + last_logt_dir=${2} + shift 2 + ;; *) - shift - ;; + shift + ;; esac done @@ -86,11 +58,19 @@ summaryLogLast="${last_logt_dir}/summary.log" tuneLogLast="${last_logt_dir}/tuning_info.log" echo "summaryLogLast: ${summaryLogLast}" echo "tuneLogLast: ${tuneLogLast}" +ghprbPullId=${SYSTEM_PULLREQUEST_PULLREQUESTNUMBER} +MR_source_branch=${SYSTEM_PULLREQUEST_SOURCEBRANCH} +MR_source_repo=${SYSTEM_PULLREQUEST_SOURCEREPOSITORYURI} +MR_target_branch=${SYSTEM_PULLREQUEST_TARGETBRANCH} +repo_url=${BUILD_REPOSITORY_URI} +source_commit_id=${BUILD_SOURCEVERSION} +build_id=${BUILD_BUILDID} echo "MR_source_branch: ${MR_source_branch}" echo "MR_source_repo: ${MR_source_repo}" echo "MR_target_branch: ${MR_target_branch}" echo "repo_url: ${repo_url}" echo "commit_id: ${source_commit_id}" +echo "ghprbPullId: ${ghprbPullId}" function main { @@ -98,7 +78,6 @@ function main { generate_html_body generate_results generate_html_footer - } function generate_inference { @@ -205,33 +184,33 @@ function generate_html_core { if(metric == "acc") { target = (int8_result - fp32_result) / fp32_result; if(target >= -0.01) { - printf("%.2f %", target*100); + printf("%.2f %", target*100); }else if(target < -0.05) { - printf("%.2f %", target*100); - job_status = "fail" + printf("%.2f %", target*100); + job_status = "fail" }else{ - printf("%.2f %", target*100); + printf("%.2f %", target*100); } }else if(metric == "perf") { target = int8_result / fp32_result; if(target >= 1.5) { - printf("%.2f", target); + printf("%.2f", target); }else if(target < 1) { - printf("%.2f", target); - job_status = "fail" + printf("%.2f", target); + job_status = "fail" }else{ - printf("%.2f", target); + printf("%.2f", target); } } else { target = int8_result / fp32_result; if(target >= 2) { - printf("%.2f", target); + printf("%.2f", target); }else if(target < 1) { - printf("%.2f", target); - job_status = "fail" + printf("%.2f", target); + job_status = "fail" }else{ - printf("%.2f", target); + printf("%.2f", target); } } }else { @@ -263,14 +242,14 @@ function generate_html_core { } } else { if(new_result == nan && previous_result == nan){ - printf(""); + printf(""); } else{ if(new_result == nan) { - job_status = "fail" - status_png = "background-color:#FFD2D2"; - printf("", status_png); + job_status = "fail" + status_png = "background-color:#FFD2D2"; + printf("", status_png); } else{ - printf(""); + printf(""); } } } @@ -367,7 +346,7 @@ function generate_html_core { printf("\n"); } END{ - printf("\n%s", job_status); + printf("\n%s", 
job_status); } ' >> ${output_dir}/report.html job_state=$(tail -1 ${WORKSPACE}/report.html) @@ -426,14 +405,14 @@ Test_Info='' if [ "${qtools_branch}" == "" ]; then - commit_id=$(echo ${ghprbActualCommit} |awk '{print substr($1,1,7)}') + commit_id=$(echo ${ghprbActualCommit} |awk '{print substr($1,1,7)}') - MR_TITLE="[ PR-${ghprbPullId} ]" - Test_Info_Title="Source Branch Target Branch Commit " - Test_Info="${MR_source_branch} ${MR_target_branch} ${source_commit_id:0:6}" + MR_TITLE="[ PR-${ghprbPullId} ]" + Test_Info_Title="Source Branch Target Branch Commit " + Test_Info="${MR_source_branch} ${MR_target_branch} ${source_commit_id:0:6}" else - Test_Info_Title="Test Branch Commit ID " - Test_Info="${qtools_branch} ${qtools_commit} " + Test_Info_Title="Test Branch Commit ID " + Test_Info="${qtools_branch} ${qtools_commit} " fi cat >> ${output_dir}/report.html << eof @@ -441,18 +420,20 @@ cat >> ${output_dir}/report.html << eof

     <h1 align="center">Neural Compressor Tuning Tests ${MR_TITLE}
-        [ <a href="https://dev.azure.com/lpot-inc/neural-compressor/_build/results?buildId=${build_id}">Job-${build_id}</a> ]
-    </h1>
-    <h1 align="center">Test Status: ${Jenkins_job_status}</h1>
+        [ <a
+            href="https://dev.azure.com/lpot-inc/neural-compressor/_build/results?buildId=${build_id}">Job-${build_id}</a>
+        ]
+    </h1>
+    <h1 align="center">Test Status: ${Jenkins_job_status}</h1>
     <h2>Summary</h2>
     <table class="features-table">
-    <tr>
-       <th>Repo</th>
-       ${Test_Info_Title}
-    </tr>
-    <tr>
-       <td><a href="https://github.com/intel/neural-compressor">neural-compressor</a></td>
-       ${Test_Info}
-    </tr>
+        <tr>
+            <th>Repo</th>
+            ${Test_Info_Title}
+        </tr>
+        <tr>
+            <td><a href="https://github.com/intel/neural-compressor">neural-compressor</a></td>
+            ${Test_Info}
+        </tr>
     </table>
eof @@ -537,68 +518,68 @@ cat > ${output_dir}/report.html << eof } .features-table { - width: 100%; - margin: 0 auto; - border-collapse: separate; - border-spacing: 0; - text-shadow: 0 1px 0 #fff; - color: #2a2a2a; - background: #fafafa; - background-image: -moz-linear-gradient(top, #fff, #eaeaea, #fff); /* Firefox 3.6 */ - background-image: -webkit-gradient(linear,center bottom,center top,from(#fff),color-stop(0.5, #eaeaea),to(#fff)); - font-family: Verdana,Arial,Helvetica + width: 100%; + margin: 0 auto; + border-collapse: separate; + border-spacing: 0; + text-shadow: 0 1px 0 #fff; + color: #2a2a2a; + background: #fafafa; + background-image: -moz-linear-gradient(top, #fff, #eaeaea, #fff); /* Firefox 3.6 */ + background-image: -webkit-gradient(linear,center bottom,center top,from(#fff),color-stop(0.5, #eaeaea),to(#fff)); + font-family: Verdana,Arial,Helvetica } .features-table th,td { - text-align: center; - height: 25px; - line-height: 25px; - padding: 0 8px; - border: 1px solid #cdcdcd; - box-shadow: 0 1px 0 white; - -moz-box-shadow: 0 1px 0 white; - -webkit-box-shadow: 0 1px 0 white; - white-space: nowrap; + text-align: center; + height: 25px; + line-height: 25px; + padding: 0 8px; + border: 1px solid #cdcdcd; + box-shadow: 0 1px 0 white; + -moz-box-shadow: 0 1px 0 white; + -webkit-box-shadow: 0 1px 0 white; + white-space: nowrap; } .no-border th { - box-shadow: none; - -moz-box-shadow: none; - -webkit-box-shadow: none; + box-shadow: none; + -moz-box-shadow: none; + -webkit-box-shadow: none; } .col-cell { - text-align: center; - width: 150px; - font: normal 1em Verdana, Arial, Helvetica; + text-align: center; + width: 150px; + font: normal 1em Verdana, Arial, Helvetica; } .col-cell3 { - background: #efefef; - background: rgba(144,144,144,0.15); + background: #efefef; + background: rgba(144,144,144,0.15); } .col-cell1, .col-cell2 { - background: #B0C4DE; - background: rgba(176,196,222,0.3); + background: #B0C4DE; + background: rgba(176,196,222,0.3); } .col-cellh { - font: bold 1.3em 'trebuchet MS', 'Lucida Sans', Arial; - -moz-border-radius-topright: 10px; - -moz-border-radius-topleft: 10px; - border-top-right-radius: 10px; - border-top-left-radius: 10px; - border-top: 1px solid #eaeaea !important; + font: bold 1.3em 'trebuchet MS', 'Lucida Sans', Arial; + -moz-border-radius-topright: 10px; + -moz-border-radius-topleft: 10px; + border-top-right-radius: 10px; + border-top-left-radius: 10px; + border-top: 1px solid #eaeaea !important; } .col-cellf { - font: bold 1.4em Georgia; - -moz-border-radius-bottomright: 10px; - -moz-border-radius-bottomleft: 10px; - border-bottom-right-radius: 10px; - border-bottom-left-radius: 10px; - border-bottom: 1px solid #dadada !important; + font: bold 1.4em Georgia; + -moz-border-radius-bottomright: 10px; + -moz-border-radius-bottomleft: 10px; + border-bottom-right-radius: 10px; + border-bottom-left-radius: 10px; + border-bottom: 1px solid #dadada !important; } diff --git a/.azure-pipelines/scripts/models/run_benchmark_common.sh b/.azure-pipelines/scripts/models/run_benchmark_common.sh index b342ac48992..ae5b8a1af36 100644 --- a/.azure-pipelines/scripts/models/run_benchmark_common.sh +++ b/.azure-pipelines/scripts/models/run_benchmark_common.sh @@ -1,11 +1,12 @@ #!/bin/bash -set -x +set -eo pipefail +source /neural-compressor/.azure-pipelines/scripts/change_color.sh # get parameters PATTERN='[-a-zA-Z0-9_]*=' +SCRIPTS_PATH="/neural-compressor/.azure-pipelines/scripts/models" -for i in "$@" -do +for i in "$@"; do case $i in --framework=*) framework=`echo $i | 
sed "s/${PATTERN}//"`;; @@ -15,76 +16,114 @@ do input_model=`echo $i | sed "s/${PATTERN}//"`;; --benchmark_cmd=*) benchmark_cmd=`echo $i | sed "s/${PATTERN}//"`;; - --tune_acc=*) - tune_acc=`echo $i | sed "s/${PATTERN}//"`;; --log_dir=*) log_dir=`echo $i | sed "s/${PATTERN}//"`;; --new_benchmark=*) new_benchmark=`echo $i | sed "s/${PATTERN}//"`;; --precision=*) precision=`echo $i | sed "s/${PATTERN}//"`;; + --stage=*) + stage=`echo $i | sed "s/${PATTERN}//"`;; *) echo "Parameter $i not recognized."; exit 1;; esac done -echo "-------- run_benchmark_common --------" +$BOLD_YELLOW && echo "-------- run_benchmark_common --------" && $RESET -# run accuracy -# tune_acc==true means using accuracy results from tuning log -if [ "${tune_acc}" == "false" ]; then - echo "run tuning accuracy in precision ${precision}" - eval "${benchmark_cmd} --input_model=${input_model} --mode=accuracy" 2>&1 | tee ${log_dir}/${framework}-${model}-accuracy-${precision}.log -fi +main() { + # run accuracy + # USE_TUNE_ACC==true means using accuracy results from tuning log + if [ "${USE_TUNE_ACC}" == "false" ]; then + run_accuracy + fi + + # run performance + if [ "${PERF_STABLE_CHECK}" == "false" ]; then + run_performance + else + max_loop=3 + for ((iter = 0; iter < ${max_loop}; iter++)); do + run_performance + { + check_perf_gap + exit_code=$? + } || true + if [ ${exit_code} -ne 0 ]; then + $BOLD_RED && echo "FAILED with performance gap!!" && $RESET + else + $BOLD_GREEN && echo "SUCCEED!!" && $RESET + break + fi + done + exit ${exit_code} + fi +} + +function check_perf_gap() { + python -u ${SCRIPTS_PATH}/collect_log_model.py \ + --framework=${framework} \ + --fwk_ver=${fwk_ver} \ + --model=${model} \ + --logs_dir="${log_dir}" \ + --output_dir="${log_dir}" \ + --build_id=${BUILD_BUILDID} \ + --stage=${stage} +} + +function run_performance() { + cmd="${benchmark_cmd} --input_model=${input_model}" + if [ "${new_benchmark}" == "true" ]; then + $BOLD_YELLOW && echo "run with internal benchmark..." && $RESET + eval ${cmd} 2>&1 | tee ${log_dir}/${framework}-${model}-performance-${precision}.log + else + $BOLD_YELLOW && echo "run with external multiInstance benchmark..." && $RESET + multiInstance + fi +} + +function run_accuracy() { + $BOLD_YELLOW && echo "run tuning accuracy in precision ${precision}" && $RESET + eval "${benchmark_cmd} --input_model=${input_model} --mode=accuracy" 2>&1 | tee ${log_dir}/${framework}-${model}-accuracy-${precision}.log +} function multiInstance() { - ncores_per_socket=${ncores_per_socket:=$( lscpu | grep 'Core(s) per socket' | cut -d: -f2 | xargs echo -n)} - echo "Executing multi instance benchmark" + ncores_per_socket=${ncores_per_socket:=$(lscpu | grep 'Core(s) per socket' | cut -d: -f2 | xargs echo -n)} + $BOLD_YELLOW && echo "Executing multi instance benchmark" && $RESET ncores_per_instance=4 - echo "ncores_per_socket=${ncores_per_socket}, ncores_per_instance=${ncores_per_instance}" + $BOLD_YELLOW && echo "ncores_per_socket=${ncores_per_socket}, ncores_per_instance=${ncores_per_instance}" && $RESET logFile="${log_dir}/${framework}-${model}-performance-${precision}" benchmark_pids=() - for((j=0;$j<${ncores_per_socket};j=$(($j + ${ncores_per_instance})))); - do - end_core_num=$((j + ncores_per_instance -1)) - if [ ${end_core_num} -ge ${ncores_per_socket} ]; then - end_core_num=$((ncores_per_socket-1)) - fi - numactl -m 0 -C "${j}-${end_core_num}" ${cmd} 2>&1 | tee ${logFile}-${ncores_per_socket}-${ncores_per_instance}-${j}.log & - benchmark_pids+=($!) 
+ for ((j = 0; $j < ${ncores_per_socket}; j = $(($j + ${ncores_per_instance})))); do + end_core_num=$((j + ncores_per_instance - 1)) + if [ ${end_core_num} -ge ${ncores_per_socket} ]; then + end_core_num=$((ncores_per_socket - 1)) + fi + numactl -m 0 -C "${j}-${end_core_num}" ${cmd} 2>&1 | tee ${logFile}-${ncores_per_socket}-${ncores_per_instance}-${j}.log & + benchmark_pids+=($!) done status="SUCCESS" for pid in "${benchmark_pids[@]}"; do wait $pid exit_code=$? - echo "Detected exit code: ${exit_code}" + $BOLD_YELLOW && echo "Detected exit code: ${exit_code}" && $RESET if [ ${exit_code} == 0 ]; then - echo "Process ${pid} succeeded" + $BOLD_GREEN && echo "Process ${pid} succeeded" && $RESET else - echo "Process ${pid} failed" + $BOLD_RED && echo "Process ${pid} failed" && $RESET status="FAILURE" fi done - echo "Benchmark process status: ${status}" + $BOLD_YELLOW && echo "Benchmark process status: ${status}" && $RESET if [ ${status} == "FAILURE" ]; then - echo "Benchmark process returned non-zero exit code." + $BOLD_RED && echo "Benchmark process returned non-zero exit code." && $RESET exit 1 fi } - -# run performance -cmd="${benchmark_cmd} --input_model=${input_model}" - -if [ "${new_benchmark}" == "true" ]; then - echo "run with internal benchmark..." - eval ${cmd} 2>&1 | tee ${log_dir}/${framework}-${model}-performance-${precision}.log -else - echo "run with external multiInstance benchmark..." - multiInstance -fi +main diff --git a/.azure-pipelines/scripts/models/run_model_trigger_common.sh b/.azure-pipelines/scripts/models/run_model_trigger_common.sh index 397ccade727..d0c89560416 100644 --- a/.azure-pipelines/scripts/models/run_model_trigger_common.sh +++ b/.azure-pipelines/scripts/models/run_model_trigger_common.sh @@ -1,5 +1,6 @@ #!/bin/bash - +set -eo pipefail +source /neural-compressor/.azure-pipelines/scripts/change_color.sh # get parameters PATTERN='[-a-zA-Z0-9_]*=' @@ -32,10 +33,8 @@ do tuning_cmd=`echo $i | sed "s/${PATTERN}//"`;; --benchmark_cmd=*) benchmark_cmd=`echo $i | sed "s/${PATTERN}//"`;; - --tune_acc=*) - tune_acc=`echo $i | sed "s/${PATTERN}//"`;; - --build_id=*) - build_id=`echo $i | sed "s/${PATTERN}//"`;; + --mode=*) + mode=`echo $i | sed "s/${PATTERN}//"`;; *) echo "Parameter $i not recognized."; exit 1;; esac @@ -44,113 +43,87 @@ done log_dir="/neural-compressor/.azure-pipelines/scripts/models" WORK_SOURCE_DIR="/neural-compressor/examples/${framework}" SCRIPTS_PATH="/neural-compressor/.azure-pipelines/scripts/models" -echo "processing ${framework}-${fwk_ver}-${model}" -echo "tuning_cmd is ${tuning_cmd}" -echo "benchmark_cmd is ${benchmark_cmd}" - -echo "======= creat log_dir =========" -if [ -d "${log_dir}/${model}" ]; then - echo "${log_dir}/${model} already exists, don't need to mkdir." -else - echo "no log dir ${log_dir}/${model}, create." 
- cd ${log_dir} - mkdir ${model} -fi - -echo "====== install requirements ======" -/bin/bash /neural-compressor/.azure-pipelines/scripts/install_nc.sh - -cd ${WORK_SOURCE_DIR}/${model_src_dir} -pip install ruamel_yaml -pip install psutil -pip install protobuf==3.20.1 -if [[ "${framework}" == "tensorflow" ]]; then - pip install intel-tensorflow==${fwk_ver} -elif [[ "${framework}" == "pytorch" ]]; then - pip install torch==${fwk_ver} -f https://download.pytorch.org/whl/torch_stable.html - pip install torchvision==${torch_vision_ver} -f https://download.pytorch.org/whl/torch_stable.html -elif [[ "${framework}" == "onnxrt" ]]; then - pip install onnx==1.11.0 - pip install onnxruntime==${fwk_ver} -elif [[ "${framework}" == "mxnet" ]]; then - if [[ "${fwk_ver}" == "1.7.0" ]]; then - pip install mxnet==${fwk_ver}.post2 - elif [[ "${fwk_ver}" == "1.6.0" ]]; then - pip install mxnet-mkl==${mxnet_version} - else - pip install mxnet==${fwk_ver} - fi -fi - -if [ -f "requirements.txt" ]; then - sed -i '/neural-compressor/d' requirements.txt - if [ "${framework}" == "onnxrt" ]; then - sed -i '/^onnx>=/d;/^onnx==/d;/^onnxruntime>=/d;/^onnxruntime==/d' requirements.txt - fi - if [ "${framework}" == "tensorflow" ]; then - sed -i '/tensorflow==/d;/tensorflow$/d' requirements.txt - sed -i '/^intel-tensorflow/d' requirements.txt +$BOLD_YELLOW && echo "processing ${framework}-${fwk_ver}-${model}" && $RESET +$BOLD_YELLOW && echo "benchmark_cmd is ${benchmark_cmd}" && $RESET + +if [ "${mode}" == "env_setup" ]; then + /bin/bash env_setup.sh \ + --yaml=${yaml} \ + --framework=${framework} \ + --fwk_ver=${fwk_ver} \ + --torch_vision_ver=${torch_vision_ver} \ + --model=${model} \ + --model_src_dir=${model_src_dir} \ + --dataset_location=${dataset_location} \ + --batch_size=${batch_size} \ + --strategy=${strategy} \ + --new_benchmark=${new_benchmark} +elif [ "${mode}" == "tuning" ]; then + cd ${WORK_SOURCE_DIR}/${model_src_dir} + $BOLD_YELLOW && echo "tuning_cmd is ${tuning_cmd}" && $RESET + $BOLD_YELLOW && echo "======== run tuning ========" && $RESET + /bin/bash ${SCRIPTS_PATH}/run_tuning_common.sh \ + --framework=${framework} \ + --model=${model} \ + --tuning_cmd="${tuning_cmd}" \ + --log_dir="${log_dir}/${model}" \ + --input_model=${input_model} \ + --strategy=${strategy} \ + 2>&1 | tee -a ${log_dir}/${model}/${framework}-${model}-tune.log + $BOLD_YELLOW && echo "====== check tuning status. ======" && $RESET + control_phrase="model which meet accuracy goal." + if [ $(grep "${control_phrase}" ${log_dir}/${model}/${framework}-${model}-tune.log | wc -l) == 0 ];then + $BOLD_RED && echo "====== tuning FAILED!! ======" && $RESET; exit 1 fi - if [ "${framework}" == "mxnet" ]; then - sed -i '/mxnet==/d;/mxnet$/d;/mxnet-mkl==/d;/mxnet-mkl$/d' requirements.txt + if [ $(grep "${control_phrase}" ${log_dir}/${model}/${framework}-${model}-tune.log | grep "Not found" | wc -l) == 1 ];then + $BOLD_RED && echo "====== tuning FAILED!! ======" && $RESET; exit 1 fi - if [ "${framework}" == "pytorch" ]; then - sed -i '/torch==/d;/torch$/d;/torchvision==/d;/torchvision$/d' requirements.txt + $BOLD_GREEN && echo "====== tuning SUCCEED!! 
======" && $RESET +elif [ "${mode}" == "fp32_benchmark" ]; then + cd ${WORK_SOURCE_DIR}/${model_src_dir} + $BOLD_YELLOW && echo "benchmark_cmd is ${benchmark_cmd}" && $RESET + $BOLD_YELLOW && echo "====== run benchmark fp32 =======" && $RESET + /bin/bash ${SCRIPTS_PATH}/run_benchmark_common.sh \ + --framework=${framework} \ + --model=${model} \ + --input_model=${input_model} \ + --benchmark_cmd="${benchmark_cmd}" \ + --log_dir="${log_dir}/${model}" \ + --new_benchmark=${new_benchmark} \ + --precision="fp32" \ + --stage=${mode} +elif [ "${mode}" == "int8_benchmark" ]; then + cd ${WORK_SOURCE_DIR}/${model_src_dir} + $BOLD_YELLOW && echo "benchmark_cmd is ${benchmark_cmd}" && $RESET + $BOLD_YELLOW && echo "====== run benchmark int8 =======" && $RESET + if [[ "${framework}" == "onnxrt" ]]; then + model_name="${log_dir}/${model}/${framework}-${model}-tune.onnx" + elif [[ "${framework}" == "mxnet" ]]; then + model_name="${log_dir}/${model}" + elif [[ "${framework}" == "tensorflow" ]]; then + model_name="${log_dir}/${model}/${framework}-${model}-tune.pb" + elif [[ "${framework}" == "pytorch" ]]; then + model_name=${input_model} + benchmark_cmd="${benchmark_cmd} --int8=true" fi - n=0 - until [ "$n" -ge 5 ] - do - python -m pip install -r requirements.txt && break - n=$((n+1)) - sleep 5 - done - pip list -else - echo "Not found requirements.txt file." + /bin/bash ${SCRIPTS_PATH}/run_benchmark_common.sh \ + --framework=${framework} \ + --model=${model} \ + --input_model="${model_name}" \ + --benchmark_cmd="${benchmark_cmd}" \ + --log_dir="${log_dir}/${model}" \ + --new_benchmark=${new_benchmark} \ + --precision="int8" \ + --stage=${mode} +elif [ "${mode}" == "collect_log" ]; then + cd ${WORK_SOURCE_DIR}/${model_src_dir} + $BOLD_YELLOW && echo "====== collect logs of model ${model} =======" && $RESET + python -u ${SCRIPTS_PATH}/collect_log_model.py \ + --framework=${framework} \ + --fwk_ver=${fwk_ver} \ + --model=${model} \ + --logs_dir="${log_dir}/${model}" \ + --output_dir="${log_dir}/${model}" \ + --build_id=${BUILD_BUILDID} + $BOLD_YELLOW && echo "====== Finish collect logs =======" && $RESET fi - - -echo "======== update yaml config ========" -echo -e "\nPrint origin yaml..." -cat ${yaml} -python ${SCRIPTS_PATH}/update_yaml_config.py --yaml=${yaml} --framework=${framework} \ ---dataset_location=${dataset_location} --batch_size=${batch_size} --strategy=${strategy} \ ---new_benchmark=${new_benchmark} --multi_instance='true' -echo -e "\nPrint updated yaml... 
" -cat ${yaml} - - -echo "======== run tuning ========" -/bin/bash ${SCRIPTS_PATH}/run_tuning_common.sh --framework=${framework} --model=${model} \ ---tuning_cmd="${tuning_cmd}" --log_dir="${log_dir}/${model}" --input_model=${input_model} --strategy=${strategy} \ -2>&1 | tee -a ${log_dir}/${model}/${framework}-${model}-tune.log - - -echo "====== run benchmark fp32 =======" -/bin/bash ${SCRIPTS_PATH}/run_benchmark_common.sh --framework=${framework} --model=${model} \ - --input_model=${input_model} --benchmark_cmd="${benchmark_cmd}" --tune_acc=${tune_acc} \ - --log_dir="${log_dir}/${model}" --new_benchmark=${new_benchmark} --precision="fp32" - - -echo "====== run benchmark int8 =======" -if [[ "${framework}" == "onnxrt" ]]; then - model_name="${log_dir}/${model}/${framework}-${model}-tune.onnx" -elif [[ "${framework}" == "mxnet" ]]; then - model_name="${log_dir}/${model}" -elif [[ "${framework}" == "tensorflow" ]]; then - model_name="${log_dir}/${model}/${framework}-${model}-tune.pb" -elif [[ "${framework}" == "pytorch" ]]; then - model_name=${input_model} - benchmark_cmd="${benchmark_cmd} --int8=true" -fi -/bin/bash ${SCRIPTS_PATH}/run_benchmark_common.sh --framework=${framework} --model=${model} \ - --input_model="${model_name}" --benchmark_cmd="${benchmark_cmd}" \ - --tune_acc=${tune_acc} --log_dir="${log_dir}/${model}" --new_benchmark=${new_benchmark} --precision="int8" - - -echo "====== collect logs of model ${model} =======" -python -u ${SCRIPTS_PATH}/collect_log_model.py --framework=${framework} --fwk_ver=${fwk_ver} --model=${model} \ ---logs_dir="${log_dir}/${model}" --output_dir="${log_dir}/${model}" --build_id=${build_id} - - -echo "====== Finish model test =======" \ No newline at end of file diff --git a/.azure-pipelines/scripts/models/run_mxnet_models_trigger.sh b/.azure-pipelines/scripts/models/run_mxnet_models_trigger.sh index 455ee809c61..8bf3b293fc2 100644 --- a/.azure-pipelines/scripts/models/run_mxnet_models_trigger.sh +++ b/.azure-pipelines/scripts/models/run_mxnet_models_trigger.sh @@ -1,5 +1,5 @@ #!/bin/bash - +set -eo pipefail # get parameters PATTERN='[-a-zA-Z0-9_]*=' @@ -8,10 +8,8 @@ do case $i in --model=*) model=`echo $i | sed "s/${PATTERN}//"`;; - --tune_acc=*) - tune_acc=`echo $i | sed "s/${PATTERN}//"`;; - --build_id=*) - build_id=`echo $i | sed "s/${PATTERN}//"`;; + --mode=*) + mode=`echo $i | sed "s/${PATTERN}//"`;; *) echo "Parameter $i not recognized."; exit 1;; esac @@ -35,7 +33,17 @@ if [ "${model}" == "resnet50v1" ]; then fi -/bin/bash run_model_trigger_common.sh --yaml=${yaml} --framework=${FRAMEWORK} --fwk_ver=${FRAMEWORK_VERSION} \ ---model=${model} --model_src_dir=${model_src_dir} --dataset_location=${dataset_location} \ ---input_model=${input_model} --batch_size=${batch_size} --strategy=${strategy} --new_benchmark=${new_benchmark} \ ---tuning_cmd="${tuning_cmd}" --benchmark_cmd="${benchmark_cmd}" --tune_acc=${tune_acc} --build_id=${build_id} +/bin/bash run_model_trigger_common.sh \ + --yaml=${yaml} \ + --framework=${FRAMEWORK} \ + --fwk_ver=${FRAMEWORK_VERSION} \ + --model=${model} \ + --model_src_dir=${model_src_dir} \ + --dataset_location=${dataset_location} \ + --input_model=${input_model} \ + --batch_size=${batch_size} \ + --strategy=${strategy} \ + --new_benchmark=${new_benchmark} \ + --tuning_cmd="${tuning_cmd}" \ + --benchmark_cmd="${benchmark_cmd}" \ + --mode=${mode} diff --git a/.azure-pipelines/scripts/models/run_onnxrt_models_trigger.sh b/.azure-pipelines/scripts/models/run_onnxrt_models_trigger.sh index d71081f61b3..a69852f01a5 100644 --- 
a/.azure-pipelines/scripts/models/run_onnxrt_models_trigger.sh +++ b/.azure-pipelines/scripts/models/run_onnxrt_models_trigger.sh @@ -1,5 +1,5 @@ #!/bin/bash - +set -eo pipefail # get parameters PATTERN='[-a-zA-Z0-9_]*=' @@ -8,10 +8,8 @@ do case $i in --model=*) model=`echo $i | sed "s/${PATTERN}//"`;; - --tune_acc=*) - tune_acc=`echo $i | sed "s/${PATTERN}//"`;; - --build_id=*) - build_id=`echo $i | sed "s/${PATTERN}//"`;; + --mode=*) + mode=`echo $i | sed "s/${PATTERN}//"`;; *) echo "Parameter $i not recognized."; exit 1;; esac @@ -31,7 +29,7 @@ if [ "${model}" == "resnet50-v1-12" ]; then batch_size=1 new_benchmark=true tuning_cmd="bash run_tuning.sh --input_model=${input_model} --config=${yaml}" - benchmark_cmd="bash run_benchmark.sh --config=${yaml}" + benchmark_cmd="bash run_benchmark.sh --config=${yaml} --mode=performance" elif [ "${model}" == "bert_base_MRPC_static" ]; then model_src_dir="language_translation/bert/quantization/ptq" dataset_location="/tf_dataset/pytorch/glue_data/MRPC" @@ -41,7 +39,7 @@ elif [ "${model}" == "bert_base_MRPC_static" ]; then batch_size=1 new_benchmark=true tuning_cmd="bash run_tuning.sh --input_model=${input_model} --config=${yaml}" - benchmark_cmd="bash run_benchmark.sh --config=${yaml}" + benchmark_cmd="bash run_benchmark.sh --config=${yaml} --mode=performance" elif [ "${model}" == "bert_base_MRPC_dynamic" ]; then model_src_dir="language_translation/bert/quantization/ptq" dataset_location="/tf_dataset/pytorch/glue_data/MRPC" @@ -51,7 +49,7 @@ elif [ "${model}" == "bert_base_MRPC_dynamic" ]; then batch_size=1 new_benchmark=true tuning_cmd="bash run_tuning.sh --input_model=${input_model} --config=${yaml}" - benchmark_cmd="bash run_benchmark.sh --config=${yaml}" + benchmark_cmd="bash run_benchmark.sh --config=${yaml} --mode=performance" elif [ "${model}" == "distilbert_base_MRPC_qdq" ]; then model_src_dir="language_translation/distilbert/quantization/ptq" dataset_location="/tf_dataset/pytorch/glue_data/MRPC" @@ -61,11 +59,21 @@ elif [ "${model}" == "distilbert_base_MRPC_qdq" ]; then batch_size=1 new_benchmark=true tuning_cmd="bash run_tuning.sh --input_model=${input_model} --config=${yaml}" - benchmark_cmd="bash run_benchmark.sh --config=${yaml}" + benchmark_cmd="bash run_benchmark.sh --config=${yaml} --mode=performance" fi -/bin/bash run_model_trigger_common.sh --yaml=${yaml} --framework=${FRAMEWORK} --fwk_ver=${FRAMEWORK_VERSION} \ ---model=${model} --model_src_dir=${model_src_dir} --dataset_location=${dataset_location} \ ---input_model=${input_model} --batch_size=${batch_size} --strategy=${strategy} --new_benchmark=${new_benchmark} \ ---tuning_cmd="${tuning_cmd}" --benchmark_cmd="${benchmark_cmd} --mode=performance" --tune_acc=${tune_acc} --build_id=${build_id} +/bin/bash run_model_trigger_common.sh \ + --yaml=${yaml} \ + --framework=${FRAMEWORK} \ + --fwk_ver=${FRAMEWORK_VERSION} \ + --model=${model} \ + --model_src_dir=${model_src_dir} \ + --dataset_location=${dataset_location} \ + --input_model=${input_model} \ + --batch_size=${batch_size} \ + --strategy=${strategy} \ + --new_benchmark=${new_benchmark} \ + --tuning_cmd="${tuning_cmd}" \ + --benchmark_cmd="${benchmark_cmd}" \ + --mode=${mode} diff --git a/.azure-pipelines/scripts/models/run_pytorch_models_trigger.sh b/.azure-pipelines/scripts/models/run_pytorch_models_trigger.sh index 9aef437666d..5cd776816f4 100644 --- a/.azure-pipelines/scripts/models/run_pytorch_models_trigger.sh +++ b/.azure-pipelines/scripts/models/run_pytorch_models_trigger.sh @@ -1,5 +1,5 @@ #!/bin/bash - +set -eo pipefail # 
get parameters PATTERN='[-a-zA-Z0-9_]*=' @@ -8,10 +8,8 @@ do case $i in --model=*) model=`echo $i | sed "s/${PATTERN}//"`;; - --tune_acc=*) - tune_acc=`echo $i | sed "s/${PATTERN}//"`;; - --build_id=*) - build_id=`echo $i | sed "s/${PATTERN}//"`;; + --mode=*) + mode=`echo $i | sed "s/${PATTERN}//"`;; *) echo "Parameter $i not recognized."; exit 1;; esac @@ -46,8 +44,18 @@ elif [ "${model}" == "resnet18_fx" ]; then fi -/bin/bash run_model_trigger_common.sh --yaml=${yaml} --framework=${FRAMEWORK} --fwk_ver=${FRAMEWORK_VERSION} \ ---torch_vision_ver=${TORCH_VISION_VERSION} --model=${model} --model_src_dir=${model_src_dir} \ ---dataset_location=${dataset_location} --input_model=${input_model} --batch_size=${batch_size} --strategy=${strategy} \ ---new_benchmark=${new_benchmark} --tuning_cmd="${tuning_cmd}" --benchmark_cmd="${benchmark_cmd}" \ ---tune_acc=${tune_acc} --build_id=${build_id} +/bin/bash run_model_trigger_common.sh \ + --yaml=${yaml} \ + --framework=${FRAMEWORK} \ + --fwk_ver=${FRAMEWORK_VERSION} \ + --torch_vision_ver=${TORCH_VISION_VERSION} \ + --model=${model} \ + --model_src_dir=${model_src_dir} \ + --dataset_location=${dataset_location} \ + --input_model=${input_model} \ + --batch_size=${batch_size} \ + --strategy=${strategy} \ + --new_benchmark=${new_benchmark} \ + --tuning_cmd="${tuning_cmd}" \ + --benchmark_cmd="${benchmark_cmd}" \ + --mode=${mode} diff --git a/.azure-pipelines/scripts/models/run_tensorflow_models_trigger.sh b/.azure-pipelines/scripts/models/run_tensorflow_models_trigger.sh index 1b3a6c6815d..b3eee910900 100644 --- a/.azure-pipelines/scripts/models/run_tensorflow_models_trigger.sh +++ b/.azure-pipelines/scripts/models/run_tensorflow_models_trigger.sh @@ -1,5 +1,5 @@ #!/bin/bash - +set -eo pipefail # get parameters PATTERN='[-a-zA-Z0-9_]*=' @@ -8,10 +8,8 @@ do case $i in --model=*) model=`echo $i | sed "s/${PATTERN}//"`;; - --tune_acc=*) - tune_acc=`echo $i | sed "s/${PATTERN}//"`;; - --build_id=*) - build_id=`echo $i | sed "s/${PATTERN}//"`;; + --mode=*) + mode=`echo $i | sed "s/${PATTERN}//"`;; *) echo "Parameter $i not recognized."; exit 1;; esac @@ -104,7 +102,17 @@ elif [ "${model}" == "resnet50_fashion" ]; then fi -/bin/bash run_model_trigger_common.sh --yaml=${yaml} --framework=${FRAMEWORK} --fwk_ver=${FRAMEWORK_VERSION} \ ---model=${model} --model_src_dir=${model_src_dir} --dataset_location=${dataset_location} \ ---input_model=${input_model} --batch_size=${batch_size} --strategy=${strategy} --new_benchmark=${new_benchmark} \ ---tuning_cmd="${tuning_cmd}" --benchmark_cmd="${benchmark_cmd}" --tune_acc=${tune_acc} --build_id=${build_id} +/bin/bash run_model_trigger_common.sh \ + --yaml=${yaml} \ + --framework=${FRAMEWORK} \ + --fwk_ver=${FRAMEWORK_VERSION} \ + --model=${model} \ + --model_src_dir=${model_src_dir} \ + --dataset_location=${dataset_location} \ + --input_model=${input_model} \ + --batch_size=${batch_size} \ + --strategy=${strategy} \ + --new_benchmark=${new_benchmark} \ + --tuning_cmd="${tuning_cmd}" \ + --benchmark_cmd="${benchmark_cmd}" \ + --mode=${mode} diff --git a/.azure-pipelines/scripts/models/run_tuning_common.sh b/.azure-pipelines/scripts/models/run_tuning_common.sh index 26b223bf994..fbb68d65605 100644 --- a/.azure-pipelines/scripts/models/run_tuning_common.sh +++ b/.azure-pipelines/scripts/models/run_tuning_common.sh @@ -1,5 +1,6 @@ #!/bin/bash -set -x +set -eo pipefail +source /neural-compressor/.azure-pipelines/scripts/change_color.sh # get parameters PATTERN='[-a-zA-Z0-9_]*=' @@ -35,16 +36,15 @@ else 
output_model=${log_dir}/${framework}-${model}-tune.pb fi -echo -e "-------- run_tuning_common --------" -echo ${tuning_cmd} +$BOLD_YELLOW && echo -e "-------- run_tuning_common --------" && $RESET +$BOLD_YELLOW && echo ${tuning_cmd} && $RESET + eval "/usr/bin/time -v ${tuning_cmd} --output_model=${output_model}" -echo "====== finish tuning. echo information. ======" +$BOLD_YELLOW && echo "====== finish tuning. echo information. ======" && $RESET endtime=`date +'%Y-%m-%d %H:%M:%S'` start_seconds=$(date --date="$starttime" +%s); end_seconds=$(date --date="$endtime" +%s); -echo "Tuning time spend: "$((end_seconds-start_seconds))"s " - -echo "Tuning strategy: ${strategy}" - -echo "Total resident size (kbytes): $(cat /proc/meminfo |grep 'MemTotal' |sed 's/[^0-9]//g')" +$BOLD_GREEN && echo "Tuning time spent: "$((end_seconds-start_seconds))"s " && $RESET +$BOLD_GREEN && echo "Tuning strategy: ${strategy}" && $RESET +$BOLD_GREEN && echo "Total memory (kbytes): $(cat /proc/meminfo | grep 'MemTotal' | sed 's/[^0-9]//g')" && $RESET diff --git a/.azure-pipelines/scripts/models/update_yaml_config.py b/.azure-pipelines/scripts/models/update_yaml_config.py index 9d92a457f6d..c305134e18d 100644 --- a/.azure-pipelines/scripts/models/update_yaml_config.py +++ b/.azure-pipelines/scripts/models/update_yaml_config.py @@ -64,9 +64,9 @@ def update_yaml_dataset(yaml, framework, dataset_location): line = re.sub(key_patterns["pattern"], key_patterns["replacement"], line) config.write(line) - if framework == "pytorch": - val_dataset = dataset_location + f"\{os.path.sep}" + "val" - train_dataset = dataset_location + f"\{os.path.sep}" + "train" + else: + val_dataset = dataset_location + f"{os.path.sep}" + "val" + train_dataset = dataset_location + f"{os.path.sep}" + "train" patterns = { "calibration_dataset": { "pattern": r'root:.*/path/to/calibration/dataset/?', @@ -78,14 +78,6 @@ }, } - if topology == "distilbert_base_MRPC": - patterns.update({ - "bert_name": { - "pattern": r'name:/s+bert', - "replacement": "name: distilbert", - } - }) - print("======= update_yaml_dataset =======") with open(yaml, "w") as config: for line in lines: diff --git a/.azure-pipelines/template/model-template.yml b/.azure-pipelines/template/model-template.yml index 1bc9dd4a7bf..f145025faf2 100644 --- a/.azure-pipelines/template/model-template.yml +++ b/.azure-pipelines/template/model-template.yml @@ -13,18 +13,56 @@ parameters: steps: - template: docker-template.yml parameters: - dockerConfigName: 'commonDockerConfig' - repoName: 'neural-compressor' - repoTag: 'py38' - dockerFileName: 'Dockerfile' + dockerConfigName: "commonDockerConfig" + repoName: "neural-compressor" + repoTag: "py38" + dockerFileName: "Dockerfile" containerName: ${{ parameters.modelContainerName }} - script: | docker exec ${{ parameters.modelContainerName }} bash -c "cd /neural-compressor/.azure-pipelines/scripts/models \ - && bash run_${{ parameters.framework }}_models_trigger.sh --model=${{ parameters.modelName }} --tune_acc=true --build_id=$(Build.BuildId)" - displayName: Tune&Benchmark ${{ parameters.modelName }} + && bash run_${{ parameters.framework }}_models_trigger.sh --model=${{ parameters.modelName }} --mode='env_setup'" + displayName: Env setup + + - task: DownloadPipelineArtifact@2 + inputs: + source: "specific" + artifact: ${{ parameters.framework }}_${{ parameters.modelName }} + patterns: "**_summary.log" + path: $(Build.SourcesDirectory)/.azure-pipelines/scripts/models/${{
parameters.modelName }}_refer_log + project: $(System.TeamProject) + pipeline: "Model-Test" + runVersion: "specific" + runId: $(refer_buildId) + retryDownloadCount: 3 + displayName: "Download refer logs" + + - script: | + docker exec ${{ parameters.modelContainerName }} bash -c "cd /neural-compressor/.azure-pipelines/scripts/models \ + && bash run_${{ parameters.framework }}_models_trigger.sh --model=${{ parameters.modelName }} --mode='tuning'" + displayName: Tuning + + - script: | + docker exec ${{ parameters.modelContainerName }} bash -c "cd /neural-compressor/.azure-pipelines/scripts/models \ + && bash run_${{ parameters.framework }}_models_trigger.sh --model=${{ parameters.modelName }} --mode='int8_benchmark'" + displayName: INT8 Benchmark + + - script: | + docker exec ${{ parameters.modelContainerName }} bash -c "cd /neural-compressor/.azure-pipelines/scripts/models \ + && bash run_${{ parameters.framework }}_models_trigger.sh --model=${{ parameters.modelName }} --mode='fp32_benchmark'" + displayName: FP32 Benchmark + + - task: Bash@3 + condition: always() + inputs: + targetType: "inline" + script: | + docker exec ${{ parameters.modelContainerName }} bash -c "cd /neural-compressor/.azure-pipelines/scripts/models \ + && bash run_${{ parameters.framework }}_models_trigger.sh --model=${{ parameters.modelName }} --mode='collect_log'" + displayName: Collect log - task: PublishPipelineArtifact@1 + condition: always() inputs: targetPath: $(Build.SourcesDirectory)/.azure-pipelines/scripts/models/${{ parameters.modelName }}/ artifact: ${{ parameters.framework }}_${{ parameters.modelName }}